Prefer pg_any_to_server/pg_server_to_any over pg_do_encoding_conversion.
A large majority of the callers of pg_do_encoding_conversion were specifying the database encoding as either source or target of the conversion, meaning that we can use the less general functions pg_any_to_server/pg_server_to_any instead.

The main advantage of using the latter functions is that they can make use of a cached conversion-function lookup in the common case that the other encoding is the current client_encoding. It's notationally cleaner too in most cases, not least because of the historical artifact that the latter functions use "char *" rather than "unsigned char *" in their APIs.

Note that pg_any_to_server will apply an encoding verification step in some cases where pg_do_encoding_conversion would have just done nothing. This seems to me to be a good idea at most of these call sites, though it partially negates the performance benefit.

Per discussion of bug #9210.
This commit is contained in:
parent
49c817eab7
commit
769065c1b2
@ -1458,11 +1458,9 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
|
|||||||
{
|
{
|
||||||
char *enc;
|
char *enc;
|
||||||
|
|
||||||
enc = (char *)
|
enc = pg_any_to_server(qstr,
|
||||||
pg_do_encoding_conversion((unsigned char *) qstr,
|
entry->query_len,
|
||||||
entry->query_len,
|
entry->encoding);
|
||||||
entry->encoding,
|
|
||||||
GetDatabaseEncoding());
|
|
||||||
|
|
||||||
values[i++] = CStringGetTextDatum(enc);
|
values[i++] = CStringGetTextDatum(enc);
|
||||||
|
|
||||||
|
@ -158,10 +158,7 @@ ASN1_STRING_to_text(ASN1_STRING *str)
|
|||||||
nullterm = '\0';
|
nullterm = '\0';
|
||||||
BIO_write(membuf, &nullterm, 1);
|
BIO_write(membuf, &nullterm, 1);
|
||||||
size = BIO_get_mem_data(membuf, &sp);
|
size = BIO_get_mem_data(membuf, &sp);
|
||||||
dp = (char *) pg_do_encoding_conversion((unsigned char *) sp,
|
dp = pg_any_to_server(sp, size - 1, PG_UTF8);
|
||||||
size - 1,
|
|
||||||
PG_UTF8,
|
|
||||||
GetDatabaseEncoding());
|
|
||||||
result = cstring_to_text(dp);
|
result = cstring_to_text(dp);
|
||||||
if (dp != sp)
|
if (dp != sp)
|
||||||
pfree(dp);
|
pfree(dp);
|
||||||
@ -323,10 +320,7 @@ X509_NAME_to_text(X509_NAME *name)
|
|||||||
nullterm = '\0';
|
nullterm = '\0';
|
||||||
BIO_write(membuf, &nullterm, 1);
|
BIO_write(membuf, &nullterm, 1);
|
||||||
size = BIO_get_mem_data(membuf, &sp);
|
size = BIO_get_mem_data(membuf, &sp);
|
||||||
dp = (char *) pg_do_encoding_conversion((unsigned char *) sp,
|
dp = pg_any_to_server(sp, size - 1, PG_UTF8);
|
||||||
size - 1,
|
|
||||||
PG_UTF8,
|
|
||||||
GetDatabaseEncoding());
|
|
||||||
result = cstring_to_text(dp);
|
result = cstring_to_text(dp);
|
||||||
if (dp != sp)
|
if (dp != sp)
|
||||||
pfree(dp);
|
pfree(dp);
|
||||||
|
@ -635,7 +635,6 @@ read_extension_script_file(const ExtensionControlFile *control,
|
|||||||
const char *filename)
|
const char *filename)
|
||||||
{
|
{
|
||||||
int src_encoding;
|
int src_encoding;
|
||||||
int dest_encoding = GetDatabaseEncoding();
|
|
||||||
bytea *content;
|
bytea *content;
|
||||||
char *src_str;
|
char *src_str;
|
||||||
char *dest_str;
|
char *dest_str;
|
||||||
@ -645,7 +644,7 @@ read_extension_script_file(const ExtensionControlFile *control,
|
|||||||
|
|
||||||
/* use database encoding if not given */
|
/* use database encoding if not given */
|
||||||
if (control->encoding < 0)
|
if (control->encoding < 0)
|
||||||
src_encoding = dest_encoding;
|
src_encoding = GetDatabaseEncoding();
|
||||||
else
|
else
|
||||||
src_encoding = control->encoding;
|
src_encoding = control->encoding;
|
||||||
|
|
||||||
@ -655,10 +654,7 @@ read_extension_script_file(const ExtensionControlFile *control,
|
|||||||
pg_verify_mbstr_len(src_encoding, src_str, len, false);
|
pg_verify_mbstr_len(src_encoding, src_str, len, false);
|
||||||
|
|
||||||
/* convert the encoding to the database encoding */
|
/* convert the encoding to the database encoding */
|
||||||
dest_str = (char *) pg_do_encoding_conversion((unsigned char *) src_str,
|
dest_str = pg_any_to_server(src_str, len, src_encoding);
|
||||||
len,
|
|
||||||
src_encoding,
|
|
||||||
dest_encoding);
|
|
||||||
|
|
||||||
/* if no conversion happened, we have to arrange for null termination */
|
/* if no conversion happened, we have to arrange for null termination */
|
||||||
if (dest_str == src_str)
|
if (dest_str == src_str)
|
||||||
|
@ -255,10 +255,7 @@ dsnowball_lexize(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
char *recoded;
|
char *recoded;
|
||||||
|
|
||||||
recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
|
recoded = pg_server_to_any(txt, strlen(txt), PG_UTF8);
|
||||||
strlen(txt),
|
|
||||||
GetDatabaseEncoding(),
|
|
||||||
PG_UTF8);
|
|
||||||
if (recoded != txt)
|
if (recoded != txt)
|
||||||
{
|
{
|
||||||
pfree(txt);
|
pfree(txt);
|
||||||
@ -284,10 +281,7 @@ dsnowball_lexize(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
char *recoded;
|
char *recoded;
|
||||||
|
|
||||||
recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
|
recoded = pg_any_to_server(txt, strlen(txt), PG_UTF8);
|
||||||
strlen(txt),
|
|
||||||
PG_UTF8,
|
|
||||||
GetDatabaseEncoding());
|
|
||||||
if (recoded != txt)
|
if (recoded != txt)
|
||||||
{
|
{
|
||||||
pfree(txt);
|
pfree(txt);
|
||||||
|
@ -209,10 +209,7 @@ t_readline(FILE *fp)
|
|||||||
(void) pg_verify_mbstr(PG_UTF8, buf, len, false);
|
(void) pg_verify_mbstr(PG_UTF8, buf, len, false);
|
||||||
|
|
||||||
/* And convert */
|
/* And convert */
|
||||||
recoded = (char *) pg_do_encoding_conversion((unsigned char *) buf,
|
recoded = pg_any_to_server(buf, len, PG_UTF8);
|
||||||
len,
|
|
||||||
PG_UTF8,
|
|
||||||
GetDatabaseEncoding());
|
|
||||||
if (recoded == buf)
|
if (recoded == buf)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -418,9 +418,7 @@ db_encoding_strdup(int encoding, const char *str)
|
|||||||
char *mstr;
|
char *mstr;
|
||||||
|
|
||||||
/* convert the string to the database encoding */
|
/* convert the string to the database encoding */
|
||||||
pstr = (char *) pg_do_encoding_conversion(
|
pstr = pg_any_to_server(str, strlen(str), encoding);
|
||||||
(unsigned char *) str, strlen(str),
|
|
||||||
encoding, GetDatabaseEncoding());
|
|
||||||
mstr = strdup(pstr);
|
mstr = strdup(pstr);
|
||||||
if (pstr != str)
|
if (pstr != str)
|
||||||
pfree(pstr);
|
pfree(pstr);
|
||||||
@ -581,35 +579,32 @@ strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm
|
|||||||
{
|
{
|
||||||
size_t len;
|
size_t len;
|
||||||
wchar_t wbuf[MAX_L10N_DATA];
|
wchar_t wbuf[MAX_L10N_DATA];
|
||||||
int encoding;
|
|
||||||
|
|
||||||
encoding = GetDatabaseEncoding();
|
|
||||||
|
|
||||||
len = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
|
len = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
|
{
|
||||||
/*
|
/*
|
||||||
* strftime call failed - return 0 with the contents of dst
|
* strftime call failed - return 0 with the contents of dst
|
||||||
* unspecified
|
* unspecified
|
||||||
*/
|
*/
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL);
|
len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL);
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
elog(ERROR,
|
elog(ERROR, "could not convert string to UTF-8: error code %lu",
|
||||||
"could not convert string to UTF-8: error code %lu", GetLastError());
|
GetLastError());
|
||||||
|
|
||||||
dst[len] = '\0';
|
dst[len] = '\0';
|
||||||
if (encoding != PG_UTF8)
|
if (GetDatabaseEncoding() != PG_UTF8)
|
||||||
{
|
{
|
||||||
char *convstr =
|
char *convstr = pg_any_to_server(dst, len, PG_UTF8);
|
||||||
(char *) pg_do_encoding_conversion((unsigned char *) dst,
|
|
||||||
len, PG_UTF8, encoding);
|
|
||||||
|
|
||||||
if (dst != convstr)
|
if (convstr != dst)
|
||||||
{
|
{
|
||||||
strlcpy(dst, convstr, dstlen);
|
strlcpy(dst, convstr, dstlen);
|
||||||
len = strlen(dst);
|
len = strlen(dst);
|
||||||
|
pfree(convstr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,10 +345,7 @@ xml_recv(PG_FUNCTION_ARGS)
|
|||||||
xmlFreeDoc(doc);
|
xmlFreeDoc(doc);
|
||||||
|
|
||||||
/* Now that we know what we're dealing with, convert to server encoding */
|
/* Now that we know what we're dealing with, convert to server encoding */
|
||||||
newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
|
newstr = pg_any_to_server(str, nbytes, encoding);
|
||||||
nbytes,
|
|
||||||
encoding,
|
|
||||||
GetDatabaseEncoding());
|
|
||||||
|
|
||||||
if (newstr != str)
|
if (newstr != str)
|
||||||
{
|
{
|
||||||
@ -1793,10 +1790,8 @@ sqlchar_to_unicode(char *s)
|
|||||||
char *utf8string;
|
char *utf8string;
|
||||||
pg_wchar ret[2]; /* need space for trailing zero */
|
pg_wchar ret[2]; /* need space for trailing zero */
|
||||||
|
|
||||||
utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
|
/* note we're not assuming s is null-terminated */
|
||||||
pg_mblen(s),
|
utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
|
||||||
GetDatabaseEncoding(),
|
|
||||||
PG_UTF8);
|
|
||||||
|
|
||||||
pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
|
pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
|
||||||
pg_encoding_mblen(PG_UTF8, utf8string));
|
pg_encoding_mblen(PG_UTF8, utf8string));
|
||||||
@ -1892,19 +1887,15 @@ map_sql_identifier_to_xml_name(char *ident, bool fully_escaped,
|
|||||||
static char *
|
static char *
|
||||||
unicode_to_sqlchar(pg_wchar c)
|
unicode_to_sqlchar(pg_wchar c)
|
||||||
{
|
{
|
||||||
unsigned char utf8string[5]; /* need room for trailing zero */
|
char utf8string[8]; /* need room for trailing zero */
|
||||||
char *result;
|
char *result;
|
||||||
|
|
||||||
memset(utf8string, 0, sizeof(utf8string));
|
memset(utf8string, 0, sizeof(utf8string));
|
||||||
unicode_to_utf8(c, utf8string);
|
unicode_to_utf8(c, (unsigned char *) utf8string);
|
||||||
|
|
||||||
result = (char *) pg_do_encoding_conversion(utf8string,
|
result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
|
||||||
pg_encoding_mblen(PG_UTF8,
|
/* if pg_any_to_server didn't strdup, we must */
|
||||||
(char *) utf8string),
|
if (result == utf8string)
|
||||||
PG_UTF8,
|
|
||||||
GetDatabaseEncoding());
|
|
||||||
/* if pg_do_encoding_conversion didn't strdup, we must */
|
|
||||||
if (result == (char *) utf8string)
|
|
||||||
result = pstrdup(result);
|
result = pstrdup(result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -1077,7 +1077,9 @@ pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
|
|||||||
char *utf8;
|
char *utf8;
|
||||||
|
|
||||||
utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
|
utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
|
||||||
len, GetMessageEncoding(), PG_UTF8);
|
len,
|
||||||
|
GetMessageEncoding(),
|
||||||
|
PG_UTF8);
|
||||||
if (utf8 != str)
|
if (utf8 != str)
|
||||||
len = strlen(utf8);
|
len = strlen(utf8);
|
||||||
|
|
||||||
|
@ -3811,9 +3811,7 @@ hv_store_string(HV *hv, const char *key, SV *val)
|
|||||||
char *hkey;
|
char *hkey;
|
||||||
SV **ret;
|
SV **ret;
|
||||||
|
|
||||||
hkey = (char *)
|
hkey = pg_server_to_any(key, strlen(key), PG_UTF8);
|
||||||
pg_do_encoding_conversion((unsigned char *) key, strlen(key),
|
|
||||||
GetDatabaseEncoding(), PG_UTF8);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This seems nowhere documented, but under Perl 5.8.0 and up, hv_store()
|
* This seems nowhere documented, but under Perl 5.8.0 and up, hv_store()
|
||||||
@ -3841,9 +3839,7 @@ hv_fetch_string(HV *hv, const char *key)
|
|||||||
char *hkey;
|
char *hkey;
|
||||||
SV **ret;
|
SV **ret;
|
||||||
|
|
||||||
hkey = (char *)
|
hkey = pg_server_to_any(key, strlen(key), PG_UTF8);
|
||||||
pg_do_encoding_conversion((unsigned char *) key, strlen(key),
|
|
||||||
GetDatabaseEncoding(), PG_UTF8);
|
|
||||||
|
|
||||||
/* See notes in hv_store_string */
|
/* See notes in hv_store_string */
|
||||||
hlen = -(int) strlen(hkey);
|
hlen = -(int) strlen(hkey);
|
||||||
|
@ -9,24 +9,11 @@
|
|||||||
static inline char *
|
static inline char *
|
||||||
utf_u2e(char *utf8_str, size_t len)
|
utf_u2e(char *utf8_str, size_t len)
|
||||||
{
|
{
|
||||||
int enc = GetDatabaseEncoding();
|
|
||||||
char *ret;
|
char *ret;
|
||||||
|
|
||||||
/*
|
ret = pg_any_to_server(utf8_str, len, PG_UTF8);
|
||||||
* When we are in a PG_UTF8 or SQL_ASCII database
|
|
||||||
* pg_do_encoding_conversion() will not do any conversion (which is good)
|
|
||||||
* or verification (not so much), so we need to run the verification step
|
|
||||||
* separately.
|
|
||||||
*/
|
|
||||||
if (enc == PG_UTF8 || enc == PG_SQL_ASCII)
|
|
||||||
{
|
|
||||||
pg_verify_mbstr_len(enc, utf8_str, len, false);
|
|
||||||
ret = utf8_str;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str,
|
|
||||||
len, PG_UTF8, enc);
|
|
||||||
|
|
||||||
|
/* ensure we have a copy even if no conversion happened */
|
||||||
if (ret == utf8_str)
|
if (ret == utf8_str)
|
||||||
ret = pstrdup(ret);
|
ret = pstrdup(ret);
|
||||||
|
|
||||||
@ -41,12 +28,14 @@ utf_u2e(char *utf8_str, size_t len)
|
|||||||
static inline char *
|
static inline char *
|
||||||
utf_e2u(const char *str)
|
utf_e2u(const char *str)
|
||||||
{
|
{
|
||||||
char *ret =
|
char *ret;
|
||||||
(char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
|
|
||||||
GetDatabaseEncoding(), PG_UTF8);
|
|
||||||
|
|
||||||
|
ret = pg_server_to_any(str, strlen(str), PG_UTF8);
|
||||||
|
|
||||||
|
/* ensure we have a copy even if no conversion happened */
|
||||||
if (ret == str)
|
if (ret == str)
|
||||||
ret = pstrdup(ret);
|
ret = pstrdup(ret);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,11 +90,9 @@ PLyUnicode_Bytes(PyObject *unicode)
|
|||||||
{
|
{
|
||||||
PG_TRY();
|
PG_TRY();
|
||||||
{
|
{
|
||||||
encoded = (char *) pg_do_encoding_conversion(
|
encoded = pg_any_to_server(utf8string,
|
||||||
(unsigned char *) utf8string,
|
strlen(utf8string),
|
||||||
strlen(utf8string),
|
PG_UTF8);
|
||||||
PG_UTF8,
|
|
||||||
GetDatabaseEncoding());
|
|
||||||
}
|
}
|
||||||
PG_CATCH();
|
PG_CATCH();
|
||||||
{
|
{
|
||||||
@ -109,7 +107,7 @@ PLyUnicode_Bytes(PyObject *unicode)
|
|||||||
/* finally, build a bytes object in the server encoding */
|
/* finally, build a bytes object in the server encoding */
|
||||||
rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
|
rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
|
||||||
|
|
||||||
/* if pg_do_encoding_conversion allocated memory, free it now */
|
/* if pg_any_to_server allocated memory, free it now */
|
||||||
if (utf8string != encoded)
|
if (utf8string != encoded)
|
||||||
pfree(encoded);
|
pfree(encoded);
|
||||||
|
|
||||||
@ -149,10 +147,7 @@ PLyUnicode_FromString(const char *s)
|
|||||||
char *utf8string;
|
char *utf8string;
|
||||||
PyObject *o;
|
PyObject *o;
|
||||||
|
|
||||||
utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
|
utf8string = pg_server_to_any(s, strlen(s), PG_UTF8);
|
||||||
strlen(s),
|
|
||||||
GetDatabaseEncoding(),
|
|
||||||
PG_UTF8);
|
|
||||||
|
|
||||||
o = PyUnicode_FromString(utf8string);
|
o = PyUnicode_FromString(utf8string);
|
||||||
|
|
||||||
|
@ -63,13 +63,17 @@
|
|||||||
static unsigned char *
|
static unsigned char *
|
||||||
utf_u2e(unsigned char *src)
|
utf_u2e(unsigned char *src)
|
||||||
{
|
{
|
||||||
return pg_do_encoding_conversion(src, strlen(src), PG_UTF8, GetDatabaseEncoding());
|
return (unsigned char *) pg_any_to_server((char *) src,
|
||||||
|
strlen(src),
|
||||||
|
PG_UTF8);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned char *
|
static unsigned char *
|
||||||
utf_e2u(unsigned char *src)
|
utf_e2u(unsigned char *src)
|
||||||
{
|
{
|
||||||
return pg_do_encoding_conversion(src, strlen(src), GetDatabaseEncoding(), PG_UTF8);
|
return (unsigned char *) pg_server_to_any((char *) src,
|
||||||
|
strlen(src),
|
||||||
|
PG_UTF8);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PLTCL_UTF
|
#define PLTCL_UTF
|
||||||
|
Loading…
x
Reference in New Issue
Block a user