Merge duplicate upper/lower/initcap() routines in oracle_compat.c and
formatting.c to use common code; remove duplicate functions and support routines that are no longer needed.
This commit is contained in:
parent
eeee06919f
commit
f6ec7430f9
@ -1,7 +1,7 @@
|
||||
/* -----------------------------------------------------------------------
|
||||
* formatting.c
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.142 2008/06/17 16:09:06 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.143 2008/06/23 19:27:19 momjian Exp $
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1999-2008, PostgreSQL Global Development Group
|
||||
@ -925,9 +925,6 @@ static char *get_th(char *num, int type);
|
||||
static char *str_numth(char *dest, char *num, int type);
|
||||
static int strspace_len(char *str);
|
||||
static int strdigits_len(char *str);
|
||||
static char *str_toupper(char *buff);
|
||||
static char *str_tolower(char *buff);
|
||||
static char *str_initcap(char *buff);
|
||||
|
||||
static int seq_search(char *name, char **array, int type, int max, int *len);
|
||||
static void do_to_timestamp(text *date_txt, text *fmt,
|
||||
@ -1424,12 +1421,24 @@ str_numth(char *dest, char *num, int type)
|
||||
return dest;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the system provides the needed functions for wide-character manipulation
|
||||
* (which are all standardized by C99), then we implement upper/lower/initcap
|
||||
* using wide-character functions, if necessary. Otherwise we use the
|
||||
* traditional <ctype.h> functions, which of course will not work as desired
|
||||
* in multibyte character sets. Note that in either case we are effectively
|
||||
* assuming that the database character encoding matches the encoding implied
|
||||
* by LC_CTYPE.
|
||||
*/
|
||||
|
||||
/* ----------
|
||||
* Convert string to upper case. It is designed to be multibyte-aware.
|
||||
* wide-character-aware lower function
|
||||
* The byte count is passed explicitly so that both varlena data and
|
||||
* char* strings can be handed to this function.
|
||||
* ----------
|
||||
*/
|
||||
static char *
|
||||
str_toupper(char *buff)
|
||||
char *
|
||||
str_tolower(char *buff, size_t nbytes)
|
||||
{
|
||||
char *result;
|
||||
|
||||
@ -1438,13 +1447,78 @@ str_toupper(char *buff)
|
||||
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
|
||||
result = wstring_upper(buff);
|
||||
{
|
||||
wchar_t *workspace;
|
||||
int curr_char = 0;
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
|
||||
|
||||
char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
|
||||
|
||||
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
|
||||
workspace[curr_char] = towlower(workspace[curr_char]);
|
||||
|
||||
/* Make result large enough; case change might change number of bytes */
|
||||
result = palloc(curr_char * MB_CUR_MAX + 1);
|
||||
|
||||
wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
|
||||
pfree(workspace);
|
||||
}
|
||||
else
|
||||
#endif /* USE_WIDE_UPPER_LOWER */
|
||||
{
|
||||
char *p;
|
||||
|
||||
result = pstrdup(buff);
|
||||
result = pnstrdup(buff, nbytes);
|
||||
|
||||
for (p = result; *p; p++)
|
||||
*p = pg_tolower((unsigned char) *p);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* ----------
|
||||
* wide-character-aware upper function
|
||||
* The byte count is passed explicitly so that both varlena data and
|
||||
* char* strings can be handed to this function.
|
||||
* ----------
|
||||
*/
|
||||
char *
|
||||
str_toupper(char *buff, size_t nbytes)
|
||||
{
|
||||
char *result;
|
||||
|
||||
if (!buff)
|
||||
return NULL;
|
||||
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
|
||||
{
|
||||
wchar_t *workspace;
|
||||
int curr_char = 0;
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
|
||||
|
||||
char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
|
||||
|
||||
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
|
||||
workspace[curr_char] = towupper(workspace[curr_char]);
|
||||
|
||||
/* Make result large enough; case change might change number of bytes */
|
||||
result = palloc(curr_char * MB_CUR_MAX + 1);
|
||||
|
||||
wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
|
||||
pfree(workspace);
|
||||
}
|
||||
else
|
||||
#endif /* USE_WIDE_UPPER_LOWER */
|
||||
{
|
||||
char *p;
|
||||
|
||||
result = pnstrdup(buff, nbytes);
|
||||
|
||||
for (p = result; *p; p++)
|
||||
*p = pg_toupper((unsigned char) *p);
|
||||
@ -1453,41 +1527,14 @@ str_toupper(char *buff)
|
||||
return result;
|
||||
}
|
||||
|
||||
/* ----------
|
||||
* Convert string to lower case. It is designed to be multibyte-aware.
|
||||
* ----------
|
||||
*/
|
||||
static char *
|
||||
str_tolower(char *buff)
|
||||
{
|
||||
char *result;
|
||||
|
||||
if (!buff)
|
||||
return NULL;
|
||||
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
|
||||
result = wstring_lower(buff);
|
||||
else
|
||||
#endif /* USE_WIDE_UPPER_LOWER */
|
||||
{
|
||||
char *p;
|
||||
|
||||
result = pstrdup(buff);
|
||||
|
||||
for (p = result; *p; p++)
|
||||
*p = pg_tolower((unsigned char) *p);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* ----------
|
||||
* wide-character-aware initcap function
|
||||
* The byte count is passed explicitly so that both varlena data and
|
||||
* char* strings can be handed to this function.
|
||||
* ----------
|
||||
*/
|
||||
static char *
|
||||
str_initcap(char *buff)
|
||||
char *
|
||||
str_initcap(char *buff, size_t nbytes)
|
||||
{
|
||||
char *result;
|
||||
bool wasalnum = false;
|
||||
@ -1499,35 +1546,34 @@ str_initcap(char *buff)
|
||||
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
|
||||
{
|
||||
wchar_t *workspace;
|
||||
text *in_text;
|
||||
text *out_text;
|
||||
int i;
|
||||
int curr_char = 0;
|
||||
|
||||
in_text = cstring_to_text(buff);
|
||||
workspace = texttowcs(in_text);
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
|
||||
|
||||
for (i = 0; workspace[i] != 0; i++)
|
||||
char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
|
||||
|
||||
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
|
||||
{
|
||||
if (wasalnum)
|
||||
workspace[i] = towlower(workspace[i]);
|
||||
workspace[curr_char] = towlower(workspace[curr_char]);
|
||||
else
|
||||
workspace[i] = towupper(workspace[i]);
|
||||
wasalnum = iswalnum(workspace[i]);
|
||||
workspace[curr_char] = towupper(workspace[curr_char]);
|
||||
wasalnum = iswalnum(workspace[curr_char]);
|
||||
}
|
||||
|
||||
out_text = wcstotext(workspace, i);
|
||||
result = text_to_cstring(out_text);
|
||||
/* Make result large enough; case change might change number of bytes */
|
||||
result = palloc(curr_char * MB_CUR_MAX + 1);
|
||||
|
||||
wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
|
||||
pfree(workspace);
|
||||
pfree(in_text);
|
||||
pfree(out_text);
|
||||
}
|
||||
else
|
||||
#endif /* USE_WIDE_UPPER_LOWER */
|
||||
{
|
||||
char *p;
|
||||
|
||||
result = pstrdup(buff);
|
||||
result = pnstrdup(buff, nbytes);
|
||||
|
||||
for (p = result; *p; p++)
|
||||
{
|
||||
@ -1851,7 +1897,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
{
|
||||
char *p = pstrdup(tmtcTzn(in));
|
||||
|
||||
strcpy(s, str_tolower(p));
|
||||
strcpy(s, str_tolower(p, strlen(p)));
|
||||
pfree(p);
|
||||
s += strlen(s);
|
||||
}
|
||||
@ -1893,11 +1939,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
if (!tm->tm_mon)
|
||||
break;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1]));
|
||||
strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1],
|
||||
strlen(localized_full_months[tm->tm_mon - 1])));
|
||||
else
|
||||
{
|
||||
strcpy(workbuff, months_full[tm->tm_mon - 1]);
|
||||
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
|
||||
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
|
||||
str_toupper(workbuff, strlen(workbuff)));
|
||||
}
|
||||
s += strlen(s);
|
||||
break;
|
||||
@ -1906,7 +1954,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
if (!tm->tm_mon)
|
||||
break;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1]));
|
||||
strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1],
|
||||
strlen(localized_full_months[tm->tm_mon - 1])));
|
||||
else
|
||||
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
|
||||
s += strlen(s);
|
||||
@ -1916,7 +1965,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
if (!tm->tm_mon)
|
||||
break;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1]));
|
||||
strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1],
|
||||
strlen(localized_full_months[tm->tm_mon - 1])));
|
||||
else
|
||||
{
|
||||
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
|
||||
@ -1929,9 +1979,11 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
if (!tm->tm_mon)
|
||||
break;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1]));
|
||||
strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1],
|
||||
strlen(localized_abbrev_months[tm->tm_mon - 1])));
|
||||
else
|
||||
strcpy(s, str_toupper(months[tm->tm_mon - 1]));
|
||||
strcpy(s, str_toupper(months[tm->tm_mon - 1],
|
||||
strlen(months[tm->tm_mon - 1])));
|
||||
s += strlen(s);
|
||||
break;
|
||||
case DCH_Mon:
|
||||
@ -1939,7 +1991,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
if (!tm->tm_mon)
|
||||
break;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1]));
|
||||
strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1],
|
||||
strlen(localized_abbrev_months[tm->tm_mon - 1])));
|
||||
else
|
||||
strcpy(s, months[tm->tm_mon - 1]);
|
||||
s += strlen(s);
|
||||
@ -1949,7 +2002,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
if (!tm->tm_mon)
|
||||
break;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1]));
|
||||
strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1],
|
||||
strlen(localized_abbrev_months[tm->tm_mon - 1])));
|
||||
else
|
||||
{
|
||||
strcpy(s, months[tm->tm_mon - 1]);
|
||||
@ -1966,18 +2020,21 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
case DCH_DAY:
|
||||
INVALID_FOR_INTERVAL;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_toupper(localized_full_days[tm->tm_wday]));
|
||||
strcpy(s, str_toupper(localized_full_days[tm->tm_wday],
|
||||
strlen(localized_full_days[tm->tm_wday])));
|
||||
else
|
||||
{
|
||||
strcpy(workbuff, days[tm->tm_wday]);
|
||||
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
|
||||
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
|
||||
str_toupper(workbuff, strlen(workbuff)));
|
||||
}
|
||||
s += strlen(s);
|
||||
break;
|
||||
case DCH_Day:
|
||||
INVALID_FOR_INTERVAL;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_initcap(localized_full_days[tm->tm_wday]));
|
||||
strcpy(s, str_initcap(localized_full_days[tm->tm_wday],
|
||||
strlen(localized_full_days[tm->tm_wday])));
|
||||
else
|
||||
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
|
||||
s += strlen(s);
|
||||
@ -1985,7 +2042,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
case DCH_day:
|
||||
INVALID_FOR_INTERVAL;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_tolower(localized_full_days[tm->tm_wday]));
|
||||
strcpy(s, str_tolower(localized_full_days[tm->tm_wday],
|
||||
strlen(localized_full_days[tm->tm_wday])));
|
||||
else
|
||||
{
|
||||
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
|
||||
@ -1996,15 +2054,18 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
case DCH_DY:
|
||||
INVALID_FOR_INTERVAL;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday]));
|
||||
strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday],
|
||||
strlen(localized_abbrev_days[tm->tm_wday])));
|
||||
else
|
||||
strcpy(s, str_toupper(days_short[tm->tm_wday]));
|
||||
strcpy(s, str_toupper(days_short[tm->tm_wday],
|
||||
strlen(days_short[tm->tm_wday])));
|
||||
s += strlen(s);
|
||||
break;
|
||||
case DCH_Dy:
|
||||
INVALID_FOR_INTERVAL;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday]));
|
||||
strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday],
|
||||
strlen(localized_abbrev_days[tm->tm_wday])));
|
||||
else
|
||||
strcpy(s, days_short[tm->tm_wday]);
|
||||
s += strlen(s);
|
||||
@ -2012,7 +2073,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
|
||||
case DCH_dy:
|
||||
INVALID_FOR_INTERVAL;
|
||||
if (S_TM(n->suffix))
|
||||
strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday]));
|
||||
strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday],
|
||||
strlen(localized_abbrev_days[tm->tm_wday])));
|
||||
else
|
||||
{
|
||||
strcpy(s, days_short[tm->tm_wday]);
|
||||
@ -4277,12 +4339,14 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
|
||||
case NUM_rn:
|
||||
if (IS_FILLMODE(Np->Num))
|
||||
{
|
||||
strcpy(Np->inout_p, str_tolower(Np->number_p));
|
||||
strcpy(Np->inout_p, str_tolower(Np->number_p,
|
||||
strlen(Np->number_p)));
|
||||
Np->inout_p += strlen(Np->inout_p) - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p));
|
||||
sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p,
|
||||
strlen(Np->number_p)));
|
||||
Np->inout_p += strlen(Np->inout_p) - 1;
|
||||
}
|
||||
break;
|
||||
|
@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.80 2008/06/17 16:09:06 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.81 2008/06/23 19:27:19 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -29,292 +29,16 @@
|
||||
#endif
|
||||
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/formatting.h"
|
||||
#include "utils/pg_locale.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
|
||||
|
||||
/*
|
||||
* If the system provides the needed functions for wide-character manipulation
|
||||
* (which are all standardized by C99), then we implement upper/lower/initcap
|
||||
* using wide-character functions. Otherwise we use the traditional <ctype.h>
|
||||
* functions, which of course will not work as desired in multibyte character
|
||||
* sets. Note that in either case we are effectively assuming that the
|
||||
* database character encoding matches the encoding implied by LC_CTYPE.
|
||||
*/
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
char *wstring_lower(char *str);
|
||||
char *wstring_upper(char *str);
|
||||
wchar_t *texttowcs(const text *txt);
|
||||
text *wcstotext(const wchar_t *str, int ncodes);
|
||||
#endif
|
||||
|
||||
static text *dotrim(const char *string, int stringlen,
|
||||
const char *set, int setlen,
|
||||
bool doltrim, bool dortrim);
|
||||
|
||||
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
|
||||
/*
|
||||
* Convert a TEXT value into a palloc'd wchar string.
|
||||
*/
|
||||
wchar_t *
|
||||
texttowcs(const text *txt)
|
||||
{
|
||||
int nbytes = VARSIZE_ANY_EXHDR(txt);
|
||||
char *workstr;
|
||||
wchar_t *result;
|
||||
size_t ncodes;
|
||||
|
||||
/* Overflow paranoia */
|
||||
if (nbytes < 0 ||
|
||||
nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/* Need a null-terminated version of the input */
|
||||
workstr = text_to_cstring(txt);
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
|
||||
|
||||
/* Do the conversion */
|
||||
ncodes = mbstowcs(result, workstr, nbytes + 1);
|
||||
|
||||
if (ncodes == (size_t) -1)
|
||||
{
|
||||
/*
|
||||
* Invalid multibyte character encountered. We try to give a useful
|
||||
* error message by letting pg_verifymbstr check the string. But it's
|
||||
* possible that the string is OK to us, and not OK to mbstowcs ---
|
||||
* this suggests that the LC_CTYPE locale is different from the
|
||||
* database encoding. Give a generic error message if verifymbstr
|
||||
* can't find anything wrong.
|
||||
*/
|
||||
pg_verifymbstr(workstr, nbytes, false);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||
errmsg("invalid multibyte character for locale"),
|
||||
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
|
||||
}
|
||||
|
||||
Assert(ncodes <= (size_t) nbytes);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Convert a wchar string into a palloc'd TEXT value. The wchar string
|
||||
* must be zero-terminated, but we also require the caller to pass the string
|
||||
* length, since it will know it anyway in current uses.
|
||||
*/
|
||||
text *
|
||||
wcstotext(const wchar_t *str, int ncodes)
|
||||
{
|
||||
text *result;
|
||||
size_t nbytes;
|
||||
|
||||
/* Overflow paranoia */
|
||||
if (ncodes < 0 ||
|
||||
ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/* Make workspace certainly large enough for result */
|
||||
result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
|
||||
|
||||
/* Do the conversion */
|
||||
nbytes = wcstombs((char *) VARDATA(result), str,
|
||||
(ncodes + 1) * MB_CUR_MAX);
|
||||
|
||||
if (nbytes == (size_t) -1)
|
||||
{
|
||||
/* Invalid multibyte character encountered ... shouldn't happen */
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||
errmsg("invalid multibyte character for locale")));
|
||||
}
|
||||
|
||||
Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
|
||||
|
||||
SET_VARSIZE(result, nbytes + VARHDRSZ);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif /* USE_WIDE_UPPER_LOWER */
|
||||
|
||||
|
||||
/*
|
||||
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
|
||||
* To make use of the upper/lower functionality, we need to map UTF8 to
|
||||
* UTF16, which for some reason mbstowcs and wcstombs won't do for us.
|
||||
* This conversion layer takes care of it.
|
||||
*/
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
/* texttowcs for the case of UTF8 to UTF16 */
|
||||
static wchar_t *
|
||||
win32_utf8_texttowcs(const text *txt)
|
||||
{
|
||||
int nbytes = VARSIZE_ANY_EXHDR(txt);
|
||||
wchar_t *result;
|
||||
int r;
|
||||
|
||||
/* Overflow paranoia */
|
||||
if (nbytes < 0 ||
|
||||
nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
|
||||
|
||||
/* MultiByteToWideChar does not work for zero-length input */
|
||||
if (nbytes == 0)
|
||||
r = 0;
|
||||
else
|
||||
{
|
||||
/* Do the conversion */
|
||||
r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
|
||||
result, nbytes);
|
||||
|
||||
if (r <= 0) /* assume it's NO_UNICODE_TRANSLATION */
|
||||
{
|
||||
/* see notes above about error reporting */
|
||||
pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||
errmsg("invalid multibyte character for locale"),
|
||||
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
|
||||
}
|
||||
}
|
||||
|
||||
/* Append trailing null wchar (MultiByteToWideChar won't have added one) */
|
||||
Assert(r <= nbytes);
|
||||
result[r] = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* wcstotext for the case of UTF16 to UTF8 */
|
||||
static text *
|
||||
win32_utf8_wcstotext(const wchar_t *str)
|
||||
{
|
||||
text *result;
|
||||
int nbytes;
|
||||
int r;
|
||||
|
||||
/* Compute size of output string (this *will* include trailing null) */
|
||||
nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
|
||||
if (nbytes <= 0) /* shouldn't happen */
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||
errmsg("UTF-16 to UTF-8 translation failed: %lu",
|
||||
GetLastError())));
|
||||
|
||||
result = palloc(nbytes + VARHDRSZ);
|
||||
|
||||
r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
|
||||
NULL, NULL);
|
||||
if (r != nbytes) /* shouldn't happen */
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||
errmsg("UTF-16 to UTF-8 translation failed: %lu",
|
||||
GetLastError())));
|
||||
|
||||
SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* interface layer to check which encoding is in use */
|
||||
|
||||
static wchar_t *
|
||||
win32_texttowcs(const text *txt)
|
||||
{
|
||||
if (GetDatabaseEncoding() == PG_UTF8)
|
||||
return win32_utf8_texttowcs(txt);
|
||||
else
|
||||
return texttowcs(txt);
|
||||
}
|
||||
|
||||
static text *
|
||||
win32_wcstotext(const wchar_t *str, int ncodes)
|
||||
{
|
||||
if (GetDatabaseEncoding() == PG_UTF8)
|
||||
return win32_utf8_wcstotext(str);
|
||||
else
|
||||
return wcstotext(str, ncodes);
|
||||
}
|
||||
|
||||
/* use macros to cause routines below to call interface layer */
|
||||
|
||||
#define texttowcs win32_texttowcs
|
||||
#define wcstotext win32_wcstotext
|
||||
#endif /* WIN32 */
|
||||
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
/*
|
||||
* wstring_upper and wstring_lower perform correct multibyte upper/lower
|
||||
* transformations of localized strings. Each returns a pointer to the
|
||||
* transformed string.
|
||||
*/
|
||||
char *
|
||||
wstring_upper(char *str)
|
||||
{
|
||||
wchar_t *workspace;
|
||||
text *in_text;
|
||||
text *out_text;
|
||||
char *result;
|
||||
int i;
|
||||
|
||||
in_text = cstring_to_text(str);
|
||||
workspace = texttowcs(in_text);
|
||||
|
||||
for (i = 0; workspace[i] != 0; i++)
|
||||
workspace[i] = towupper(workspace[i]);
|
||||
|
||||
out_text = wcstotext(workspace, i);
|
||||
result = text_to_cstring(out_text);
|
||||
|
||||
pfree(workspace);
|
||||
pfree(in_text);
|
||||
pfree(out_text);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
char *
|
||||
wstring_lower(char *str)
|
||||
{
|
||||
wchar_t *workspace;
|
||||
text *in_text;
|
||||
text *out_text;
|
||||
char *result;
|
||||
int i;
|
||||
|
||||
in_text = cstring_to_text(str);
|
||||
workspace = texttowcs(in_text);
|
||||
|
||||
for (i = 0; workspace[i] != 0; i++)
|
||||
workspace[i] = towlower(workspace[i]);
|
||||
|
||||
out_text = wcstotext(workspace, i);
|
||||
result = text_to_cstring(out_text);
|
||||
|
||||
pfree(workspace);
|
||||
pfree(in_text);
|
||||
pfree(out_text);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif /* USE_WIDE_UPPER_LOWER */
|
||||
|
||||
/********************************************************************
|
||||
*
|
||||
* lower
|
||||
@ -332,52 +56,15 @@ wstring_lower(char *str)
|
||||
Datum
|
||||
lower(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
text *in_string = PG_GETARG_TEXT_PP(0);
|
||||
char *out_string;
|
||||
text *result;
|
||||
|
||||
/*
|
||||
* Use wide char code only when max encoding length > 1 and ctype != C.
|
||||
* Some operating systems fail with multi-byte encodings and a C locale.
|
||||
* Also, for a C locale there is no need to process as multibyte.
|
||||
*/
|
||||
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_PP(0);
|
||||
text *result;
|
||||
wchar_t *workspace;
|
||||
int i;
|
||||
out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
|
||||
result = cstring_to_text(out_string);
|
||||
pfree(out_string);
|
||||
|
||||
workspace = texttowcs(string);
|
||||
|
||||
for (i = 0; workspace[i] != 0; i++)
|
||||
workspace[i] = towlower(workspace[i]);
|
||||
|
||||
result = wcstotext(workspace, i);
|
||||
|
||||
pfree(workspace);
|
||||
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
else
|
||||
#endif /* USE_WIDE_UPPER_LOWER */
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_P_COPY(0);
|
||||
char *ptr;
|
||||
int m;
|
||||
|
||||
/*
|
||||
* Since we copied the string, we can scribble directly on the value
|
||||
*/
|
||||
ptr = VARDATA(string);
|
||||
m = VARSIZE(string) - VARHDRSZ;
|
||||
|
||||
while (m-- > 0)
|
||||
{
|
||||
*ptr = tolower((unsigned char) *ptr);
|
||||
ptr++;
|
||||
}
|
||||
|
||||
PG_RETURN_TEXT_P(string);
|
||||
}
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
|
||||
|
||||
@ -398,52 +85,15 @@ lower(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
upper(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
text *in_string = PG_GETARG_TEXT_PP(0);
|
||||
char *out_string;
|
||||
text *result;
|
||||
|
||||
/*
|
||||
* Use wide char code only when max encoding length > 1 and ctype != C.
|
||||
* Some operating systems fail with multi-byte encodings and a C locale.
|
||||
* Also, for a C locale there is no need to process as multibyte.
|
||||
*/
|
||||
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_PP(0);
|
||||
text *result;
|
||||
wchar_t *workspace;
|
||||
int i;
|
||||
out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
|
||||
result = cstring_to_text(out_string);
|
||||
pfree(out_string);
|
||||
|
||||
workspace = texttowcs(string);
|
||||
|
||||
for (i = 0; workspace[i] != 0; i++)
|
||||
workspace[i] = towupper(workspace[i]);
|
||||
|
||||
result = wcstotext(workspace, i);
|
||||
|
||||
pfree(workspace);
|
||||
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
else
|
||||
#endif /* USE_WIDE_UPPER_LOWER */
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_P_COPY(0);
|
||||
char *ptr;
|
||||
int m;
|
||||
|
||||
/*
|
||||
* Since we copied the string, we can scribble directly on the value
|
||||
*/
|
||||
ptr = VARDATA(string);
|
||||
m = VARSIZE(string) - VARHDRSZ;
|
||||
|
||||
while (m-- > 0)
|
||||
{
|
||||
*ptr = toupper((unsigned char) *ptr);
|
||||
ptr++;
|
||||
}
|
||||
|
||||
PG_RETURN_TEXT_P(string);
|
||||
}
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
|
||||
|
||||
@ -467,64 +117,15 @@ upper(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
initcap(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
text *in_string = PG_GETARG_TEXT_PP(0);
|
||||
char *out_string;
|
||||
text *result;
|
||||
|
||||
/*
|
||||
* Use wide char code only when max encoding length > 1 and ctype != C.
|
||||
* Some operating systems fail with multi-byte encodings and a C locale.
|
||||
* Also, for a C locale there is no need to process as multibyte.
|
||||
*/
|
||||
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_PP(0);
|
||||
text *result;
|
||||
wchar_t *workspace;
|
||||
int wasalnum = 0;
|
||||
int i;
|
||||
out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
|
||||
result = cstring_to_text(out_string);
|
||||
pfree(out_string);
|
||||
|
||||
workspace = texttowcs(string);
|
||||
|
||||
for (i = 0; workspace[i] != 0; i++)
|
||||
{
|
||||
if (wasalnum)
|
||||
workspace[i] = towlower(workspace[i]);
|
||||
else
|
||||
workspace[i] = towupper(workspace[i]);
|
||||
wasalnum = iswalnum(workspace[i]);
|
||||
}
|
||||
|
||||
result = wcstotext(workspace, i);
|
||||
|
||||
pfree(workspace);
|
||||
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
else
|
||||
#endif /* USE_WIDE_UPPER_LOWER */
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_P_COPY(0);
|
||||
int wasalnum = 0;
|
||||
char *ptr;
|
||||
int m;
|
||||
|
||||
/*
|
||||
* Since we copied the string, we can scribble directly on the value
|
||||
*/
|
||||
ptr = VARDATA(string);
|
||||
m = VARSIZE(string) - VARHDRSZ;
|
||||
|
||||
while (m-- > 0)
|
||||
{
|
||||
if (wasalnum)
|
||||
*ptr = tolower((unsigned char) *ptr);
|
||||
else
|
||||
*ptr = toupper((unsigned char) *ptr);
|
||||
wasalnum = isalnum((unsigned char) *ptr);
|
||||
ptr++;
|
||||
}
|
||||
|
||||
PG_RETURN_TEXT_P(string);
|
||||
}
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
/* -----------------------------------------------------------------------
|
||||
* formatting.h
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/utils/formatting.h,v 1.18 2008/01/01 19:45:59 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/utils/formatting.h,v 1.19 2008/06/23 19:27:19 momjian Exp $
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1999-2008, PostgreSQL Global Development Group
|
||||
@ -21,6 +21,10 @@
|
||||
#include "fmgr.h"
|
||||
|
||||
|
||||
extern char *str_tolower(char *buff, size_t nbytes);
|
||||
extern char *str_toupper(char *buff, size_t nbytes);
|
||||
extern char *str_initcap(char *buff, size_t nbytes);
|
||||
|
||||
extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
|
||||
extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
|
||||
extern Datum interval_to_char(PG_FUNCTION_ARGS);
|
||||
|
Loading…
x
Reference in New Issue
Block a user