Move wchar2char() and char2wchar() from tsearch into utils/mb so that other
modules can use them more easily; also move pnstrdup() into mcxt.c. Clean up the code slightly.
This commit is contained in:
parent
3eb9da524d
commit
9de09c087d
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.8 2008/06/17 16:09:06 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.9 2008/06/18 18:42:54 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -16,125 +16,8 @@
|
|||||||
#include "tsearch/ts_locale.h"
|
#include "tsearch/ts_locale.h"
|
||||||
#include "tsearch/ts_public.h"
|
#include "tsearch/ts_public.h"
|
||||||
|
|
||||||
|
|
||||||
#ifdef USE_WIDE_UPPER_LOWER
|
#ifdef USE_WIDE_UPPER_LOWER
|
||||||
|
|
||||||
/*
|
|
||||||
* wchar2char --- convert wide characters to multibyte format
|
|
||||||
*
|
|
||||||
* This has the same API as the standard wcstombs() function; in particular,
|
|
||||||
* tolen is the maximum number of bytes to store at *to, and *from must be
|
|
||||||
* zero-terminated. The output will be zero-terminated iff there is room.
|
|
||||||
*/
|
|
||||||
size_t
|
|
||||||
wchar2char(char *to, const wchar_t *from, size_t tolen)
|
|
||||||
{
|
|
||||||
if (tolen == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
#ifdef WIN32
|
|
||||||
if (GetDatabaseEncoding() == PG_UTF8)
|
|
||||||
{
|
|
||||||
int r;
|
|
||||||
|
|
||||||
r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
|
|
||||||
NULL, NULL);
|
|
||||||
|
|
||||||
if (r <= 0)
|
|
||||||
return (size_t) -1;
|
|
||||||
|
|
||||||
Assert(r <= tolen);
|
|
||||||
|
|
||||||
/* Microsoft counts the zero terminator in the result */
|
|
||||||
return r - 1;
|
|
||||||
}
|
|
||||||
#endif /* WIN32 */
|
|
||||||
|
|
||||||
return wcstombs(to, from, tolen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* char2wchar --- convert multibyte characters to wide characters
|
|
||||||
*
|
|
||||||
* This has almost the API of mbstowcs(), except that *from need not be
|
|
||||||
* null-terminated; instead, the number of input bytes is specified as
|
|
||||||
* fromlen. Also, we ereport() rather than returning -1 for invalid
|
|
||||||
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
|
|
||||||
* The output will be zero-terminated iff there is room.
|
|
||||||
*/
|
|
||||||
size_t
|
|
||||||
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
|
|
||||||
{
|
|
||||||
if (tolen == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
#ifdef WIN32
|
|
||||||
if (GetDatabaseEncoding() == PG_UTF8)
|
|
||||||
{
|
|
||||||
int r;
|
|
||||||
|
|
||||||
/* stupid Microsloth API does not work for zero-length input */
|
|
||||||
if (fromlen == 0)
|
|
||||||
r = 0;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
|
|
||||||
|
|
||||||
if (r <= 0)
|
|
||||||
{
|
|
||||||
/* see notes in oracle_compat.c about error reporting */
|
|
||||||
pg_verifymbstr(from, fromlen, false);
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
|
||||||
errmsg("invalid multibyte character for locale"),
|
|
||||||
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Assert(r < tolen);
|
|
||||||
to[r] = 0;
|
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
#endif /* WIN32 */
|
|
||||||
|
|
||||||
if (lc_ctype_is_c())
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
|
|
||||||
* allocated with sufficient space
|
|
||||||
*/
|
|
||||||
return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* mbstowcs requires ending '\0'
|
|
||||||
*/
|
|
||||||
char *str = pnstrdup(from, fromlen);
|
|
||||||
size_t result;
|
|
||||||
|
|
||||||
result = mbstowcs(to, str, tolen);
|
|
||||||
|
|
||||||
pfree(str);
|
|
||||||
|
|
||||||
if (result == (size_t) -1)
|
|
||||||
{
|
|
||||||
pg_verifymbstr(from, fromlen, false);
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
|
||||||
errmsg("invalid multibyte character for locale"),
|
|
||||||
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result < tolen)
|
|
||||||
to[result] = 0;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
t_isdigit(const char *ptr)
|
t_isdigit(const char *ptr)
|
||||||
{
|
{
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.9 2008/01/01 19:45:52 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.10 2008/06/18 18:42:54 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -153,13 +153,3 @@ searchstoplist(StopList *s, char *key)
|
|||||||
bsearch(&key, s->stop, s->len,
|
bsearch(&key, s->stop, s->len,
|
||||||
sizeof(char *), comparestr)) ? true : false;
|
sizeof(char *), comparestr)) ? true : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *
|
|
||||||
pnstrdup(const char *in, int len)
|
|
||||||
{
|
|
||||||
char *out = palloc(len + 1);
|
|
||||||
|
|
||||||
memcpy(out, in, len);
|
|
||||||
out[len] = '\0';
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
* (currently mule internal code (mic) is used)
|
* (currently mule internal code (mic) is used)
|
||||||
* Tatsuo Ishii
|
* Tatsuo Ishii
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $
|
||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef USE_WIDE_UPPER_LOWER
|
||||||
|
|
||||||
|
/*
|
||||||
|
* wchar2char --- convert wide characters to multibyte format
|
||||||
|
*
|
||||||
|
* This has the same API as the standard wcstombs() function; in particular,
|
||||||
|
* tolen is the maximum number of bytes to store at *to, and *from must be
|
||||||
|
* zero-terminated. The output will be zero-terminated iff there is room.
|
||||||
|
*/
|
||||||
|
size_t
|
||||||
|
wchar2char(char *to, const wchar_t *from, size_t tolen)
|
||||||
|
{
|
||||||
|
size_t result;
|
||||||
|
|
||||||
|
if (tolen == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
/*
|
||||||
|
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
|
||||||
|
* and for some reason mbstowcs and wcstombs won't do this for us,
|
||||||
|
* so we use WideCharToMultiByte() and MultiByteToWideChar() instead.
|
||||||
|
*/
|
||||||
|
if (GetDatabaseEncoding() == PG_UTF8)
|
||||||
|
{
|
||||||
|
result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
|
||||||
|
NULL, NULL);
|
||||||
|
/* A zero return is failure */
|
||||||
|
if (result <= 0)
|
||||||
|
result = -1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Assert(result <= tolen);
|
||||||
|
/* Microsoft counts the zero terminator in the result */
|
||||||
|
result--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* WIN32 */
|
||||||
|
result = wcstombs(to, from, tolen);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* char2wchar --- convert multibyte characters to wide characters
|
||||||
|
*
|
||||||
|
* This has almost the API of mbstowcs(), except that *from need not be
|
||||||
|
* null-terminated; instead, the number of input bytes is specified as
|
||||||
|
* fromlen. Also, we ereport() rather than returning -1 for invalid
|
||||||
|
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
|
||||||
|
* The output will be zero-terminated iff there is room.
|
||||||
|
*/
|
||||||
|
size_t
|
||||||
|
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
|
||||||
|
{
|
||||||
|
size_t result;
|
||||||
|
|
||||||
|
if (tolen == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
/* See WIN32 "Unicode" comment above */
|
||||||
|
if (GetDatabaseEncoding() == PG_UTF8)
|
||||||
|
{
|
||||||
|
/* Win32 API does not work for zero-length input */
|
||||||
|
if (fromlen == 0)
|
||||||
|
result = 0;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
|
||||||
|
/* A zero return is failure */
|
||||||
|
if (result == 0)
|
||||||
|
result = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result != -1)
|
||||||
|
{
|
||||||
|
Assert(result < tolen);
|
||||||
|
/* Append trailing null wchar (MultiByteToWideChar() does not) */
|
||||||
|
to[result] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* WIN32 */
|
||||||
|
{
|
||||||
|
if (lc_ctype_is_c())
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
|
||||||
|
* allocated with sufficient space
|
||||||
|
*/
|
||||||
|
result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* mbstowcs requires ending '\0' */
|
||||||
|
char *str = pnstrdup(from, fromlen);
|
||||||
|
|
||||||
|
result = mbstowcs(to, str, tolen);
|
||||||
|
pfree(str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result == -1)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Invalid multibyte character encountered. We try to give a useful
|
||||||
|
* error message by letting pg_verifymbstr check the string. But it's
|
||||||
|
* possible that the string is OK to us, and not OK to mbstowcs ---
|
||||||
|
* this suggests that the LC_CTYPE locale is different from the
|
||||||
|
* database encoding. Give a generic error message if verifymbstr
|
||||||
|
* can't find anything wrong.
|
||||||
|
*/
|
||||||
|
pg_verifymbstr(from, fromlen, false); /* might not return */
|
||||||
|
/* but if it does ... */
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||||
|
errmsg("invalid multibyte character for locale"),
|
||||||
|
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
/* convert a multibyte string to a wchar */
|
/* convert a multibyte string to a wchar */
|
||||||
int
|
int
|
||||||
pg_mb2wchar(const char *from, pg_wchar *to)
|
pg_mb2wchar(const char *from, pg_wchar *to)
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.63 2008/01/01 19:45:55 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.64 2008/06/18 18:42:54 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -624,6 +624,18 @@ repalloc(void *pointer, Size size)
|
|||||||
pointer, size);
|
pointer, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Like pstrdup(), but copies exactly len bytes from a possibly unterminated input and appends a null byte */
|
||||||
|
char *
|
||||||
|
pnstrdup(const char *in, int len)
|
||||||
|
{
|
||||||
|
char *out = palloc(len + 1);
|
||||||
|
|
||||||
|
memcpy(out, in, len);
|
||||||
|
out[len] = '\0';
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* MemoryContextSwitchTo
|
* MemoryContextSwitchTo
|
||||||
* Returns the current context; installs the given context.
|
* Returns the current context; installs the given context.
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.78 2008/01/01 19:45:58 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* This is used both by the backend and by libpq, but should not be
|
* This is used both by the backend and by libpq, but should not be
|
||||||
@ -362,6 +362,11 @@ extern int pg_mbcharcliplen(const char *mbstr, int len, int imit);
|
|||||||
extern int pg_encoding_max_length(int encoding);
|
extern int pg_encoding_max_length(int encoding);
|
||||||
extern int pg_database_encoding_max_length(void);
|
extern int pg_database_encoding_max_length(void);
|
||||||
|
|
||||||
|
#ifdef USE_WIDE_UPPER_LOWER
|
||||||
|
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
|
||||||
|
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
|
||||||
|
#endif
|
||||||
|
|
||||||
extern void SetDefaultClientEncoding(void);
|
extern void SetDefaultClientEncoding(void);
|
||||||
extern int SetClientEncoding(int encoding, bool doit);
|
extern int SetClientEncoding(int encoding, bool doit);
|
||||||
extern void InitializeClientEncoding(void);
|
extern void InitializeClientEncoding(void);
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.6 2008/06/17 16:09:06 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.7 2008/06/18 18:42:54 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -33,9 +33,6 @@
|
|||||||
|
|
||||||
#ifdef USE_WIDE_UPPER_LOWER
|
#ifdef USE_WIDE_UPPER_LOWER
|
||||||
|
|
||||||
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
|
|
||||||
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
|
|
||||||
|
|
||||||
extern int t_isdigit(const char *ptr);
|
extern int t_isdigit(const char *ptr);
|
||||||
extern int t_isspace(const char *ptr);
|
extern int t_isspace(const char *ptr);
|
||||||
extern int t_isalpha(const char *ptr);
|
extern int t_isalpha(const char *ptr);
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -62,8 +62,6 @@ typedef struct
|
|||||||
extern char *get_tsearch_config_filename(const char *basename,
|
extern char *get_tsearch_config_filename(const char *basename,
|
||||||
const char *extension);
|
const char *extension);
|
||||||
|
|
||||||
extern char *pnstrdup(const char *in, int len);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Often useful stopword list management
|
* Often useful stopword list management
|
||||||
*/
|
*/
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.38 2008/01/01 19:45:59 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.39 2008/06/18 18:42:54 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -70,6 +70,8 @@ extern void pfree(void *pointer);
|
|||||||
|
|
||||||
extern void *repalloc(void *pointer, Size size);
|
extern void *repalloc(void *pointer, Size size);
|
||||||
|
|
||||||
|
extern char *pnstrdup(const char *in, int len);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* MemoryContextSwitchTo can't be a macro in standard C compilers.
|
* MemoryContextSwitchTo can't be a macro in standard C compilers.
|
||||||
* But we can make it an inline function when using GCC.
|
* But we can make it an inline function when using GCC.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user