From e3f5bc3492efa1fa6d20491bb3134c9b32f30b7d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 21 Sep 2001 15:27:38 +0000 Subject: [PATCH] Fix type_maximum_size() to give the right answer in MULTIBYTE cases. Avoid use of prototype-less function pointers in MB code. --- src/backend/utils/adt/format_type.c | 19 ++++++++++++--- src/backend/utils/mb/mbutils.c | 32 ++++++++++++++---------- src/backend/utils/mb/wchar.c | 25 +++++++++++++------ src/include/mb/pg_wchar.h | 38 ++++++++++++++++++----------- 4 files changed, 76 insertions(+), 38 deletions(-) diff --git a/src/backend/utils/adt/format_type.c b/src/backend/utils/adt/format_type.c index 3f209d63b8..59b516556c 100644 --- a/src/backend/utils/adt/format_type.c +++ b/src/backend/utils/adt/format_type.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/format_type.c,v 1.14 2001/08/09 18:28:18 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/format_type.c,v 1.15 2001/09/21 15:27:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,10 @@ #include "utils/builtins.h" #include "utils/numeric.h" #include "utils/syscache.h" +#ifdef MULTIBYTE +#include "mb/pg_wchar.h" +#endif + #define MAX_INT32_LEN 11 #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str)) @@ -249,9 +253,9 @@ format_type_internal(Oid type_oid, int32 typemod, bool allow_invalid) /* - * type_maximum_size --- determine maximum length of a varlena column + * type_maximum_size --- determine maximum width of a varlena column * - * If the max length is indeterminate, return -1. In particular, we return + * If the max width is indeterminate, return -1. In particular, we return * -1 for any type not known to this routine. We assume the caller has * already determined that the type is a varlena type, so it's not * necessary to look up the type's pg_type tuple here. @@ -271,7 +275,14 @@ type_maximum_size(Oid type_oid, int32 typemod) case BPCHAROID: case VARCHAROID: /* typemod includes varlena header */ +#ifdef MULTIBYTE + /* typemod is in characters not bytes */ + return (typemod - VARHDRSZ) * + pg_encoding_max_length(GetDatabaseEncoding()) + + VARHDRSZ; +#else return typemod; +#endif case NUMERICOID: /* precision (ie, max # of digits) is in upper bits of typmod */ @@ -291,7 +302,7 @@ type_maximum_size(Oid type_oid, int32 typemod) + 2 * sizeof(int32); } - /* Unknown type, or unlimited-length type such as 'text' */ + /* Unknown type, or unlimited-width type such as 'text' */ return -1; } diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 4f15529aed..177cd3a1d6 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -3,7 +3,7 @@ * client encoding and server internal encoding. * (currently mule internal code (mic) is used) * Tatsuo Ishii - * $Id: mbutils.c,v 1.22 2001/09/09 01:15:11 ishii Exp $ + * $Id: mbutils.c,v 1.23 2001/09/21 15:27:38 tgl Exp $ */ #include "postgres.h" @@ -24,10 +24,10 @@ static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[ PG_SQL_ASCII ]; static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[ PG_SQL_ASCII ]; -static void (*client_to_mic) (); /* something to MIC */ -static void (*client_from_mic) (); /* MIC to something */ -static void (*server_to_mic) (); /* something to MIC */ -static void (*server_from_mic) (); /* MIC to something */ +static to_mic_converter client_to_mic; /* something to MIC */ +static from_mic_converter client_from_mic; /* MIC to something */ +static to_mic_converter server_to_mic; /* something to MIC */ +static from_mic_converter server_from_mic; /* MIC to something */ /* * find encoding table entry by encoding @@ -60,7 +60,9 @@ pg_get_enconv_by_encoding(int encoding) * appropriate function found, set to 0. */ int -pg_find_encoding_converters(int src, int dest, void (**src_to_mic)(), void (**dest_from_mic)()) +pg_find_encoding_converters(int src, int dest, + to_mic_converter *src_to_mic, + from_mic_converter *dest_from_mic) { if (src == dest) { /* src == dest? */ @@ -132,7 +134,7 @@ pg_set_client_encoding(int encoding) * returns the current client encoding */ int -pg_get_client_encoding() +pg_get_client_encoding(void) { Assert(ClientEncoding); return (ClientEncoding->encoding); @@ -142,7 +144,7 @@ pg_get_client_encoding() * returns the current client encoding name */ const char * -pg_get_client_encoding_name() +pg_get_client_encoding_name(void) { Assert(ClientEncoding); return (ClientEncoding->name); @@ -176,7 +178,9 @@ pg_get_client_encoding_name() * in the length of the string --- is this enough? */ unsigned char * -pg_do_encoding_conversion(unsigned char *src, int len, void (*src_to_mic)(), void (*dest_from_mic)()) +pg_do_encoding_conversion(unsigned char *src, int len, + to_mic_converter src_to_mic, + from_mic_converter dest_from_mic) { unsigned char *result = src; unsigned char *buf; @@ -212,7 +216,8 @@ pg_convert(PG_FUNCTION_ARGS) Name s = PG_GETARG_NAME(1); int encoding = pg_char_to_encoding(NameStr(*s)); int db_encoding = DatabaseEncoding->encoding; - void (*src)(), (*dest)(); + to_mic_converter src; + from_mic_converter dest; unsigned char *result; text *retval; @@ -253,7 +258,8 @@ pg_convert2(PG_FUNCTION_ARGS) int src_encoding = pg_char_to_encoding(src_encoding_name); char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2)); int dest_encoding = pg_char_to_encoding(dest_encoding_name); - void (*src)(), (*dest)(); + to_mic_converter src; + from_mic_converter dest; unsigned char *result; text *retval; @@ -446,14 +452,14 @@ SetDatabaseEncoding(int encoding) } int -GetDatabaseEncoding() +GetDatabaseEncoding(void) { Assert(DatabaseEncoding); return (DatabaseEncoding->encoding); } const char * -GetDatabaseEncodingName() +GetDatabaseEncodingName(void) { Assert(DatabaseEncoding); return (DatabaseEncoding->name); diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index 114b7f2623..2da50bdd17 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,21 +1,21 @@ /* * conversion functions between pg_wchar and multi-byte streams. * Tatsuo Ishii - * $Id: wchar.c,v 1.20 2001/09/11 04:50:36 ishii Exp $ + * $Id: wchar.c,v 1.21 2001/09/21 15:27:38 tgl Exp $ * * WIN1250 client encoding updated by Pavel Behal * */ /* can be used in either frontend or backend */ -#include "postgres_fe.h" -#include "mb/pg_wchar.h" - #ifdef FRONTEND - #define Assert(condition) +#include "postgres_fe.h" +#define Assert(condition) #else - #include "postgres.h" +#include "postgres.h" #endif +#include "mb/pg_wchar.h" + /* * conversion to pg_wchar is done by "table driven." @@ -499,6 +499,17 @@ pg_encoding_mblen(int encoding, const unsigned char *mbstr) ((*pg_wchar_table[PG_SQL_ASCII].mblen) (mbstr))); } +/* + * fetch maximum length of a char encoding + */ +int +pg_encoding_max_length(int encoding) +{ + Assert(PG_VALID_ENCODING(encoding)); + + return pg_wchar_table[encoding].maxmblen; +} + #ifndef FRONTEND /* * Verify mbstr to make sure that it has a valid character sequence. @@ -517,7 +528,7 @@ pg_verifymbstr(const unsigned char *mbstr, int len) int slen = 0; /* we do not check single byte encodings */ - if (pg_wchar_table[GetDatabaseEncoding()].maxmblen <= 1) + if (pg_encoding_max_length(GetDatabaseEncoding()) <= 1) return NULL; while (len > 0 && *mbstr) diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 35bb58a152..d27e316bde 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -1,4 +1,4 @@ -/* $Id: pg_wchar.h,v 1.30 2001/09/11 04:50:36 ishii Exp $ */ +/* $Id: pg_wchar.h,v 1.31 2001/09/21 15:27:38 tgl Exp $ */ #ifndef PG_WCHAR_H #define PG_WCHAR_H @@ -17,7 +17,6 @@ */ #ifdef MULTIBYTE typedef unsigned int pg_wchar; - #else #define pg_wchar char #endif @@ -152,6 +151,9 @@ extern pg_encname *pg_char_to_encname_struct(const char *name); extern int pg_char_to_encoding(const char *s); extern const char *pg_encoding_to_char(int encoding); +typedef void (*to_mic_converter) (unsigned char *l, unsigned char *p, int len); +typedef void (*from_mic_converter) (unsigned char *mic, unsigned char *p, int len); + /* * The backend encoding conversion routines * Careful: @@ -162,11 +164,11 @@ extern const char *pg_encoding_to_char(int encoding); #ifndef FRONTEND typedef struct pg_enconv { - pg_enc encoding; /* encoding identificator */ - void (*to_mic) (); /* client encoding to MIC */ - void (*from_mic) (); /* MIC to client encoding */ - void (*to_unicode) (); /* client encoding to UTF-8 */ - void (*from_unicode) (); /* UTF-8 to client encoding */ + pg_enc encoding; /* encoding identifier */ + to_mic_converter to_mic; /* client encoding to MIC */ + from_mic_converter from_mic; /* MIC to client encoding */ + to_mic_converter to_unicode; /* client encoding to UTF-8 */ + from_mic_converter from_unicode; /* UTF-8 to client encoding */ } pg_enconv; extern pg_enconv pg_enconv_tbl[]; @@ -177,13 +179,16 @@ extern pg_enconv *pg_get_enconv_by_encoding(int encoding); /* * pg_wchar stuff */ +typedef int (*mb2wchar_with_len_converter) (const unsigned char *from, + pg_wchar *to, + int len); +typedef int (*mblen_converter) (const unsigned char *mbstr); + typedef struct { - int (*mb2wchar_with_len) (); /* convert a multi-byte - * string to a wchar */ - int (*mblen) (); /* returns the length of a multi-byte word */ - int maxmblen; /* max bytes for a letter in this charset */ - + mb2wchar_with_len_converter mb2wchar_with_len; /* convert a multi-byte string to a wchar */ + mblen_converter mblen; /* returns the length of a multi-byte char */ + int maxmblen; /* max bytes for a char in this charset */ } pg_wchar_tbl; extern pg_wchar_tbl pg_wchar_table[]; @@ -220,6 +225,7 @@ extern int pg_mbstrlen(const unsigned char *); extern int pg_mbstrlen_with_len(const unsigned char *, int); extern int pg_mbcliplen(const unsigned char *, int, int); extern int pg_mbcharcliplen(const unsigned char *, int, int); +extern int pg_encoding_max_length(int); extern int pg_set_client_encoding(int); extern int pg_get_client_encoding(void); @@ -233,8 +239,12 @@ extern int pg_valid_client_encoding(const char *name); extern int pg_valid_server_encoding(const char *name); extern int pg_utf_mblen(const unsigned char *); -extern int pg_find_encoding_converters(int, int, void (**)(), void (**)()); -extern unsigned char *pg_do_encoding_conversion(unsigned char *, int, void (*)(), void (*)()); +extern int pg_find_encoding_converters(int src, int dest, + to_mic_converter *src_to_mic, + from_mic_converter *dest_from_mic); +extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len, + to_mic_converter src_to_mic, + from_mic_converter dest_from_mic); extern unsigned char *pg_client_to_server(unsigned char *, int); extern unsigned char *pg_server_to_client(unsigned char *, int);