mirror of https://github.com/postgres/postgres
Improve implementation of btrim/ltrim/rtrim: provide a special case for
single-byte encodings, and a direct C implementation of the single-argument forms (where spaces are always what gets trimmed). This is in preparation for using rtrim1() as the bpchar-to-text cast operator, but is a useful performance improvement even if we decide not to do that.
This commit is contained in:
parent
78d2156066
commit
11d5c82002
|
@ -2,26 +2,30 @@
|
|||
* oracle_compat.c
|
||||
* Oracle compatible functions.
|
||||
*
|
||||
* Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
||||
* Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||
*
|
||||
* Author: Edmund Mergl <E.Mergl@bawue.de>
|
||||
* Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v 1.43 2002/09/04 20:31:28 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v 1.44 2003/05/23 22:33:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include "mb/pg_wchar.h"
|
||||
|
||||
|
||||
static text *dotrim(const char *string, int stringlen,
|
||||
const char *set, int setlen,
|
||||
bool doltrim, bool dortrim);
|
||||
|
||||
|
||||
/********************************************************************
|
||||
*
|
||||
* lower
|
||||
|
@ -349,88 +353,194 @@ btrim(PG_FUNCTION_ARGS)
|
|||
text *string = PG_GETARG_TEXT_P(0);
|
||||
text *set = PG_GETARG_TEXT_P(1);
|
||||
text *ret;
|
||||
char *ptr,
|
||||
*end,
|
||||
*ptr2,
|
||||
*end2;
|
||||
int m;
|
||||
|
||||
char **mp;
|
||||
int mplen;
|
||||
char *p;
|
||||
int mblen;
|
||||
int len;
|
||||
|
||||
if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
|
||||
(VARSIZE(set) - VARHDRSZ) <= 0)
|
||||
PG_RETURN_TEXT_P(string);
|
||||
|
||||
ptr = VARDATA(string);
|
||||
|
||||
len = m;
|
||||
mp = (char **) palloc(len * sizeof(char *));
|
||||
p = ptr;
|
||||
mplen = 0;
|
||||
|
||||
/* build the mb pointer array */
|
||||
while (len > 0)
|
||||
{
|
||||
mp[mplen++] = p;
|
||||
mblen = pg_mblen(p);
|
||||
p += mblen;
|
||||
len -= mblen;
|
||||
}
|
||||
mplen--;
|
||||
end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
|
||||
|
||||
while (m > 0)
|
||||
{
|
||||
int str_len = pg_mblen(ptr);
|
||||
|
||||
ptr2 = VARDATA(set);
|
||||
while (ptr2 <= end2)
|
||||
{
|
||||
int set_len = pg_mblen(ptr2);
|
||||
|
||||
if (str_len == set_len &&
|
||||
memcmp(ptr, ptr2, str_len) == 0)
|
||||
break;
|
||||
ptr2 += set_len;
|
||||
}
|
||||
if (ptr2 > end2)
|
||||
break;
|
||||
ptr += str_len;
|
||||
m -= str_len;
|
||||
}
|
||||
|
||||
while (m > 0)
|
||||
{
|
||||
int str_len;
|
||||
|
||||
end = mp[mplen--];
|
||||
str_len = pg_mblen(end);
|
||||
ptr2 = VARDATA(set);
|
||||
while (ptr2 <= end2)
|
||||
{
|
||||
int set_len = pg_mblen(ptr2);
|
||||
|
||||
if (str_len == set_len &&
|
||||
memcmp(end, ptr2, str_len) == 0)
|
||||
break;
|
||||
ptr2 += set_len;
|
||||
}
|
||||
if (ptr2 > end2)
|
||||
break;
|
||||
m -= str_len;
|
||||
}
|
||||
pfree(mp);
|
||||
ret = (text *) palloc(VARHDRSZ + m);
|
||||
VARATT_SIZEP(ret) = VARHDRSZ + m;
|
||||
memcpy(VARDATA(ret), ptr, m);
|
||||
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
|
||||
VARDATA(set), VARSIZE(set) - VARHDRSZ,
|
||||
true, true);
|
||||
|
||||
PG_RETURN_TEXT_P(ret);
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
*
|
||||
* btrim1 --- btrim with set fixed as ' '
|
||||
*
|
||||
********************************************************************/
|
||||
|
||||
Datum
|
||||
btrim1(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_P(0);
|
||||
text *ret;
|
||||
|
||||
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
|
||||
" ", 1,
|
||||
true, true);
|
||||
|
||||
PG_RETURN_TEXT_P(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Common implementation for btrim, ltrim, rtrim
|
||||
*/
|
||||
static text *
|
||||
dotrim(const char *string, int stringlen,
|
||||
const char *set, int setlen,
|
||||
bool doltrim, bool dortrim)
|
||||
{
|
||||
text *result;
|
||||
int i;
|
||||
|
||||
/* Nothing to do if either string or set is empty */
|
||||
if (stringlen > 0 && setlen > 0)
|
||||
{
|
||||
if (pg_database_encoding_max_length() > 1)
|
||||
{
|
||||
/*
|
||||
* In the multibyte-encoding case, build arrays of pointers to
|
||||
* character starts, so that we can avoid inefficient checks in
|
||||
* the inner loops.
|
||||
*/
|
||||
const char **stringchars;
|
||||
const char **setchars;
|
||||
int *stringmblen;
|
||||
int *setmblen;
|
||||
int stringnchars;
|
||||
int setnchars;
|
||||
int resultndx;
|
||||
int resultnchars;
|
||||
const char *p;
|
||||
int len;
|
||||
int mblen;
|
||||
const char *str_pos;
|
||||
int str_len;
|
||||
|
||||
stringchars = (const char **) palloc(stringlen * sizeof(char *));
|
||||
stringmblen = (int *) palloc(stringlen * sizeof(int));
|
||||
stringnchars = 0;
|
||||
p = string;
|
||||
len = stringlen;
|
||||
while (len > 0)
|
||||
{
|
||||
stringchars[stringnchars] = p;
|
||||
stringmblen[stringnchars] = mblen = pg_mblen(p);
|
||||
stringnchars++;
|
||||
p += mblen;
|
||||
len -= mblen;
|
||||
}
|
||||
|
||||
setchars = (const char **) palloc(setlen * sizeof(char *));
|
||||
setmblen = (int *) palloc(setlen * sizeof(int));
|
||||
setnchars = 0;
|
||||
p = set;
|
||||
len = setlen;
|
||||
while (len > 0)
|
||||
{
|
||||
setchars[setnchars] = p;
|
||||
setmblen[setnchars] = mblen = pg_mblen(p);
|
||||
setnchars++;
|
||||
p += mblen;
|
||||
len -= mblen;
|
||||
}
|
||||
|
||||
resultndx = 0; /* index in stringchars[] */
|
||||
resultnchars = stringnchars;
|
||||
|
||||
if (doltrim)
|
||||
{
|
||||
while (resultnchars > 0)
|
||||
{
|
||||
str_pos = stringchars[resultndx];
|
||||
str_len = stringmblen[resultndx];
|
||||
for (i = 0; i < setnchars; i++)
|
||||
{
|
||||
if (str_len == setmblen[i] &&
|
||||
memcmp(str_pos, setchars[i], str_len) == 0)
|
||||
break;
|
||||
}
|
||||
if (i >= setnchars)
|
||||
break; /* no match here */
|
||||
string += str_len;
|
||||
stringlen -= str_len;
|
||||
resultndx++;
|
||||
resultnchars--;
|
||||
}
|
||||
}
|
||||
|
||||
if (dortrim)
|
||||
{
|
||||
while (resultnchars > 0)
|
||||
{
|
||||
str_pos = stringchars[resultndx + resultnchars - 1];
|
||||
str_len = stringmblen[resultndx + resultnchars - 1];
|
||||
for (i = 0; i < setnchars; i++)
|
||||
{
|
||||
if (str_len == setmblen[i] &&
|
||||
memcmp(str_pos, setchars[i], str_len) == 0)
|
||||
break;
|
||||
}
|
||||
if (i >= setnchars)
|
||||
break; /* no match here */
|
||||
stringlen -= str_len;
|
||||
resultnchars--;
|
||||
}
|
||||
}
|
||||
|
||||
pfree(stringchars);
|
||||
pfree(stringmblen);
|
||||
pfree(setchars);
|
||||
pfree(setmblen);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* In the single-byte-encoding case, we don't need such overhead.
|
||||
*/
|
||||
if (doltrim)
|
||||
{
|
||||
while (stringlen > 0)
|
||||
{
|
||||
char str_ch = *string;
|
||||
|
||||
for (i = 0; i < setlen; i++)
|
||||
{
|
||||
if (str_ch == set[i])
|
||||
break;
|
||||
}
|
||||
if (i >= setlen)
|
||||
break; /* no match here */
|
||||
string++;
|
||||
stringlen--;
|
||||
}
|
||||
}
|
||||
|
||||
if (dortrim)
|
||||
{
|
||||
while (stringlen > 0)
|
||||
{
|
||||
char str_ch = string[stringlen - 1];
|
||||
|
||||
for (i = 0; i < setlen; i++)
|
||||
{
|
||||
if (str_ch == set[i])
|
||||
break;
|
||||
}
|
||||
if (i >= setlen)
|
||||
break; /* no match here */
|
||||
stringlen--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Return selected portion of string */
|
||||
result = (text *) palloc(VARHDRSZ + stringlen);
|
||||
VARATT_SIZEP(result) = VARHDRSZ + stringlen;
|
||||
memcpy(VARDATA(result), string, stringlen);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
*
|
||||
* byteatrim
|
||||
|
@ -525,44 +635,32 @@ ltrim(PG_FUNCTION_ARGS)
|
|||
text *string = PG_GETARG_TEXT_P(0);
|
||||
text *set = PG_GETARG_TEXT_P(1);
|
||||
text *ret;
|
||||
char *ptr,
|
||||
*ptr2,
|
||||
*end2;
|
||||
int m;
|
||||
|
||||
if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
|
||||
(VARSIZE(set) - VARHDRSZ) <= 0)
|
||||
PG_RETURN_TEXT_P(string);
|
||||
|
||||
ptr = VARDATA(string);
|
||||
end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
|
||||
|
||||
while (m > 0)
|
||||
{
|
||||
int str_len = pg_mblen(ptr);
|
||||
|
||||
ptr2 = VARDATA(set);
|
||||
while (ptr2 <= end2)
|
||||
{
|
||||
int set_len = pg_mblen(ptr2);
|
||||
|
||||
if (str_len == set_len &&
|
||||
memcmp(ptr, ptr2, str_len) == 0)
|
||||
break;
|
||||
ptr2 += set_len;
|
||||
}
|
||||
if (ptr2 > end2)
|
||||
break;
|
||||
ptr += str_len;
|
||||
m -= str_len;
|
||||
}
|
||||
ret = (text *) palloc(VARHDRSZ + m);
|
||||
VARATT_SIZEP(ret) = VARHDRSZ + m;
|
||||
memcpy(VARDATA(ret), ptr, m);
|
||||
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
|
||||
VARDATA(set), VARSIZE(set) - VARHDRSZ,
|
||||
true, false);
|
||||
|
||||
PG_RETURN_TEXT_P(ret);
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
*
|
||||
* ltrim1 --- ltrim with set fixed as ' '
|
||||
*
|
||||
********************************************************************/
|
||||
|
||||
Datum
|
||||
ltrim1(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_P(0);
|
||||
text *ret;
|
||||
|
||||
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
|
||||
" ", 1,
|
||||
true, false);
|
||||
|
||||
PG_RETURN_TEXT_P(ret);
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
*
|
||||
|
@ -586,64 +684,28 @@ rtrim(PG_FUNCTION_ARGS)
|
|||
text *set = PG_GETARG_TEXT_P(1);
|
||||
text *ret;
|
||||
|
||||
char *ptr,
|
||||
*end,
|
||||
*ptr2,
|
||||
*end2;
|
||||
int m;
|
||||
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
|
||||
VARDATA(set), VARSIZE(set) - VARHDRSZ,
|
||||
false, true);
|
||||
|
||||
char **mp;
|
||||
int mplen;
|
||||
char *p;
|
||||
int mblen;
|
||||
int len;
|
||||
PG_RETURN_TEXT_P(ret);
|
||||
}
|
||||
|
||||
if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
|
||||
(VARSIZE(set) - VARHDRSZ) <= 0)
|
||||
PG_RETURN_TEXT_P(string);
|
||||
/********************************************************************
|
||||
*
|
||||
* rtrim1 --- rtrim with set fixed as ' '
|
||||
*
|
||||
********************************************************************/
|
||||
|
||||
ptr = VARDATA(string);
|
||||
Datum
|
||||
rtrim1(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_P(0);
|
||||
text *ret;
|
||||
|
||||
len = m;
|
||||
mp = (char **) palloc(len * sizeof(char *));
|
||||
p = ptr;
|
||||
mplen = 0;
|
||||
|
||||
/* build the mb pointer array */
|
||||
while (len > 0)
|
||||
{
|
||||
mp[mplen++] = p;
|
||||
mblen = pg_mblen(p);
|
||||
p += mblen;
|
||||
len -= mblen;
|
||||
}
|
||||
mplen--;
|
||||
end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
|
||||
|
||||
while (m > 0)
|
||||
{
|
||||
int str_len;
|
||||
|
||||
end = mp[mplen--];
|
||||
str_len = pg_mblen(end);
|
||||
ptr2 = VARDATA(set);
|
||||
while (ptr2 <= end2)
|
||||
{
|
||||
int set_len = pg_mblen(ptr2);
|
||||
|
||||
if (str_len == set_len &&
|
||||
memcmp(end, ptr2, str_len) == 0)
|
||||
break;
|
||||
ptr2 += set_len;
|
||||
}
|
||||
if (ptr2 > end2)
|
||||
break;
|
||||
m -= str_len;
|
||||
}
|
||||
pfree(mp);
|
||||
ret = (text *) palloc(VARHDRSZ + m);
|
||||
VARATT_SIZEP(ret) = VARHDRSZ + m;
|
||||
memcpy(VARDATA(ret), ptr, m);
|
||||
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
|
||||
" ", 1,
|
||||
false, true);
|
||||
|
||||
PG_RETURN_TEXT_P(ret);
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: catversion.h,v 1.194 2003/05/15 15:50:19 petere Exp $
|
||||
* $Id: catversion.h,v 1.195 2003/05/23 22:33:22 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -53,6 +53,6 @@
|
|||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 200305151
|
||||
#define CATALOG_VERSION_NO 200305231
|
||||
|
||||
#endif
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: pg_proc.h,v 1.300 2003/05/15 15:50:19 petere Exp $
|
||||
* $Id: pg_proc.h,v 1.301 2003/05/23 22:33:22 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* The script catalog/genbki.sh reads this file and generates .bki
|
||||
|
@ -2134,9 +2134,9 @@ DESCR("left-pad string to length");
|
|||
DATA(insert OID = 874 ( rpad PGNSP PGUID 12 f f t f i 3 25 "25 23 25" rpad - _null_ ));
|
||||
DESCR("right-pad string to length");
|
||||
DATA(insert OID = 875 ( ltrim PGNSP PGUID 12 f f t f i 2 25 "25 25" ltrim - _null_ ));
|
||||
DESCR("left-pad string to length");
|
||||
DESCR("trim selected characters from left end of string");
|
||||
DATA(insert OID = 876 ( rtrim PGNSP PGUID 12 f f t f i 2 25 "25 25" rtrim - _null_ ));
|
||||
DESCR("right-pad string to length");
|
||||
DESCR("trim selected characters from right end of string");
|
||||
DATA(insert OID = 877 ( substr PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ ));
|
||||
DESCR("return portion of string");
|
||||
DATA(insert OID = 878 ( translate PGNSP PGUID 12 f f t f i 3 25 "25 25 25" translate - _null_ ));
|
||||
|
@ -2145,16 +2145,16 @@ DATA(insert OID = 879 ( lpad PGNSP PGUID 14 f f t f i 2 25 "25 23" "select
|
|||
DESCR("left-pad string to length");
|
||||
DATA(insert OID = 880 ( rpad PGNSP PGUID 14 f f t f i 2 25 "25 23" "select rpad($1, $2, \' \')" - _null_ ));
|
||||
DESCR("right-pad string to length");
|
||||
DATA(insert OID = 881 ( ltrim PGNSP PGUID 14 f f t f i 1 25 "25" "select ltrim($1, \' \')" - _null_ ));
|
||||
DESCR("remove initial characters from string");
|
||||
DATA(insert OID = 882 ( rtrim PGNSP PGUID 14 f f t f i 1 25 "25" "select rtrim($1, \' \')" - _null_ ));
|
||||
DESCR("remove trailing characters from string");
|
||||
DATA(insert OID = 881 ( ltrim PGNSP PGUID 12 f f t f i 1 25 "25" ltrim1 - _null_ ));
|
||||
DESCR("trim spaces from left end of string");
|
||||
DATA(insert OID = 882 ( rtrim PGNSP PGUID 12 f f t f i 1 25 "25" rtrim1 - _null_ ));
|
||||
DESCR("trim spaces from right end of string");
|
||||
DATA(insert OID = 883 ( substr PGNSP PGUID 12 f f t f i 2 25 "25 23" text_substr_no_len - _null_ ));
|
||||
DESCR("return portion of string");
|
||||
DATA(insert OID = 884 ( btrim PGNSP PGUID 12 f f t f i 2 25 "25 25" btrim - _null_ ));
|
||||
DESCR("trim both ends of string");
|
||||
DATA(insert OID = 885 ( btrim PGNSP PGUID 14 f f t f i 1 25 "25" "select btrim($1, \' \')" - _null_ ));
|
||||
DESCR("trim both ends of string");
|
||||
DESCR("trim selected characters from both ends of string");
|
||||
DATA(insert OID = 885 ( btrim PGNSP PGUID 12 f f t f i 1 25 "25" btrim1 - _null_ ));
|
||||
DESCR("trim spaces from both ends of string");
|
||||
|
||||
DATA(insert OID = 936 ( substring PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ ));
|
||||
DESCR("return portion of string");
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: builtins.h,v 1.217 2003/05/15 15:50:20 petere Exp $
|
||||
* $Id: builtins.h,v 1.218 2003/05/23 22:33:23 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -608,9 +608,12 @@ extern Datum initcap(PG_FUNCTION_ARGS);
|
|||
extern Datum lpad(PG_FUNCTION_ARGS);
|
||||
extern Datum rpad(PG_FUNCTION_ARGS);
|
||||
extern Datum btrim(PG_FUNCTION_ARGS);
|
||||
extern Datum btrim1(PG_FUNCTION_ARGS);
|
||||
extern Datum byteatrim(PG_FUNCTION_ARGS);
|
||||
extern Datum ltrim(PG_FUNCTION_ARGS);
|
||||
extern Datum ltrim1(PG_FUNCTION_ARGS);
|
||||
extern Datum rtrim(PG_FUNCTION_ARGS);
|
||||
extern Datum rtrim1(PG_FUNCTION_ARGS);
|
||||
extern Datum translate(PG_FUNCTION_ARGS);
|
||||
extern Datum chr(PG_FUNCTION_ARGS);
|
||||
extern Datum repeat(PG_FUNCTION_ARGS);
|
||||
|
|
Loading…
Reference in New Issue