diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c index 74c5452fbd..9dc7105574 100644 --- a/contrib/ltree/lquery_op.c +++ b/contrib/ltree/lquery_op.c @@ -1,13 +1,14 @@ /* * op function for ltree and lquery * Teodor Sigaev - * $PostgreSQL: pgsql/contrib/ltree/lquery_op.c,v 1.12 2008/05/12 00:00:42 alvherre Exp $ + * $PostgreSQL: pgsql/contrib/ltree/lquery_op.c,v 1.13 2008/06/30 18:30:48 teodor Exp $ */ #include "postgres.h" #include #include "utils/array.h" +#include "utils/formatting.h" #include "ltree.h" PG_FUNCTION_INFO_V1(ltq_regex); @@ -32,23 +33,24 @@ static char * getlexeme(char *start, char *end, int *len) { char *ptr; - - while (start < end && *start == '_') - start++; + int charlen; + + while (start < end && (charlen = pg_mblen(start)) == 1 && t_iseq(start,'_') ) + start += charlen; ptr = start; - if (ptr == end) + if (ptr >= end) return NULL; - while (ptr < end && *ptr != '_') - ptr++; + while (ptr < end && !( (charlen = pg_mblen(ptr)) == 1 && t_iseq(ptr, '_') ) ) + ptr += charlen; *len = ptr - start; return start; } bool - compare_subnode(ltree_level * t, char *qn, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend) +compare_subnode(ltree_level * t, char *qn, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend) { char *endt = t->name + t->len; char *endq = qn + len; @@ -85,6 +87,21 @@ bool return true; } +int +ltree_strncasecmp(const char *a, const char *b, size_t s) +{ + char *al = str_tolower(a, s); + char *bl = str_tolower(b, s); + int res; + + res = strncmp(al, bl,s); + + pfree(al); + pfree(bl); + + return res; +} + static bool checkLevel(lquery_level * curq, ltree_level * curt) { @@ -94,7 +111,7 @@ checkLevel(lquery_level * curq, ltree_level * curt) for (i = 0; i < curq->numvar; i++) { - cmpptr = (curvar->flag & LVAR_INCASE) ? pg_strncasecmp : strncmp; + cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp; if (curvar->flag & LVAR_SUBLEXEME) { diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h index d1dab73501..9cdfedab04 100644 --- a/contrib/ltree/ltree.h +++ b/contrib/ltree/ltree.h @@ -1,17 +1,19 @@ -/* $PostgreSQL: pgsql/contrib/ltree/ltree.h,v 1.20 2008/05/12 00:00:42 alvherre Exp $ */ +/* $PostgreSQL: pgsql/contrib/ltree/ltree.h,v 1.21 2008/06/30 18:30:48 teodor Exp $ */ #ifndef __LTREE_H__ #define __LTREE_H__ +#include "postgres.h" #include "fmgr.h" +#include "tsearch/ts_locale.h" typedef struct { - uint8 len; + uint16 len; char name[1]; } ltree_level; -#define LEVEL_HDRSIZE (sizeof(uint8)) +#define LEVEL_HDRSIZE (offsetof(ltree_level,name)) #define LEVEL_NEXT(x) ( (ltree_level*)( ((char*)(x)) + MAXALIGN(((ltree_level*)(x))->len + LEVEL_HDRSIZE) ) ) typedef struct @@ -21,7 +23,7 @@ typedef struct char data[1]; } ltree; -#define LTREE_HDRSIZE MAXALIGN(VARHDRSZ + sizeof(uint16)) +#define LTREE_HDRSIZE MAXALIGN( offsetof(ltree, data) ) #define LTREE_FIRST(x) ( (ltree_level*)( ((char*)(x))+LTREE_HDRSIZE ) ) @@ -30,12 +32,12 @@ typedef struct typedef struct { int4 val; - uint8 len; + uint16 len; uint8 flag; char name[1]; } lquery_variant; -#define LVAR_HDRSIZE MAXALIGN(sizeof(uint8)*2 + sizeof(int4)) +#define LVAR_HDRSIZE MAXALIGN(offsetof(lquery_variant, name)) #define LVAR_NEXT(x) ( (lquery_variant*)( ((char*)(x)) + MAXALIGN(((lquery_variant*)(x))->len) + LVAR_HDRSIZE ) ) #define LVAR_ANYEND 0x01 @@ -52,7 +54,7 @@ typedef struct char variants[1]; } lquery_level; -#define LQL_HDRSIZE MAXALIGN( sizeof(uint16)*5 ) +#define LQL_HDRSIZE MAXALIGN( offsetof(lquery_level,variants) ) #define LQL_NEXT(x) ( (lquery_level*)( ((char*)(x)) + MAXALIGN(((lquery_level*)(x))->totallen) ) ) #define LQL_FIRST(x) ( (lquery_variant*)( ((char*)(x))+LQL_HDRSIZE ) ) @@ -73,12 +75,12 @@ typedef struct char data[1]; } lquery; -#define LQUERY_HDRSIZE MAXALIGN(VARHDRSZ + 3*sizeof(uint16)) +#define LQUERY_HDRSIZE MAXALIGN( offsetof(lquery, data) ) #define LQUERY_FIRST(x) ( (lquery_level*)( ((char*)(x))+LQUERY_HDRSIZE ) ) #define LQUERY_HASNOT 0x01 -#define ISALNUM(x) ( isalnum((unsigned char)(x)) || (x) == '_' ) +#define ISALNUM(x) ( t_isalpha(x) || t_isdigit(x) || ( pg_mblen(x) == 1 && t_iseq((x), '_') ) ) /* full text query */ @@ -156,9 +158,10 @@ bool ltree_execute(ITEM * curitem, void *checkval, int ltree_compare(const ltree * a, const ltree * b); bool inner_isparent(const ltree * c, const ltree * p); -bool compare_subnode(ltree_level * t, char *q, int len, - int (*cmpptr) (const char *, const char *, size_t), bool anyend); +bool compare_subnode(ltree_level * t, char *q, int len, + int (*cmpptr) (const char *, const char *, size_t), bool anyend); ltree *lca_inner(ltree ** a, int len); +int ltree_strncasecmp(const char *a, const char *b, size_t s); #define PG_GETARG_LTREE(x) ((ltree*)DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(x)))) #define PG_GETARG_LTREE_COPY(x) ((ltree*)DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(x)))) diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c index 4bf2b67f10..19645be38f 100644 --- a/contrib/ltree/ltree_io.c +++ b/contrib/ltree/ltree_io.c @@ -1,7 +1,7 @@ /* * in/out function for ltree and lquery * Teodor Sigaev - * $PostgreSQL: pgsql/contrib/ltree/ltree_io.c,v 1.16 2008/05/12 00:00:43 alvherre Exp $ + * $PostgreSQL: pgsql/contrib/ltree/ltree_io.c,v 1.17 2008/06/30 18:30:48 teodor Exp $ */ #include "postgres.h" @@ -25,15 +25,16 @@ Datum lquery_out(PG_FUNCTION_ARGS); #define UNCHAR ereport(ERROR, \ (errcode(ERRCODE_SYNTAX_ERROR), \ - errmsg("syntax error at position %d near \"%c\"", \ - (int)(ptr-buf), *ptr))); + errmsg("syntax error at position %d", \ + pos))); typedef struct { char *start; - int len; + int len; /* length in bytes */ int flag; + int wlen; /* length in characters */ } nodeitem; #define LTPRS_WAITNAME 0 @@ -51,24 +52,30 @@ ltree_in(PG_FUNCTION_ARGS) int state = LTPRS_WAITNAME; ltree *result; ltree_level *curlevel; + int charlen; + int pos=0; ptr = buf; while (*ptr) { - if (*ptr == '.') + charlen = pg_mblen(ptr); + if ( charlen == 1 && t_iseq(ptr, '.') ) num++; - ptr++; + ptr+=charlen; } list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1)); ptr = buf; while (*ptr) { + charlen = pg_mblen(ptr); + if (state == LTPRS_WAITNAME) { - if (ISALNUM(*ptr)) + if (ISALNUM(ptr)) { lptr->start = ptr; + lptr->wlen = 0; state = LTPRS_WAITDELIM; } else @@ -76,40 +83,43 @@ ltree_in(PG_FUNCTION_ARGS) } else if (state == LTPRS_WAITDELIM) { - if (*ptr == '.') + if ( charlen == 1 && t_iseq(ptr, '.') ) { lptr->len = ptr - lptr->start; - if (lptr->len > 255) + if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", - lptr->len, (int) (lptr->start - buf)))); + lptr->wlen, pos))); totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE); lptr++; state = LTPRS_WAITNAME; } - else if (!ISALNUM(*ptr)) + else if (!ISALNUM(ptr)) UNCHAR; } else /* internal error */ elog(ERROR, "internal error in parser"); - ptr++; + + ptr+=charlen; + lptr->wlen++; + pos++; } if (state == LTPRS_WAITDELIM) { lptr->len = ptr - lptr->start; - if (lptr->len > 255) + if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", - lptr->len, (int) (lptr->start - buf)))); + lptr->wlen, pos))); totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE); lptr++; @@ -127,7 +137,7 @@ ltree_in(PG_FUNCTION_ARGS) lptr = list; while (lptr - list < result->numlevel) { - curlevel->len = (uint8) lptr->len; + curlevel->len = (uint16) lptr->len; memcpy(curlevel->name, lptr->start, lptr->len); curlevel = LEVEL_NEXT(curlevel); lptr++; @@ -198,15 +208,23 @@ lquery_in(PG_FUNCTION_ARGS) lquery_variant *lrptr = NULL; bool hasnot = false; bool wasbad = false; + int charlen; + int pos=0; ptr = buf; while (*ptr) { - if (*ptr == '.') - num++; - else if (*ptr == '|') - numOR++; - ptr++; + charlen = pg_mblen(ptr); + + if ( charlen == 1 ) + { + if (t_iseq(ptr, '.')) + num++; + else if (t_iseq(ptr, '|')) + numOR++; + } + + ptr+=charlen; } num++; @@ -214,16 +232,18 @@ lquery_in(PG_FUNCTION_ARGS) ptr = buf; while (*ptr) { + charlen = pg_mblen(ptr); + if (state == LQPRS_WAITLEVEL) { - if (ISALNUM(*ptr)) + if (ISALNUM(ptr)) { GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1)); lptr->start = ptr; state = LQPRS_WAITDELIM; curqlevel->numvar = 1; } - else if (*ptr == '!') + else if (charlen==1 && t_iseq(ptr, '!')) { GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1)); lptr->start = ptr + 1; @@ -232,14 +252,14 @@ lquery_in(PG_FUNCTION_ARGS) curqlevel->flag |= LQL_NOT; hasnot = true; } - else if (*ptr == '*') + else if (charlen==1 && t_iseq(ptr, '*')) state = LQPRS_WAITOPEN; else UNCHAR; } else if (state == LQPRS_WAITVAR) { - if (ISALNUM(*ptr)) + if (ISALNUM(ptr)) { lptr++; lptr->start = ptr; @@ -251,61 +271,61 @@ lquery_in(PG_FUNCTION_ARGS) } else if (state == LQPRS_WAITDELIM) { - if (*ptr == '@') + if (charlen==1 && t_iseq(ptr, '@')) { if (lptr->start == ptr) UNCHAR; lptr->flag |= LVAR_INCASE; curqlevel->flag |= LVAR_INCASE; } - else if (*ptr == '*') + else if (charlen==1 && t_iseq(ptr, '*')) { if (lptr->start == ptr) UNCHAR; lptr->flag |= LVAR_ANYEND; curqlevel->flag |= LVAR_ANYEND; } - else if (*ptr == '%') + else if (charlen==1 && t_iseq(ptr, '%')) { if (lptr->start == ptr) UNCHAR; lptr->flag |= LVAR_SUBLEXEME; curqlevel->flag |= LVAR_SUBLEXEME; } - else if (*ptr == '|') + else if (charlen==1 && t_iseq(ptr, '|')) { lptr->len = ptr - lptr->start - ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) - ((lptr->flag & LVAR_INCASE) ? 1 : 0) - ((lptr->flag & LVAR_ANYEND) ? 1 : 0); - if (lptr->len > 255) + if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", - lptr->len, (int) (lptr->start - buf)))); + lptr->wlen, pos))); state = LQPRS_WAITVAR; } - else if (*ptr == '.') + else if (charlen==1 && t_iseq(ptr, '.')) { lptr->len = ptr - lptr->start - ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) - ((lptr->flag & LVAR_INCASE) ? 1 : 0) - ((lptr->flag & LVAR_ANYEND) ? 1 : 0); - if (lptr->len > 255) + if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", - lptr->len, (int) (lptr->start - buf)))); + lptr->wlen, pos))); state = LQPRS_WAITLEVEL; curqlevel = NEXTLEV(curqlevel); } - else if (ISALNUM(*ptr)) + else if (ISALNUM(ptr)) { if (lptr->flag) UNCHAR; @@ -315,9 +335,9 @@ lquery_in(PG_FUNCTION_ARGS) } else if (state == LQPRS_WAITOPEN) { - if (*ptr == '{') + if (charlen==1 && t_iseq(ptr, '{')) state = LQPRS_WAITFNUM; - else if (*ptr == '.') + else if (charlen==1 && t_iseq(ptr, '.')) { curqlevel->low = 0; curqlevel->high = 0xffff; @@ -329,9 +349,9 @@ lquery_in(PG_FUNCTION_ARGS) } else if (state == LQPRS_WAITFNUM) { - if (*ptr == ',') + if (charlen==1 && t_iseq(ptr, ',')) state = LQPRS_WAITSNUM; - else if (isdigit((unsigned char) *ptr)) + else if (t_isdigit(ptr)) { curqlevel->low = atoi(ptr); state = LQPRS_WAITND; @@ -341,12 +361,12 @@ lquery_in(PG_FUNCTION_ARGS) } else if (state == LQPRS_WAITSNUM) { - if (isdigit((unsigned char) *ptr)) + if (t_isdigit(ptr)) { curqlevel->high = atoi(ptr); state = LQPRS_WAITCLOSE; } - else if (*ptr == '}') + else if (charlen==1 && t_iseq(ptr, '}')) { curqlevel->high = 0xffff; state = LQPRS_WAITEND; @@ -356,26 +376,26 @@ lquery_in(PG_FUNCTION_ARGS) } else if (state == LQPRS_WAITCLOSE) { - if (*ptr == '}') + if (charlen==1 && t_iseq(ptr, '}')) state = LQPRS_WAITEND; - else if (!isdigit((unsigned char) *ptr)) + else if (!t_isdigit(ptr)) UNCHAR; } else if (state == LQPRS_WAITND) { - if (*ptr == '}') + if (charlen==1 && t_iseq(ptr, '}')) { curqlevel->high = curqlevel->low; state = LQPRS_WAITEND; } - else if (*ptr == ',') + else if (charlen==1 && t_iseq(ptr, ',')) state = LQPRS_WAITSNUM; - else if (!isdigit((unsigned char) *ptr)) + else if (!t_isdigit(ptr)) UNCHAR; } else if (state == LQPRS_WAITEND) { - if (*ptr == '.') + if (charlen==1 && t_iseq(ptr, '.')) { state = LQPRS_WAITLEVEL; curqlevel = NEXTLEV(curqlevel); @@ -386,7 +406,11 @@ lquery_in(PG_FUNCTION_ARGS) else /* internal error */ elog(ERROR, "internal error in parser"); - ptr++; + + ptr+=charlen; + if ( state == LQPRS_WAITDELIM ) + lptr->wlen++; + pos++; } if (state == LQPRS_WAITDELIM) @@ -407,13 +431,13 @@ lquery_in(PG_FUNCTION_ARGS) errmsg("syntax error"), errdetail("Unexpected end of line."))); - if (lptr->len > 255) + if (lptr->wlen > 255) ereport(ERROR, (errcode(ERRCODE_NAME_TOO_LONG), errmsg("name of level is too long"), errdetail("Name length is %d, must " "be < 256, in position %d.", - lptr->len, (int) (lptr->start - buf)))); + lptr->wlen, pos))); } else if (state == LQPRS_WAITOPEN) curqlevel->high = 0xffff; diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c index bf8c991450..eece52972a 100644 --- a/contrib/ltree/ltxtquery_io.c +++ b/contrib/ltree/ltxtquery_io.c @@ -1,7 +1,7 @@ /* * txtquery io * Teodor Sigaev - * $PostgreSQL: pgsql/contrib/ltree/ltxtquery_io.c,v 1.15 2008/05/12 00:00:43 alvherre Exp $ + * $PostgreSQL: pgsql/contrib/ltree/ltxtquery_io.c,v 1.16 2008/06/30 18:30:48 teodor Exp $ */ #include "postgres.h" @@ -59,49 +59,53 @@ typedef struct static int4 gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, uint16 *flag) { - while (1) + int charlen; + + for(;;) { + charlen = pg_mblen(state->buf); + switch (state->state) { case WAITOPERAND: - if (*(state->buf) == '!') + if (charlen==1 && t_iseq(state->buf, '!')) { (state->buf)++; *val = (int4) '!'; return OPR; } - else if (*(state->buf) == '(') + else if (charlen==1 && t_iseq(state->buf, '(')) { state->count++; (state->buf)++; return OPEN; } - else if (ISALNUM(*(state->buf))) + else if (ISALNUM(state->buf)) { state->state = INOPERAND; *strval = state->buf; - *lenval = 1; + *lenval = charlen; *flag = 0; } - else if (!isspace((unsigned char) *(state->buf))) + else if (!t_isspace(state->buf)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("operand syntax error"))); break; case INOPERAND: - if (ISALNUM(*(state->buf))) + if (ISALNUM(state->buf)) { if (*flag) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("modificators syntax error"))); - (*lenval)++; + *lenval += charlen; } - else if (*(state->buf) == '%') + else if (charlen==1 && t_iseq(state->buf, '%')) *flag |= LVAR_SUBLEXEME; - else if (*(state->buf) == '@') + else if (charlen==1 && t_iseq(state->buf, '@')) *flag |= LVAR_INCASE; - else if (*(state->buf) == '*') + else if (charlen==1 && t_iseq(state->buf, '*')) *flag |= LVAR_ANYEND; else { @@ -110,14 +114,14 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, uint1 } break; case WAITOPERATOR: - if (*(state->buf) == '&' || *(state->buf) == '|') + if (charlen==1 && ( t_iseq(state->buf, '&') || t_iseq(state->buf, '|') )) { state->state = WAITOPERAND; *val = (int4) *(state->buf); (state->buf)++; return OPR; } - else if (*(state->buf) == ')') + else if (charlen==1 && t_iseq(state->buf, ')')) { (state->buf)++; state->count--; @@ -125,14 +129,15 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, uint1 } else if (*(state->buf) == '\0') return (state->count) ? ERR : END; - else if (*(state->buf) != ' ') + else if (charlen==1 && !t_iseq(state->buf, ' ')) return ERR; break; default: return ERR; break; } - (state->buf)++; + + state->buf += charlen; } return END; } diff --git a/contrib/ltree/ltxtquery_op.c b/contrib/ltree/ltxtquery_op.c index 291b7e049b..647d978fec 100644 --- a/contrib/ltree/ltxtquery_op.c +++ b/contrib/ltree/ltxtquery_op.c @@ -1,7 +1,7 @@ /* * txtquery operations with ltree * Teodor Sigaev - * $PostgreSQL: pgsql/contrib/ltree/ltxtquery_op.c,v 1.8 2008/05/12 00:00:43 alvherre Exp $ + * $PostgreSQL: pgsql/contrib/ltree/ltxtquery_op.c,v 1.9 2008/06/30 18:30:48 teodor Exp $ */ #include "postgres.h" @@ -57,7 +57,7 @@ checkcondition_str(void *checkval, ITEM * val) char *op = ((CHKVAL *) checkval)->operand + val->distance; int (*cmpptr) (const char *, const char *, size_t); - cmpptr = (val->flag & LVAR_INCASE) ? pg_strncasecmp : strncmp; + cmpptr = (val->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp; while (tlen > 0) { if (val->flag & LVAR_SUBLEXEME)