diff --git a/doc/src/sgml/ref/set.sgml b/doc/src/sgml/ref/set.sgml index 65ff6eca29..5ccb3a7a0d 100644 --- a/doc/src/sgml/ref/set.sgml +++ b/doc/src/sgml/ref/set.sgml @@ -1,5 +1,5 @@ @@ -28,8 +28,8 @@ SET [ SESSION | LOCAL ] TIME ZONE { timezone Specifies that the command takes effect for the current session. - (This is the default if neither @@ -39,11 +39,11 @@ SET [ SESSION | LOCAL ] TIME ZONE { timezone Specifies that the command takes effect for only the current - transaction. After COMMIT or ROLLBACK, - the session-level setting takes effect again. Note that - SET LOCAL will appear to have no effect if it's - executed outside a BEGIN block, since the transaction - will end immediately. + transaction. After COMMIT or ROLLBACK, + the session-level setting takes effect again. Note that + SET LOCAL will appear to have no effect if it's + executed outside a BEGIN block, since the transaction + will end immediately. @@ -112,7 +112,6 @@ SET [ SESSION | LOCAL ] TIME ZONE { timezone - DATESTYLE @@ -126,44 +125,44 @@ SET [ SESSION | LOCAL ] TIME ZONE { timezone - - ISO - - - Use ISO 8601-style dates and times (YYYY-MM-DD - HH:MM:SS). This is the default. + + ISO + + + Use ISO 8601-style dates and times (YYYY-MM-DD + HH:MM:SS). This is the default. - - + + - SQL - - - Use Oracle/Ingres-style dates and times. Note that this - style has nothing to do with SQL (which mandates ISO 8601 - style), the naming of this option is a historical accident. - - - + SQL + + + Use Oracle/Ingres-style dates and times. Note that this + style has nothing to do with SQL (which mandates ISO 8601 + style), the naming of this option is a historical accident. + + + - PostgreSQL - - - Use traditional PostgreSQL format. - - - + PostgreSQL + + + Use traditional PostgreSQL format. + + + - German - - - Use dd.mm.yyyy for numeric date representations. - - - + German + + + Use dd.mm.yyyy for numeric date representations. 
+ + + @@ -173,24 +172,24 @@ SET [ SESSION | LOCAL ] TIME ZONE { timezone - - European - - - Use dd/mm/yyyy for numeric date representations. - - - + + European + + + Use dd/mm/yyyy for numeric date representations. + + + - NonEuropean - US - - - Use mm/dd/yyyy for numeric date representations. - - - + NonEuropean + US + + + Use mm/dd/yyyy for numeric date representations. + + + @@ -214,23 +213,23 @@ SET [ SESSION | LOCAL ] TIME ZONE { timezoneSET or a configuration-file entry: - - Setting the postmaster's PGDATESTYLE environment - variable. (This will be overridden by any of the other methods.) - - - Running postmaster using the option to - select the European conventions. - (This overrides environment variables and configuration-file - entries.) - - - Setting the client's PGDATESTYLE environment variable. - If PGDATESTYLE is set in the frontend environment of a client - based on libpq, libpq will automatically set DATESTYLE to the - value of PGDATESTYLE during connection start-up. This is - equivalent to a manually issued SET. - + + Setting the postmaster's PGDATESTYLE environment + variable. (This will be overridden by any of the other methods.) + + + Running postmaster using the option to + set dates to the European convention. + (This overrides environment variables and configuration-file + entries.) + + + Setting the client's PGDATESTYLE environment variable. + If PGDATESTYLE is set in the frontend environment of a client + based on libpq, libpq will automatically set DATESTYLE to the + value of PGDATESTYLE during connection start-up. This is + equivalent to a manually issued SET DATESTYLE. + @@ -250,50 +249,49 @@ SET [ SESSION | LOCAL ] TIME ZONE { timezoneSEED - Sets the internal seed for the random number generator. + Sets the internal seed for the random number generator. - - - value - - - The value for the seed to be used by the - random function. Allowed - values are floating-point numbers between 0 and 1, which - are then multiplied by 231-1. 
- - - - - + + + value + + + The value for the seed to be used by the + random function. Allowed + values are floating-point numbers between 0 and 1, which + are then multiplied by 231-1. + + + + + - - The seed can also be set by invoking the - setseed SQL function: + + The seed can also be set by invoking the + setseed SQL function: - + SELECT setseed(value); - - + + + + - - + + SERVER_ENCODING + + + Shows the server-side multibyte encoding. (At present, this + parameter can be shown but not set, because the encoding is + determined at initdb time.) + + + - - SERVER_ENCODING - - - Shows the server-side multibyte encoding. (At present, this - parameter can be shown but not set, because the encoding is - determined at initdb time.) - - - - - - TIME ZONE - TIMEZONE - + + TIME ZONE + TIMEZONE + Sets the default time zone for your session. Arguments can be an SQL time interval constant, an integer or double precision @@ -301,92 +299,88 @@ SELECT setseed(value); by the host operating system. - - The available time zone names depend on your operating - system. For example, on Linux - /usr/share/zoneinfo contains the database - of time zones. - - - Here are some typical values for time zone names: + + Here are some typical values for time zone settings: - - 'PST8PDT' - - - Set the time zone for California. - - - - - 'Portugal' - - - Set the time zone for Portugal. - - - - - 'Europe/Rome' - - - Set the time zone for Italy. - - - - - + + 'PST8PDT' + + + Set the time zone for Berkeley, California. + + + + + 'Portugal' + + + Set the time zone for Portugal. + + + + + 'Europe/Rome' + + + Set the time zone for Italy. + + + + + 7 + + + Set the time zone to 7 hours offset west from GMT (equivalent + to PDT). + + + + + INTERVAL '08:00' HOUR TO MINUTE + + + Set the time zone to 8 hours offset west from GMT (equivalent + to PST). + + + + + LOCAL + DEFAULT + + + Set the time zone to your local time zone (the one that + your operating system defaults to). 
+ + + + + - - In addition to time zone names, PostgreSQL - accepts these other methods of specifying a time zone: + + The available time zone names depend on your operating + system. For example, on Linux + /usr/share/zoneinfo contains the database + of time zones; the names of the files in that directory can be + used as parameters to this command. + - - - 7 - - - Set the time zone to 7 hours offset west from GMT (equivalent - to PDT). - - - - - INTERVAL '08:00' HOUR TO MINUTE - - - Set the time zone to 8 hours offset west from GMT (equivalent - to PST). - - - - - LOCAL - DEFAULT - - - Set the time zone to your local time zone (the one that - your operating system defaults to). - - - - - - - If an invalid time zone name is specified, the time zone - becomes GMT (on most systems anyway). - - - If the PGTZ environment variable is set in the frontend - environment of a client based on libpq, libpq will automatically - set TIMEZONE to the value of PGTZ during connection start-up. - - - + + If an invalid time zone is specified, the time zone + becomes GMT (on most systems anyway). + - - + + If the PGTZ environment variable is set in the frontend + environment of a client based on libpq, libpq will automatically + SET TIMEZONE to the value of + PGTZ during connection start-up. 
+ + + + + Use to show the diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index fbf63db85e..b8fd3d0348 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.320 2002/06/11 13:40:50 wieck Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.321 2002/06/11 15:41:37 thomas Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -227,10 +227,10 @@ static void doNegateFloat(Value *v); %type join_outer, join_qual %type join_type -%type extract_list, position_list +%type extract_list, overlay_list, position_list %type substr_list, trim_list %type opt_interval -%type substr_from, substr_for +%type overlay_placing, substr_from, substr_for %type opt_binary, opt_using, opt_instead, opt_cursor %type opt_with_copy, index_opt_unique, opt_verbose, opt_full @@ -336,7 +336,7 @@ static void doNegateFloat(Value *v); FALSE_P, FETCH, FLOAT_P, FOR, FORCE, FOREIGN, FORWARD, FREEZE, FROM, FULL, FUNCTION, - GLOBAL, GRANT, GROUP_P, + GET, GLOBAL, GRANT, GROUP_P, HANDLER, HAVING, HOUR_P, ILIKE, IMMEDIATE, IMMUTABLE, IMPLICIT, IN_P, INCREMENT, INDEX, INHERITS, @@ -356,16 +356,16 @@ static void doNegateFloat(Value *v); NUMERIC, OF, OFF, OFFSET, OIDS, OLD, ON, ONLY, OPERATOR, OPTION, OR, ORDER, - OUT_P, OUTER_P, OVERLAPS, OWNER, + OUT_P, OUTER_P, OVERLAPS, OVERLAY, OWNER, - PARTIAL, PASSWORD, PATH_P, PENDANT, POSITION, PRECISION, PRIMARY, + PARTIAL, PASSWORD, PATH_P, PENDANT, PLACING, POSITION, PRECISION, PRIMARY, PRIOR, PRIVILEGES, PROCEDURE, PROCEDURAL, READ, REAL, REFERENCES, REINDEX, RELATIVE, RENAME, REPLACE, RESET, RESTRICT, RETURNS, REVOKE, RIGHT, ROLLBACK, ROW, RULE, SCHEMA, SCROLL, SECOND_P, SECURITY, SELECT, SEQUENCE, SERIALIZABLE, - SESSION, SESSION_USER, SET, SETOF, SHARE, SHOW, SMALLINT, SOME, + SESSION, SESSION_USER, SET, SETOF, SHARE, SHOW, SIMILAR, SMALLINT, SOME, STABLE, START, STATEMENT, STATISTICS, STDIN, STDOUT, STORAGE, STRICT, SUBSTRING, 
SYSID, @@ -402,7 +402,7 @@ static void doNegateFloat(Value *v); %right NOT %right '=' %nonassoc '<' '>' -%nonassoc LIKE ILIKE +%nonassoc LIKE ILIKE SIMILAR %nonassoc ESCAPE %nonassoc OVERLAPS %nonassoc BETWEEN @@ -420,6 +420,7 @@ static void doNegateFloat(Value *v); %right UMINUS %left '[' ']' %left '(' ')' +%left COLLATE %left TYPECAST %left '.' %% @@ -2139,6 +2140,14 @@ DefineStmt: CREATE AGGREGATE func_name definition n->definition = $4; $$ = (Node *)n; } + | CREATE CHARACTER SET opt_as any_name GET definition opt_collate + { + DefineStmt *n = makeNode(DefineStmt); + n->defType = CHARACTER; + n->defnames = $5; + n->definition = $7; + $$ = (Node *)n; + } ; definition: '(' def_list ')' { $$ = $2; } @@ -4978,9 +4987,18 @@ qual_all_Op: all_Op * it's factored out just to eliminate redundant coding. */ a_expr: c_expr - { $$ = $1; } + { $$ = $1; } | a_expr TYPECAST Typename { $$ = makeTypeCast($1, $3); } + | a_expr COLLATE ColId + { + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName($3); + n->args = makeList1($1); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + $$ = (Node *) n; + } | a_expr AT TIME ZONE c_expr { FuncCall *n = makeNode(FuncCall); @@ -5088,6 +5106,30 @@ a_expr: c_expr n->agg_distinct = FALSE; $$ = (Node *) makeSimpleA_Expr(OP, "!~~*", $1, (Node *) n); } + + | a_expr SIMILAR TO a_expr %prec SIMILAR + { $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, $4); } + | a_expr SIMILAR TO a_expr ESCAPE a_expr + { + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName("like_escape"); + n->args = makeList2($4, $6); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n); + } + | a_expr NOT SIMILAR TO a_expr %prec SIMILAR + { $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, $5); } + | a_expr NOT SIMILAR TO a_expr ESCAPE a_expr + { + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName("like_escape"); + n->args = makeList2($5, $7); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + 
$$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, (Node *) n); + } + /* NullTest clause * Define SQL92-style Null test clause. * Allow two forms described in the standard: @@ -5568,6 +5610,20 @@ c_expr: columnref n->agg_distinct = FALSE; $$ = (Node *)n; } + | OVERLAY '(' overlay_list ')' + { + /* overlay(A PLACING B FROM C FOR D) is converted to + * substring(A, 1, C-1) || B || substring(A, C+D) + * overlay(A PLACING B FROM C) is converted to + * substring(A, 1, C-1) || B || substring(A, C+char_length(B)) + */ + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName("overlay"); + n->args = $3; + n->agg_star = FALSE; + n->agg_distinct = FALSE; + $$ = (Node *)n; + } | POSITION '(' position_list ')' { /* position(A in B) is converted to position(B, A) */ @@ -5706,6 +5762,25 @@ extract_arg: IDENT { $$ = $1; } | SCONST { $$ = $1; } ; +/* OVERLAY() arguments + * SQL99 defines the OVERLAY() function: + * o overlay(text placing text from int for int) + * o overlay(text placing text from int) + */ +overlay_list: a_expr overlay_placing substr_from substr_for + { + $$ = makeList4($1, $2, $3, $4); + } + | a_expr overlay_placing substr_from + { + $$ = makeList3($1, $2, $3); + } + ; + +overlay_placing: PLACING a_expr + { $$ = $2; } + ; + /* position_list uses b_expr not a_expr to avoid conflict with general IN */ position_list: b_expr IN_P b_expr @@ -6259,6 +6334,7 @@ unreserved_keyword: | FORCE | FORWARD | FUNCTION + | GET | GLOBAL | HANDLER | HOUR_P @@ -6404,6 +6480,7 @@ col_name_keyword: | NONE | NULLIF | NUMERIC + | OVERLAY | POSITION | REAL | SETOF @@ -6423,7 +6500,7 @@ col_name_keyword: * * Do not include POSITION, SUBSTRING, etc here since they have explicit * productions in a_expr to support the goofy SQL9x argument syntax. 
- * - thomas 2000-11-28 + * - thomas 2000-11-28 */ func_name_keyword: AUTHORIZATION @@ -6445,6 +6522,7 @@ func_name_keyword: | OUTER_P | OVERLAPS | RIGHT + | SIMILAR | VERBOSE ; diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index c60b2df2a6..dca44c448f 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.112 2002/06/11 13:40:51 wieck Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.113 2002/06/11 15:41:37 thomas Exp $ * *------------------------------------------------------------------------- */ @@ -129,6 +129,7 @@ static const ScanKeyword ScanKeywords[] = { {"from", FROM}, {"full", FULL}, {"function", FUNCTION}, + {"get", GET}, {"global", GLOBAL}, {"grant", GRANT}, {"group", GROUP_P}, @@ -211,11 +212,13 @@ static const ScanKeyword ScanKeywords[] = { {"out", OUT_P}, {"outer", OUTER_P}, {"overlaps", OVERLAPS}, + {"overlay", OVERLAY}, {"owner", OWNER}, {"partial", PARTIAL}, {"password", PASSWORD}, {"path", PATH_P}, {"pendant", PENDANT}, + {"placing", PLACING}, {"position", POSITION}, {"precision", PRECISION}, {"primary", PRIMARY}, @@ -251,6 +254,7 @@ static const ScanKeyword ScanKeywords[] = { {"setof", SETOF}, {"share", SHARE}, {"show", SHOW}, + {"similar", SIMILAR}, {"smallint", SMALLINT}, {"some", SOME}, {"stable", STABLE}, diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index a6bc3549e1..e3d88b4521 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.94 2002/05/02 18:44:10 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.95 2002/06/11 15:41:37 thomas Exp $ * *------------------------------------------------------------------------- */ @@ -92,14 +92,14 @@ unsigned char unescape_single_char(unsigned char c); * We use exclusive states for quoted 
strings, extended comments, * and to eliminate parsing troubles for numeric strings. * Exclusive states: - * bit string literal + * bit string literal * extended C-style comments - thomas 1997-07-12 * delimited identifiers (double-quoted identifiers) - thomas 1997-10-27 * hexadecimal numeric string - thomas 1997-11-16 * quoted strings - thomas 1997-07-30 */ -%x xbit +%x xb %x xc %x xd %x xh @@ -107,10 +107,10 @@ unsigned char unescape_single_char(unsigned char c); /* Bit string */ -xbitstart [bB]{quote} -xbitstop {quote} -xbitinside [^']* -xbitcat {quote}{whitespace_with_newline}{quote} +xbstart [bB]{quote} +xbstop {quote} +xbinside [^']* +xbcat {quote}{whitespace_with_newline}{quote} /* Hexadecimal number */ @@ -285,13 +285,13 @@ other . <> { yyerror("unterminated /* comment"); } -{xbitstart} { +{xbstart} { token_start = yytext; - BEGIN(xbit); + BEGIN(xb); startlit(); addlitchar('b'); } -{xbitstop} { +{xbstop} { BEGIN(INITIAL); if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0') yyerror("invalid bit string input"); @@ -299,14 +299,14 @@ other . 
return BITCONST; } {xhinside} | -{xbitinside} { +{xbinside} { addlit(yytext, yyleng); } {xhcat} | -{xbitcat} { +{xbcat} { /* ignore */ } -<> { yyerror("unterminated bit string literal"); } +<> { yyerror("unterminated bit string literal"); } {xhstart} { token_start = yytext; diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index bea464bd2a..d6f7b26fa1 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -174,9 +174,10 @@ static int never = 0; /* for use in asserts; shuts lint up */ /* * regcomp - interface for parser and compilation + * returns 0 success, otherwise REG_something */ -int /* 0 success, otherwise REG_something */ -pg95_regcomp(regex_t *preg, const char *pattern, int cflags) +int +pg_regcomp(regex_t *preg, const char *pattern, int cflags) { struct parse pa; struct re_guts *g; @@ -224,7 +225,6 @@ pg95_regcomp(regex_t *preg, const char *pattern, int cflags) (void) pg_mb2wchar((unsigned char *) pattern, wcp); len = pg_wchar_strlen(wcp); #else - len = strlen((char *) pattern); #endif } @@ -305,7 +305,7 @@ pg95_regcomp(regex_t *preg, const char *pattern, int cflags) /* win or lose, we're done */ if (p->error != 0) /* lose */ - pg95_regfree(preg); + pg_regfree(preg); return p->error; } diff --git a/src/backend/regex/regerror.c b/src/backend/regex/regerror.c index 3fa00946aa..fb12cba304 100644 --- a/src/backend/regex/regerror.c +++ b/src/backend/regex/regerror.c @@ -117,8 +117,8 @@ static struct rerr */ /* ARGSUSED */ size_t -pg95_regerror(int errcode, const regex_t *preg, - char *errbuf, size_t errbuf_size) +pg_regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size) { struct rerr *r; size_t len; diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 4767730441..06459ef1db 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -149,8 +149,8 @@ do { \ * when choosing which matcher to call. 
*/ int /* 0 success, REG_NOMATCH failure */ -pg95_regexec(const regex_t *preg, const char *string, size_t nmatch, - regmatch_t *pmatch, int eflags) +pg_regexec(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t *pmatch, int eflags) { struct re_guts *g = preg->re_g; diff --git a/src/backend/regex/regfree.c b/src/backend/regex/regfree.c index 87e0c99ef9..5672fcf240 100644 --- a/src/backend/regex/regfree.c +++ b/src/backend/regex/regfree.c @@ -49,7 +49,7 @@ * regfree - free everything */ void -pg95_regfree(regex_t *preg) +pg_regfree(regex_t *preg) { struct re_guts *g; diff --git a/src/backend/regex/retest.c b/src/backend/regex/retest.c index eee8ef09ae..ca5d6c5394 100644 --- a/src/backend/regex/retest.c +++ b/src/backend/regex/retest.c @@ -1,7 +1,7 @@ /* * a simple regexp debug program * - * $Header: /cvsroot/pgsql/src/backend/regex/Attic/retest.c,v 1.4 1999/07/17 20:17:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/Attic/retest.c,v 1.5 2002/06/11 15:41:37 thomas Exp $ */ #include "postgres.h" @@ -22,7 +22,7 @@ main() if (p) *p = '\0'; - sts = pg95_regcomp(&re, buf, 1); + sts = pg_regcomp(&re, buf, 1); printf("regcomp: parses \"%s\" and returns %d\n", buf, sts); for (;;) { @@ -33,7 +33,7 @@ main() if (p) *p = '\0'; - sts = pg95_regexec(&re, buf, 0, 0, 0); + sts = pg_regexec(&re, buf, 0, 0, 0); printf("regexec: returns %d\n", sts); } } diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index a61717a860..9afc58feee 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.38 2001/11/05 17:46:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.39 2002/06/11 15:41:37 thomas Exp $ * * Alistair Crooks added the code for the regex caching * agc - cached the regular expressions used - there's a good chance @@ -19,7 +19,7 @@ * * agc - incorporated Keith Bostic's 
Berkeley regex code into * the tree for all ports. To distinguish this regex code from any that - * is existent on a platform, I've prepended the string "pg95_" to + * is existent on a platform, I've prepended the string "pg_" to * the functions regcomp, regerror, regexec and regfree. * Fixed a bug that was originally a typo by me, where `i' was used * instead of `oldest' when compiling regular expressions - benign @@ -53,11 +53,13 @@ struct cached_re_str static int rec = 0; /* # of cached re's */ static struct cached_re_str rev[MAX_CACHED_RES]; /* cached re's */ static unsigned long lru; /* system lru tag */ +static int pg_lastre = 0; /* attempt to compile `re' as an re, then match it against text */ /* cflags - flag to regcomp indicates case sensitivity */ static bool -RE_compile_and_execute(text *text_re, char *text, int cflags) +RE_compile_and_execute(text *text_re, char *text, int cflags, + int nmatch, regmatch_t *pmatch) { char *re; int oldest; @@ -68,8 +70,7 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) re = DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(text_re))); - /* find a previously compiled regular expression */ - for (i = 0; i < rec; i++) + if ((i = pg_lastre) < rec) { if (rev[i].cre_s) { @@ -78,9 +79,28 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) { rev[i].cre_lru = ++lru; pfree(re); - return (pg95_regexec(&rev[i].cre_re, - text, 0, - (regmatch_t *) NULL, 0) == 0); + return (pg_regexec(&rev[i].cre_re, + text, nmatch, + pmatch, 0) == 0); + } + } + } + + /* find a previously compiled regular expression */ + for (i = 0; i < rec; i++) + { + if (i == pg_lastre) continue; + + if (rev[i].cre_s) + { + if (strcmp(rev[i].cre_s, re) == 0 && + rev[i].cre_type == cflags) + { + rev[i].cre_lru = ++lru; + pfree(re); + return (pg_regexec(&rev[i].cre_re, + text, nmatch, + pmatch, 0) == 0); } } } @@ -107,7 +127,7 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) if (rev[i].cre_lru > lru) lru = 
rev[i].cre_lru; } - pg95_regfree(&rev[oldest].cre_re); + pg_regfree(&rev[oldest].cre_re); /* * use malloc/free for the cre_s field because the storage has to @@ -118,7 +138,7 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) } /* compile the re */ - regcomp_result = pg95_regcomp(&rev[oldest].cre_re, re, cflags); + regcomp_result = pg_regcomp(&rev[oldest].cre_re, re, cflags); if (regcomp_result == 0) { /* @@ -130,16 +150,16 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) rev[oldest].cre_type = cflags; pfree(re); /* agc - fixed an old typo here */ - return (pg95_regexec(&rev[oldest].cre_re, text, 0, - (regmatch_t *) NULL, 0) == 0); + return (pg_regexec(&rev[oldest].cre_re, text, + nmatch, pmatch, 0) == 0); } else { char errMsg[1000]; /* re didn't compile */ - pg95_regerror(regcomp_result, &rev[oldest].cre_re, errMsg, - sizeof(errMsg)); + pg_regerror(regcomp_result, &rev[oldest].cre_re, errMsg, + sizeof(errMsg)); elog(ERROR, "Invalid regular expression: %s", errMsg); } @@ -167,7 +187,7 @@ fixedlen_regexeq(char *s, text *p, int charlen, int cflags) memcpy(sterm, s, charlen); sterm[charlen] = '\0'; - result = RE_compile_and_execute(p, sterm, cflags); + result = RE_compile_and_execute(p, sterm, cflags, 0, NULL); pfree(sterm); @@ -230,7 +250,7 @@ textregexne(PG_FUNCTION_ARGS) /* * routines that use the regexp stuff, but ignore the case. - * for this, we use the REG_ICASE flag to pg95_regcomp + * for this, we use the REG_ICASE flag to pg_regcomp */ @@ -281,3 +301,54 @@ nameicregexne(PG_FUNCTION_ARGS) strlen(NameStr(*n)), REG_ICASE | REG_EXTENDED)); } + + +/* textregexsubstr() + * Return a substring matched by a regular expression. 
+ */ +Datum +textregexsubstr(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_P(0); + text *p = PG_GETARG_TEXT_P(1); + text *result; + char *sterm; + int len; + bool match; + int nmatch = 1; + regmatch_t pmatch; + + /* be sure sterm is null-terminated */ + len = VARSIZE(s) - VARHDRSZ; + sterm = (char *) palloc(len + 1); + memcpy(sterm, VARDATA(s), len); + sterm[len] = '\0'; + /* We need the match info back from the pattern match + * to be able to actually extract the substring. + * It seems to be adequate to pass in a structure to return + * only one result. + */ + match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch); + pfree(sterm); + + /* match? then return the substring matching the pattern */ + if (match) + { + return (DirectFunctionCall3(text_substr, + PointerGetDatum(s), + Int32GetDatum(pmatch.rm_so+1), + Int32GetDatum(pmatch.rm_eo-pmatch.rm_so))); + } +#if 0 + /* otherwise, return a zero-length string */ + else + { + result = palloc(VARHDRSZ); + VARATT_SIZEP(result) = VARHDRSZ; + PG_RETURN_TEXT_P(result); + } +#endif + + /* no match: return NULL */ + PG_RETURN_NULL(); +} diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index bd86139621..ae10bfb687 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: catversion.h,v 1.133 2002/05/22 17:21:01 petere Exp $ + * $Id: catversion.h,v 1.134 2002/06/11 15:44:38 thomas Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200205221 +#define CATALOG_VERSION_NO 200206111 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index a7d56dc90d..56831dcfca 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions 
Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.240 2002/05/24 18:57:56 tgl Exp $ + * $Id: pg_proc.h,v 1.241 2002/06/11 15:41:37 thomas Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -1679,7 +1679,7 @@ DESCR("less-equal-greater"); DATA(insert OID = 1359 ( timestamptz PGNSP PGUID 12 f f f t f i 2 1184 "1082 1266" 100 0 0 100 datetimetz_timestamptz - _null_ )); DESCR("convert date and time with time zone to timestamp with time zone"); -DATA(insert OID = 1364 ( time PGNSP PGUID 14 f f f t f i 1 1083 "702" 100 0 0 100 "select time(cast($1 as timestamp without time zone))" - _null_ )); +DATA(insert OID = 1364 ( time PGNSP PGUID 14 f f f t f i 1 1083 "702" 100 0 0 100 "select time(cast($1 as timestamp without time zone))" - _null_ )); DESCR("convert abstime to time"); DATA(insert OID = 1367 ( character_length PGNSP PGUID 12 f f f t f i 1 23 "1042" 100 0 0 100 bpcharlen - _null_ )); @@ -1764,6 +1764,11 @@ DESCR("current schema name"); DATA(insert OID = 1403 ( current_schemas PGNSP PGUID 12 f f f t f s 0 1003 "0" 100 0 0 100 current_schemas - _null_ )); DESCR("current schema search list"); +DATA(insert OID = 1404 ( overlay PGNSP PGUID 14 f f f t f i 4 25 "25 25 23 23" 100 0 0 100 "select substring($1, 1, ($3 - 1)) || $2 || substring($1, ($3 + $4))" - _null_ )); +DESCR("substitute portion of string"); +DATA(insert OID = 1405 ( overlay PGNSP PGUID 14 f f f t f i 3 25 "25 25 23" 100 0 0 100 "select substring($1, 1, ($3 - 1)) || $2 || substring($1, ($3 + char_length($2)))" - _null_ )); +DESCR("substitute portion of string"); + DATA(insert OID = 1406 ( isvertical PGNSP PGUID 12 f f f t f i 2 16 "600 600" 100 0 0 100 point_vert - _null_ )); DESCR("vertically aligned?"); DATA(insert OID = 1407 ( ishorizontal PGNSP PGUID 12 f f f t f i 2 16 "600 600" 100 0 0 100 point_horiz - _null_ )); @@ -2871,6 +2876,11 @@ DESCR("add"); DATA(insert 
OID = 2072 ( date_mi_interval PGNSP PGUID 14 f f f t f i 2 1114 "1082 1186" 100 0 0 100 "select cast($1 as timestamp without time zone) - $2;" - _null_ )); DESCR("subtract"); +DATA(insert OID = 2073 ( substring PGNSP PGUID 12 f f f t f i 2 25 "25 25" 100 0 0 100 textregexsubstr - _null_ )); +DESCR("substitutes regular expression"); +DATA(insert OID = 2074 ( substring PGNSP PGUID 14 f f f t f i 3 25 "25 25 25" 100 0 0 100 "select substring($1, like_escape($2, $3))" - _null_ )); +DESCR("substitutes regular expression with escape argument"); + /* Aggregates (moved here from pg_aggregate for 7.3) */ DATA(insert OID = 2100 ( avg PGNSP PGUID 12 t f f f f i 1 1700 "20" 100 0 0 100 aggregate_dummy - _null_ )); diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h index 09f966e1bf..dd8e0da171 100644 --- a/src/include/regex/regex.h +++ b/src/include/regex/regex.h @@ -53,7 +53,7 @@ typedef struct const pg_wchar *re_endp; /* end pointer for REG_PEND */ struct re_guts *re_g; /* none of your business :-) */ #ifdef MULTIBYTE - pg_wchar *patsave; /* mee too :-) */ + pg_wchar *patsave; /* me too :-) */ #endif } regex_t; @@ -102,12 +102,12 @@ typedef struct #define REG_LARGE 01000 /* force large representation */ #define REG_BACKR 02000 /* force use of backref code */ -extern int pg95_regcomp(regex_t *preg, const char *pattern, int cflags); -extern size_t pg95_regerror(int errcode, const regex_t *preg, - char *errbuf, size_t errbuf_size); -extern int pg95_regexec(const regex_t *preg, const char *string, - size_t nmatch, - regmatch_t *pmatch, int eflags); -extern void pg95_regfree(regex_t *preg); +extern int pg_regcomp(regex_t *preg, const char *pattern, int cflags); +extern size_t pg_regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size); +extern int pg_regexec(const regex_t *preg, const char *string, + size_t nmatch, + regmatch_t *pmatch, int eflags); +extern void pg_regfree(regex_t *preg); #endif /* !_REGEX_H_ */ diff --git 
a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 6340aa2254..7676ce5663 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: builtins.h,v 1.182 2002/05/18 21:38:41 tgl Exp $ + * $Id: builtins.h,v 1.183 2002/06/11 15:41:38 thomas Exp $ * *------------------------------------------------------------------------- */ @@ -327,6 +327,7 @@ extern Datum nameicregexeq(PG_FUNCTION_ARGS); extern Datum nameicregexne(PG_FUNCTION_ARGS); extern Datum texticregexeq(PG_FUNCTION_ARGS); extern Datum texticregexne(PG_FUNCTION_ARGS); +extern Datum textregexsubstr(PG_FUNCTION_ARGS); /* regproc.c */ extern Datum regprocin(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index ebfe8eeb66..680a76c0bd 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -3,6 +3,7 @@ -- Test various data entry syntaxes. 
-- -- SQL92 string continuation syntax +-- E021-03 character string literals SELECT 'first line' ' - next line' ' - third line' @@ -20,6 +21,7 @@ SELECT 'first line' ERROR: parser: parse error at or near "' - third line'" -- -- test conversions between various string types +-- E021-10 implicit casting among the character data types -- SELECT CAST(f1 AS text) AS "text(char)" FROM CHAR_TBL; text(char) @@ -93,7 +95,9 @@ SELECT CAST(name 'namefield' AS varchar) AS "varchar(name)"; -- -- test SQL92 string functions +-- E### and T### are feature reference numbers from SQL99 -- +-- E021-09 trim function SELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS "bunch o blanks"; bunch o blanks ---------------- @@ -118,6 +122,7 @@ SELECT TRIM(BOTH 'x' FROM 'xxxxxsome Xsxxxxx') = 'some Xs' AS "some Xs"; t (1 row) +-- E021-06 substring expression SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890"; 34567890 ---------- @@ -130,6 +135,47 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456"; t (1 row) +-- T581 regular expression substring +SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd"; + bcd +----- + bcd +(1 row) + +-- No match should return NULL +SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True"; + True +------ + t +(1 row) + +-- Null inputs should return NULL +SELECT SUBSTRING('abcdefg' FROM '(b|c)' FOR NULL) IS NULL AS "True"; + True +------ + t +(1 row) + +SELECT SUBSTRING(NULL FROM '(b|c)' FOR '#') IS NULL AS "True"; + True +------ + t +(1 row) + +SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True"; + True +------ + t +(1 row) + +-- PostgreSQL extension to allow omitting the escape character +SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde"; + cde +----- + cde +(1 row) + +-- E021-11 position expression SELECT POSITION('4' IN '1234567890') = '4' AS "4"; 4 --- @@ -142,11 +188,37 @@ SELECT POSITION(5 IN '1234567890') = '5' AS "5"; t (1 row) +-- T312 character overlay function +SELECT
OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f"; + abc45f +-------- + abc45f +(1 row) + +SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5) AS "yabadaba"; + yabadaba +---------- + yabadaba +(1 row) + +SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS "yabadabadoo"; + yabadabadoo +------------- + yabadabadoo +(1 row) + +SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4) AS "bubba"; + bubba +------- + bubba +(1 row) + -- -- test LIKE -- Be sure to form every test as a LIKE/NOT LIKE pair. -- -- simplest examples +-- E061-04 like predicate SELECT 'hawkeye' LIKE 'h%' AS "true"; true ------ @@ -257,6 +329,7 @@ SELECT 'indio' NOT LIKE 'ind_o' ESCAPE '$' AS "false"; (1 row) -- escape character +-- E061-05 like predicate with escape clause SELECT 'h%' LIKE 'h#%' ESCAPE '#' AS "true"; true ------ @@ -469,6 +542,7 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false"; -- -- test implicit type conversion -- +-- E021-07 character concatenation SELECT 'unknown' || ' and unknown' AS "Concat unknown types"; Concat unknown types ---------------------- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index b7f214f4d8..3127d0ebff 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -4,6 +4,7 @@ -- -- SQL92 string continuation syntax +-- E021-03 character string literals SELECT 'first line' ' - next line' ' - third line' @@ -17,6 +18,7 @@ SELECT 'first line' -- -- test conversions between various string types +-- E021-10 implicit casting among the character data types -- SELECT CAST(f1 AS text) AS "text(char)" FROM CHAR_TBL; @@ -41,8 +43,10 @@ SELECT CAST(name 'namefield' AS varchar) AS "varchar(name)"; -- -- test SQL92 string functions +-- E### and T### are feature reference numbers from SQL99 -- +-- E021-09 trim function SELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS "bunch o blanks"; SELECT TRIM(LEADING FROM ' bunch o blanks ') = 'bunch o blanks ' AS "bunch o blanks "; @@ -51,20 +55,46 @@ SELECT
TRIM(TRAILING FROM ' bunch o blanks ') = ' bunch o blanks' AS " bunch SELECT TRIM(BOTH 'x' FROM 'xxxxxsome Xsxxxxx') = 'some Xs' AS "some Xs"; +-- E021-06 substring expression SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890"; SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456"; +-- T581 regular expression substring +SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd"; + +-- No match should return NULL +SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True"; + +-- Null inputs should return NULL +SELECT SUBSTRING('abcdefg' FROM '(b|c)' FOR NULL) IS NULL AS "True"; +SELECT SUBSTRING(NULL FROM '(b|c)' FOR '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True"; + +-- PostgreSQL extension to allow omitting the escape character +SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde"; + +-- E021-11 position expression SELECT POSITION('4' IN '1234567890') = '4' AS "4"; SELECT POSITION(5 IN '1234567890') = '5' AS "5"; +-- T312 character overlay function +SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f"; + +SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5) AS "yabadaba"; + +SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS "yabadabadoo"; + +SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4) AS "bubba"; + -- -- test LIKE -- Be sure to form every test as a LIKE/NOT LIKE pair.
-- -- simplest examples +-- E061-04 like predicate SELECT 'hawkeye' LIKE 'h%' AS "true"; SELECT 'hawkeye' NOT LIKE 'h%' AS "false"; @@ -94,6 +124,7 @@ SELECT 'indio' LIKE 'ind_o' ESCAPE '$' AS "true"; SELECT 'indio' NOT LIKE 'ind_o' ESCAPE '$' AS "false"; -- escape character +-- E061-05 like predicate with escape clause SELECT 'h%' LIKE 'h#%' ESCAPE '#' AS "true"; SELECT 'h%' NOT LIKE 'h#%' ESCAPE '#' AS "false"; @@ -156,6 +187,7 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false"; -- test implicit type conversion -- +-- E021-07 character concatenation SELECT 'unknown' || ' and unknown' AS "Concat unknown types"; SELECT text 'text' || ' and unknown' AS "Concat text to unknown type"; @@ -165,4 +197,3 @@ SELECT char(20) 'characters' || 'and text' AS "Concat char to unknown type"; SELECT text 'text' || char(20) ' and characters' AS "Concat text to char"; SELECT text 'text' || varchar ' and varchar' AS "Concat text to varchar"; -