Add a new, improved version of citext as a contrib module.

David E. Wheeler
2008-07-29 18:31:20 +00:00 · 2008-07-29 18:31:20 +00:00 · ab9907f5e5
commit ab9907f5e5
parent 6fe8796341
11 changed files with 2517 additions and 3 deletions
--- a/contrib/Makefile
+++ b/contrib/Makefile
@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/Makefile,v 1.83 2008/05/08 16:49:36 tgl Exp $
+# $PostgreSQL: pgsql/contrib/Makefile,v 1.84 2008/07/29 18:31:20 tgl Exp $

 subdir = contrib
 top_builddir = ..
@ -8,6 +8,7 @@ WANTED_DIRS = \
 		adminpack	\
 		btree_gist	\
 		chkpass		\
+		citext		\
 		cube		\
 		dblink		\
 		dict_int	\
--- a/contrib/README
+++ b/contrib/README
@ -36,6 +36,10 @@ chkpass -
 	An auto-encrypted password datatype
 	by D'Arcy J.M. Cain <darcy@druid.net>

+citext -
+	A case-insensitive character string datatype
+	by David E. Wheeler <david@kineticode.com>
+
 cube -
 	Multidimensional-cube datatype (GiST indexing example)
 	by Gene Selkov, Jr. <selkovjr@mcs.anl.gov>
--- a/contrib/citext/Makefile
+++ b/contrib/citext/Makefile
@ -0,0 +1,17 @@
+# $PostgreSQL: pgsql/contrib/citext/Makefile,v 1.1 2008/07/29 18:31:20 tgl Exp $
+
+MODULES = citext
+DATA_built = citext.sql
+DATA = uninstall_citext.sql
+REGRESS = citext
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/citext
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
--- a/contrib/citext/citext.c
+++ b/contrib/citext/citext.c
@ -0,0 +1,268 @@
+/*
+ * $PostgreSQL: pgsql/contrib/citext/citext.c,v 1.1 2008/07/29 18:31:20 tgl Exp $
+ */
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "fmgr.h"
+#include "utils/builtins.h"
+#include "utils/formatting.h"
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+/*
+ *      ====================
+ *      FORWARD DECLARATIONS
+ *      ====================
+ */
+
+static int32  citextcmp      (text *left, text *right);
+extern Datum  citext_cmp     (PG_FUNCTION_ARGS);
+extern Datum  citext_hash    (PG_FUNCTION_ARGS);
+extern Datum  citext_eq      (PG_FUNCTION_ARGS);
+extern Datum  citext_ne      (PG_FUNCTION_ARGS);
+extern Datum  citext_gt      (PG_FUNCTION_ARGS);
+extern Datum  citext_ge      (PG_FUNCTION_ARGS);
+extern Datum  citext_lt      (PG_FUNCTION_ARGS);
+extern Datum  citext_le      (PG_FUNCTION_ARGS);
+extern Datum  citext_smaller (PG_FUNCTION_ARGS);
+extern Datum  citext_larger  (PG_FUNCTION_ARGS);
+
+/*
+ *      =================
+ *      UTILITY FUNCTIONS
+ *      =================
+ */
+
+/*
+ * citextcmp()
+ * Internal comparison function for citext strings.
+ * Returns int32 negative, zero, or positive.
+ */
+static int32
+citextcmp (text *left, text *right)
+{
+   char   *lcstr, *rcstr;
+   int32	result;
+
+   lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left));
+   rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right));
+
+   result = varstr_cmp(lcstr, strlen(lcstr),
+					   rcstr, strlen(rcstr));
+
+   pfree(lcstr);
+   pfree(rcstr);
+
+   return result;
+}
+
+/*
+ *      ==================
+ *      INDEXING FUNCTIONS
+ *      ==================
+ */
+
+PG_FUNCTION_INFO_V1(citext_cmp);
+
+Datum
+citext_cmp(PG_FUNCTION_ARGS)
+{
+   text *left  = PG_GETARG_TEXT_PP(0);
+   text *right = PG_GETARG_TEXT_PP(1);
+   int32 result;
+
+   result = citextcmp(left, right);
+
+   PG_FREE_IF_COPY(left, 0);
+   PG_FREE_IF_COPY(right, 1);
+
+   PG_RETURN_INT32(result);
+}
+
+PG_FUNCTION_INFO_V1(citext_hash);
+
+Datum
+citext_hash(PG_FUNCTION_ARGS)
+{
+   text       *txt = PG_GETARG_TEXT_PP(0);
+   char       *str;
+   Datum       result;
+
+   str    = str_tolower(VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
+   result = hash_any((unsigned char *) str, strlen(str));
+   pfree(str);
+
+   /* Avoid leaking memory for toasted inputs */
+   PG_FREE_IF_COPY(txt, 0);
+
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ *      ==================
+ *      OPERATOR FUNCTIONS
+ *      ==================
+ */
+
+PG_FUNCTION_INFO_V1(citext_eq);
+
+Datum
+citext_eq(PG_FUNCTION_ARGS)
+{
+   text *left  = PG_GETARG_TEXT_PP(0);
+   text *right = PG_GETARG_TEXT_PP(1);
+   char *lcstr, *rcstr;
+   bool  result;
+
+   /* We can't compare lengths in advance of downcasing ... */
+
+   lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left));
+   rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right));
+
+   /*
+    * Since we only care about equality or not-equality, we can
+    * avoid all the expense of strcoll() here, and just do bitwise
+    * comparison.
+    */
+   result = (strcmp(lcstr, rcstr) == 0);
+
+   pfree(lcstr);
+   pfree(rcstr);
+   PG_FREE_IF_COPY(left, 0);
+   PG_FREE_IF_COPY(right, 1);
+
+   PG_RETURN_BOOL(result);
+}
+
+PG_FUNCTION_INFO_V1(citext_ne);
+
+Datum
+citext_ne(PG_FUNCTION_ARGS)
+{
+   text *left  = PG_GETARG_TEXT_PP(0);
+   text *right = PG_GETARG_TEXT_PP(1);
+   char *lcstr, *rcstr;
+   bool  result;
+
+   /* We can't compare lengths in advance of downcasing ... */
+
+   lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left));
+   rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right));
+
+   /*
+    * Since we only care about equality or not-equality, we can
+    * avoid all the expense of strcoll() here, and just do bitwise
+    * comparison.
+    */
+   result = (strcmp(lcstr, rcstr) != 0);
+
+   pfree(lcstr);
+   pfree(rcstr);
+   PG_FREE_IF_COPY(left, 0);
+   PG_FREE_IF_COPY(right, 1);
+
+   PG_RETURN_BOOL(result);
+}
+
+PG_FUNCTION_INFO_V1(citext_lt);
+
+Datum
+citext_lt(PG_FUNCTION_ARGS)
+{
+   text *left  = PG_GETARG_TEXT_PP(0);
+   text *right = PG_GETARG_TEXT_PP(1);
+   bool  result;
+
+   result = citextcmp(left, right) < 0;
+
+   PG_FREE_IF_COPY(left, 0);
+   PG_FREE_IF_COPY(right, 1);
+
+   PG_RETURN_BOOL(result);
+}
+
+PG_FUNCTION_INFO_V1(citext_le);
+
+Datum
+citext_le(PG_FUNCTION_ARGS)
+{
+   text *left  = PG_GETARG_TEXT_PP(0);
+   text *right = PG_GETARG_TEXT_PP(1);
+   bool  result;
+
+   result = citextcmp(left, right) <= 0;
+
+   PG_FREE_IF_COPY(left, 0);
+   PG_FREE_IF_COPY(right, 1);
+
+   PG_RETURN_BOOL(result);
+}
+
+PG_FUNCTION_INFO_V1(citext_gt);
+
+Datum
+citext_gt(PG_FUNCTION_ARGS)
+{
+   text *left  = PG_GETARG_TEXT_PP(0);
+   text *right = PG_GETARG_TEXT_PP(1);
+   bool  result;
+
+   result = citextcmp(left, right) > 0;
+
+   PG_FREE_IF_COPY(left, 0);
+   PG_FREE_IF_COPY(right, 1);
+
+   PG_RETURN_BOOL(result);
+}
+
+PG_FUNCTION_INFO_V1(citext_ge);
+
+Datum
+citext_ge(PG_FUNCTION_ARGS)
+{
+   text *left  = PG_GETARG_TEXT_PP(0);
+   text *right = PG_GETARG_TEXT_PP(1);
+   bool  result;
+
+   result = citextcmp(left, right) >= 0;
+
+   PG_FREE_IF_COPY(left, 0);
+   PG_FREE_IF_COPY(right, 1);
+
+   PG_RETURN_BOOL(result);
+}
+
+/*
+ *      ===================
+ *      AGGREGATE FUNCTIONS
+ *      ===================
+ */
+
+PG_FUNCTION_INFO_V1(citext_smaller);
+
+Datum
+citext_smaller(PG_FUNCTION_ARGS)
+{
+   text *left  = PG_GETARG_TEXT_PP(0);
+   text *right = PG_GETARG_TEXT_PP(1);
+   text *result;
+
+   result = citextcmp(left, right) < 0 ? left : right;
+   PG_RETURN_TEXT_P(result);
+}
+
+PG_FUNCTION_INFO_V1(citext_larger);
+
+Datum
+citext_larger(PG_FUNCTION_ARGS)
+{
+   text *left  = PG_GETARG_TEXT_PP(0);
+   text *right = PG_GETARG_TEXT_PP(1);
+   text *result;
+
+   result = citextcmp(left, right) > 0 ? left : right;
+   PG_RETURN_TEXT_P(result);
+}
--- a/contrib/citext/citext.sql.in
+++ b/contrib/citext/citext.sql.in
@ -0,0 +1,450 @@
+/* $PostgreSQL: pgsql/contrib/citext/citext.sql.in,v 1.1 2008/07/29 18:31:20 tgl Exp $ */
+
+-- Adjust this setting to control where the objects get created.
+SET search_path = public;
+
+--
+--  PostgreSQL code for CITEXT.
+--
+-- Most I/O functions, and a few others, piggyback on the "text" type
+-- functions via the implicit cast to text.
+--
+
+--
+-- Shell type to keep things a bit quieter.
+--
+
+CREATE TYPE citext;
+
+--
+--  Input and output functions.
+--
+CREATE OR REPLACE FUNCTION citextin(cstring)
+RETURNS citext
+AS 'textin'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION citextout(citext)
+RETURNS cstring
+AS 'textout'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION citextrecv(internal)
+RETURNS citext
+AS 'textrecv'
+LANGUAGE 'internal' STABLE STRICT;
+
+CREATE OR REPLACE FUNCTION citextsend(citext)
+RETURNS bytea
+AS 'textsend'
+LANGUAGE 'internal' STABLE STRICT;
+
+--
+--  The type itself.
+--
+
+CREATE TYPE citext (
+    INPUT          = citextin,
+    OUTPUT         = citextout,
+    RECEIVE        = citextrecv,
+    SEND           = citextsend,
+    INTERNALLENGTH = VARIABLE,
+    STORAGE        = extended
+);
+
+--
+-- A single cast function, since bpchar needs to have its whitespace trimmed
+-- before it's cast to citext.
+--
+CREATE OR REPLACE FUNCTION citext(bpchar)
+RETURNS citext
+AS 'rtrim1'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+--
+--  Implicit and assignment type casts.
+--
+
+CREATE CAST (citext AS text)    WITHOUT FUNCTION AS IMPLICIT;
+CREATE CAST (citext AS varchar) WITHOUT FUNCTION AS IMPLICIT;
+CREATE CAST (citext AS bpchar)  WITHOUT FUNCTION AS ASSIGNMENT;
+CREATE CAST (text AS citext)    WITHOUT FUNCTION AS ASSIGNMENT;
+CREATE CAST (varchar AS citext) WITHOUT FUNCTION AS ASSIGNMENT;
+CREATE CAST (bpchar AS citext)  WITH FUNCTION citext(bpchar) AS ASSIGNMENT;
+
+--
+-- Operator Functions.
+--
+
+CREATE OR REPLACE FUNCTION citext_eq( citext, citext )
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION citext_ne( citext, citext )
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION citext_lt( citext, citext )
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION citext_le( citext, citext )
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION citext_gt( citext, citext )
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION citext_ge( citext, citext )
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+-- We overload || just to preserve "citext-ness" of the result.
+CREATE OR REPLACE FUNCTION textcat(citext, citext)
+RETURNS citext
+AS 'textcat'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+--
+-- Operators.
+--
+
+CREATE OPERATOR = (
+    LEFTARG    = CITEXT,
+    RIGHTARG   = CITEXT,
+    COMMUTATOR = =,
+    NEGATOR    = <>,
+    PROCEDURE  = citext_eq,
+    RESTRICT   = eqsel,
+    JOIN       = eqjoinsel,
+    HASHES,
+    MERGES
+);
+
+CREATE OPERATOR <> (
+    LEFTARG    = CITEXT,
+    RIGHTARG   = CITEXT,
+    NEGATOR    = =,
+    COMMUTATOR = <>,
+    PROCEDURE  = citext_ne,
+    RESTRICT   = neqsel,
+    JOIN       = neqjoinsel
+);
+
+CREATE OPERATOR < (
+    LEFTARG    = CITEXT,
+    RIGHTARG   = CITEXT,
+    NEGATOR    = >=,
+    COMMUTATOR = >,
+    PROCEDURE  = citext_lt,
+    RESTRICT   = scalarltsel,
+    JOIN       = scalarltjoinsel
+);
+
+CREATE OPERATOR <= (
+    LEFTARG    = CITEXT,
+    RIGHTARG   = CITEXT,
+    NEGATOR    = >,
+    COMMUTATOR = >=,
+    PROCEDURE  = citext_le,
+    RESTRICT   = scalarltsel,
+    JOIN       = scalarltjoinsel
+);
+
+CREATE OPERATOR >= (
+    LEFTARG    = CITEXT,
+    RIGHTARG   = CITEXT,
+    NEGATOR    = <,
+    COMMUTATOR = <=,
+    PROCEDURE  = citext_ge,
+    RESTRICT   = scalargtsel,
+    JOIN       = scalargtjoinsel
+);
+
+CREATE OPERATOR > (
+    LEFTARG    = CITEXT,
+    RIGHTARG   = CITEXT,
+    NEGATOR    = <=,
+    COMMUTATOR = <,
+    PROCEDURE  = citext_gt,
+    RESTRICT   = scalargtsel,
+    JOIN       = scalargtjoinsel
+);
+
+CREATE OPERATOR || (
+    LEFTARG   = CITEXT,
+    RIGHTARG  = CITEXT,
+    PROCEDURE = textcat
+);
+
+--
+-- Support functions for indexing.
+--
+
+CREATE OR REPLACE FUNCTION citext_cmp(citext, citext)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION citext_hash(citext)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE;
+
+--
+-- The btree indexing operator class.
+--
+
+CREATE OPERATOR CLASS citext_ops
+DEFAULT FOR TYPE CITEXT USING btree AS
+    OPERATOR    1   <  (citext, citext),
+    OPERATOR    2   <= (citext, citext),
+    OPERATOR    3   =  (citext, citext),
+    OPERATOR    4   >= (citext, citext),
+    OPERATOR    5   >  (citext, citext),
+    FUNCTION    1   citext_cmp(citext, citext);
+
+--
+-- The hash indexing operator class.
+--
+
+CREATE OPERATOR CLASS citext_ops
+DEFAULT FOR TYPE citext USING hash AS
+    OPERATOR    1   =  (citext, citext),
+    FUNCTION    1   citext_hash(citext);
+
+--
+-- Aggregates.
+--
+
+CREATE OR REPLACE FUNCTION citext_smaller(citext, citext)
+RETURNS citext
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION citext_larger(citext, citext)
+RETURNS citext
+AS 'MODULE_PATHNAME'
+LANGUAGE 'C' IMMUTABLE STRICT;
+
+CREATE AGGREGATE min(citext)  (
+    SFUNC = citext_smaller,
+    STYPE = citext,
+    SORTOP = <
+);
+
+CREATE AGGREGATE max(citext)  (
+    SFUNC = citext_larger,
+    STYPE = citext,
+    SORTOP = >
+);
+
+--
+-- Miscellaneous functions
+-- These exist to preserve the "citext-ness" of the input.
+--
+
+CREATE OR REPLACE FUNCTION lower(citext)
+RETURNS citext AS 'lower'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION upper(citext)
+RETURNS citext AS 'upper'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+-- needed to avoid "function is not unique" errors
+-- XXX find a better way to deal with this...
+CREATE FUNCTION quote_literal(citext)
+RETURNS text AS 'quote_literal'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+--
+-- CITEXT pattern matching.
+--
+
+CREATE OR REPLACE FUNCTION texticlike(citext, citext)
+RETURNS bool AS 'texticlike'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION texticnlike(citext, citext)
+RETURNS bool AS 'texticnlike'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION texticregexeq(citext, citext)
+RETURNS bool AS 'texticregexeq'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION texticregexne(citext, citext)
+RETURNS bool AS 'texticregexne'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OPERATOR ~ (
+    PROCEDURE = texticregexeq,
+    LEFTARG   = citext,
+    RIGHTARG  = citext,
+    NEGATOR   = !~,
+    RESTRICT  = icregexeqsel,
+    JOIN      = icregexeqjoinsel
+);
+
+CREATE OPERATOR ~* (
+    PROCEDURE = texticregexeq,
+    LEFTARG   = citext,
+    RIGHTARG  = citext,
+    NEGATOR   = !~*,
+    RESTRICT  = icregexeqsel,
+    JOIN      = icregexeqjoinsel
+);
+
+CREATE OPERATOR !~ (
+    PROCEDURE = texticregexne,
+    LEFTARG   = citext,
+    RIGHTARG  = citext,
+    NEGATOR   = ~,
+    RESTRICT  = icregexnesel,
+    JOIN      = icregexnejoinsel
+);
+
+CREATE OPERATOR !~* (
+    PROCEDURE = texticregexne,
+    LEFTARG   = citext,
+    RIGHTARG  = citext,
+    NEGATOR   = ~*,
+    RESTRICT  = icregexnesel,
+    JOIN      = icregexnejoinsel
+);
+
+CREATE OPERATOR ~~ (
+    PROCEDURE = texticlike,
+    LEFTARG   = citext,
+    RIGHTARG  = citext,
+    NEGATOR   = !~~,
+    RESTRICT  = iclikesel,
+    JOIN      = iclikejoinsel
+);
+
+CREATE OPERATOR ~~* (
+    PROCEDURE = texticlike,
+    LEFTARG   = citext,
+    RIGHTARG  = citext,
+    NEGATOR   = !~~*,
+    RESTRICT  = iclikesel,
+    JOIN      = iclikejoinsel
+);
+
+CREATE OPERATOR !~~ (
+    PROCEDURE = texticnlike,
+    LEFTARG   = citext,
+    RIGHTARG  = citext,
+    NEGATOR   = ~~,
+    RESTRICT  = icnlikesel,
+    JOIN      = icnlikejoinsel
+);
+
+CREATE OPERATOR !~~* (
+    PROCEDURE = texticnlike,
+    LEFTARG   = citext,
+    RIGHTARG  = citext,
+    NEGATOR   = ~~*,
+    RESTRICT  = icnlikesel,
+    JOIN      = icnlikejoinsel
+);
+
+--
+-- Matching citext to text. 
+--
+
+CREATE OR REPLACE FUNCTION texticlike(citext, text)
+RETURNS bool AS 'texticlike'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION texticnlike(citext, text)
+RETURNS bool AS 'texticnlike'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION texticregexeq(citext, text)
+RETURNS bool AS 'texticregexeq'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OR REPLACE FUNCTION texticregexne(citext, text)
+RETURNS bool AS 'texticregexne'
+LANGUAGE 'internal' IMMUTABLE STRICT;
+
+CREATE OPERATOR ~ (
+    PROCEDURE = texticregexeq,
+    LEFTARG   = citext,
+    RIGHTARG  = text,
+    NEGATOR   = !~,
+    RESTRICT  = icregexeqsel,
+    JOIN      = icregexeqjoinsel
+);
+
+CREATE OPERATOR ~* (
+    PROCEDURE = texticregexeq,
+    LEFTARG   = citext,
+    RIGHTARG  = text,
+    NEGATOR   = !~*,
+    RESTRICT  = icregexeqsel,
+    JOIN      = icregexeqjoinsel
+);
+
+CREATE OPERATOR !~ (
+    PROCEDURE = texticregexne,
+    LEFTARG   = citext,
+    RIGHTARG  = text,
+    NEGATOR   = ~,
+    RESTRICT  = icregexnesel,
+    JOIN      = icregexnejoinsel
+);
+
+CREATE OPERATOR !~* (
+    PROCEDURE = texticregexne,
+    LEFTARG   = citext,
+    RIGHTARG  = text,
+    NEGATOR   = ~*,
+    RESTRICT  = icregexnesel,
+    JOIN      = icregexnejoinsel
+);
+
+CREATE OPERATOR ~~ (
+    PROCEDURE = texticlike,
+    LEFTARG   = citext,
+    RIGHTARG  = text,
+    NEGATOR   = !~~,
+    RESTRICT  = iclikesel,
+    JOIN      = iclikejoinsel
+);
+
+CREATE OPERATOR ~~* (
+    PROCEDURE = texticlike,
+    LEFTARG   = citext,
+    RIGHTARG  = text,
+    NEGATOR   = !~~*,
+    RESTRICT  = iclikesel,
+    JOIN      = iclikejoinsel
+);
+
+CREATE OPERATOR !~~ (
+    PROCEDURE = texticnlike,
+    LEFTARG   = citext,
+    RIGHTARG  = text,
+    NEGATOR   = ~~,
+    RESTRICT  = icnlikesel,
+    JOIN      = icnlikejoinsel
+);
+
+CREATE OPERATOR !~~* (
+    PROCEDURE = texticnlike,
+    LEFTARG   = citext,
+    RIGHTARG  = text,
+    NEGATOR   = ~~*,
+    RESTRICT  = icnlikesel,
+    JOIN      = icnlikejoinsel
+);
--- a/contrib/citext/expected/citext.out
+++ b/contrib/citext/expected/citext.out
--- a/contrib/citext/sql/citext.sql
+++ b/contrib/citext/sql/citext.sql
@ -0,0 +1,328 @@
+--
+--  Test citext datatype
+--
+
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of citext.sql.
+--
+SET client_min_messages = warning;
+\set ECHO none
+\i citext.sql
+RESET client_min_messages;
+\set ECHO all
+
+-- Test the operators and indexing functions
+
+-- Test = and <>.
+SELECT 'a'::citext = 'a'::citext AS t;
+SELECT 'a'::citext = 'A'::citext AS t;
+SELECT 'a'::citext = 'A'::text AS f;        -- text wins the discussion
+SELECT 'a'::citext = 'b'::citext AS f;
+SELECT 'a'::citext = 'ab'::citext AS f;
+SELECT 'a'::citext <> 'ab'::citext AS t;
+
+-- Multibyte sanity tests. Uncomment to run.
+-- SELECT 'À'::citext =  'À'::citext AS t;
+-- SELECT 'À'::citext =  'à'::citext AS t;
+-- SELECT 'À'::text   =  'à'::text   AS f; -- text wins.
+-- SELECT 'À'::citext <> 'B'::citext AS t;
+
+-- Test combining characters making up canonically equivalent strings.
+-- SELECT 'Ä'::text   <> 'Ä'::text   AS t;
+-- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
+
+-- Test the Turkish dotted I. The lowercase is a single byte while the
+-- uppercase is multibyte. This is why the comparison code can't be optimized
+-- to compare string lengths.
+-- SELECT 'i'::citext = 'İ'::citext AS t;
+
+-- Regression.
+-- SELECT 'láska'::citext <> 'laská'::citext AS t;
+
+-- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
+-- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
+-- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
+-- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
+-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
+-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
+-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
+-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
+-- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
+
+-- Test > and >=
+SELECT 'B'::citext > 'a'::citext AS t;
+SELECT 'b'::citext >  'A'::citext AS t;
+SELECT 'B'::citext >  'b'::citext AS f;
+SELECT 'B'::citext >= 'b'::citext AS t;
+
+-- Test < and <=
+SELECT 'a'::citext <  'B'::citext AS t;
+SELECT 'a'::citext <= 'B'::citext AS t;
+
+-- Test implicit casting. citext casts to text, but not vice-versa.
+SELECT 'a'::citext = 'a'::text   AS t;
+SELECT 'A'::text  <> 'a'::citext AS t;
+
+SELECT 'B'::citext <  'a'::text AS t;  -- text wins.
+SELECT 'B'::citext <= 'a'::text AS t;  -- text wins.
+
+SELECT 'a'::citext >  'B'::text AS t;  -- text wins.
+SELECT 'a'::citext >= 'B'::text AS t;  -- text wins.
+
+-- Test implicit casting. citext casts to varchar, but not vice-versa.
+SELECT 'a'::citext = 'a'::varchar   AS t;
+SELECT 'A'::varchar  <> 'a'::citext AS t;
+
+SELECT 'B'::citext <  'a'::varchar AS t;  -- varchar wins.
+SELECT 'B'::citext <= 'a'::varchar AS t;  -- varchar wins.
+
+SELECT 'a'::citext >  'B'::varchar AS t;  -- varchar wins.
+SELECT 'a'::citext >= 'B'::varchar AS t;  -- varchar wins.
+
+-- A couple of longer examlpes to ensure that we don't get any issues with bad
+-- conversions to char[] in the c code. Yes, I did do this.
+
+SELECT 'aardvark'::citext = 'aardvark'::citext AS t;
+SELECT 'aardvark'::citext = 'aardVark'::citext AS t;
+
+-- Check the citext_cmp() function explicitly.
+SELECT citext_cmp('aardvark'::citext, 'aardvark'::citext) AS zero;
+SELECT citext_cmp('aardvark'::citext, 'aardVark'::citext) AS zero;
+SELECT citext_cmp('AARDVARK'::citext, 'AARDVARK'::citext) AS zero;
+SELECT citext_cmp('B'::citext, 'a'::citext) AS one;
+
+-- Do some tests using a table and index.
+
+CREATE TEMP TABLE try (
+   name citext PRIMARY KEY
+);
+
+INSERT INTO try (name)
+VALUES ('a'), ('ab'), ('â'), ('aba'), ('b'), ('ba'), ('bab'), ('AZ');
+
+SELECT name, 'a' = name AS eq_a   FROM try WHERE name <> 'â';
+SELECT name, 'a' = name AS t      FROM try where name = 'a';
+SELECT name, 'A' = name AS "eq_A" FROM try WHERE name <> 'â';
+SELECT name, 'A' = name AS t      FROM try where name = 'A';
+SELECT name, 'A' = name AS t      FROM try where name = 'A';
+
+-- expected failures on duplicate key
+INSERT INTO try (name) VALUES ('a');
+INSERT INTO try (name) VALUES ('A');
+INSERT INTO try (name) VALUES ('aB');
+
+-- Make sure that citext_smaller() and citext_lager() work properly.
+SELECT citext_smaller( 'aa'::citext, 'ab'::citext ) = 'aa' AS t;
+SELECT citext_smaller( 'AAAA'::citext, 'bbbb'::citext ) = 'AAAA' AS t;
+SELECT citext_smaller( 'aardvark'::citext, 'Aaba'::citext ) = 'Aaba' AS t;
+SELECT citext_smaller( 'aardvark'::citext, 'AARDVARK'::citext ) = 'AARDVARK' AS t;
+
+SELECT citext_larger( 'aa'::citext, 'ab'::citext ) = 'ab' AS t;
+SELECT citext_larger( 'AAAA'::citext, 'bbbb'::citext ) = 'bbbb' AS t;
+SELECT citext_larger( 'aardvark'::citext, 'Aaba'::citext ) = 'aardvark' AS t;
+
+-- Test aggregate functions and sort ordering
+
+CREATE TEMP TABLE srt (
+   name CITEXT
+);
+
+INSERT INTO srt (name)
+VALUES ('aardvark'),
+       ('AAA'),
+       ('aba'),
+       ('ABC'),
+       ('abd');
+
+-- Check the min() and max() aggregates, with and without index.
+set enable_seqscan = off;
+SELECT MIN(name) AS "AAA" FROM srt;
+SELECT MAX(name) AS abd FROM srt;
+reset enable_seqscan;
+set enable_indexscan = off;
+SELECT MIN(name) AS "AAA" FROM srt;
+SELECT MAX(name) AS abd FROM srt;
+reset enable_indexscan;
+
+-- Check sorting likewise
+set enable_seqscan = off;
+SELECT name FROM srt ORDER BY name;
+reset enable_seqscan;
+set enable_indexscan = off;
+SELECT name FROM srt ORDER BY name;
+reset enable_indexscan;
+
+-- Test assignment casts.
+SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::text;
+SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::varchar;
+SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::bpchar;
+SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA';
+SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::citext;
+
+-- LIKE shoudl be case-insensitive
+SELECT name FROM srt WHERE name     LIKE '%a%' ORDER BY name;
+SELECT name FROM srt WHERE name NOT LIKE '%b%' ORDER BY name;
+SELECT name FROM srt WHERE name     LIKE '%A%' ORDER BY name;
+SELECT name FROM srt WHERE name NOT LIKE '%B%' ORDER BY name;
+
+-- ~~ should be case-insensitive
+SELECT name FROM srt WHERE name ~~  '%a%' ORDER BY name;
+SELECT name FROM srt WHERE name !~~ '%b%' ORDER BY name;
+SELECT name FROM srt WHERE name ~~  '%A%' ORDER BY name;
+SELECT name FROM srt WHERE name !~~ '%B%' ORDER BY name;
+
+-- ~ should be case-insensitive
+SELECT name FROM srt WHERE name ~  '^a' ORDER BY name;
+SELECT name FROM srt WHERE name !~ 'a$' ORDER BY name;
+SELECT name FROM srt WHERE name ~  '^A' ORDER BY name;
+SELECT name FROM srt WHERE name !~ 'A$' ORDER BY name;
+
+-- SIMILAR TO should be case-insensitive.
+SELECT name FROM srt WHERE name SIMILAR TO '%a.*';
+SELECT name FROM srt WHERE name SIMILAR TO '%A.*';
+
+-- Table 9-5. SQL String Functions and Operators
+SELECT 'D'::citext || 'avid'::citext = 'David'::citext AS citext_concat;
+SELECT 'Value: '::citext || 42 = 'Value: 42' AS text_concat;
+SELECT  42 || ': value'::citext ='42: value' AS int_concat;
+SELECT bit_length('jose'::citext) = 32 AS t;
+SELECT bit_length( name ) = bit_length( name::text ) AS t FROM srt;
+SELECT textlen( name ) = textlen( name::text ) AS t FROM srt;
+SELECT char_length( name ) = char_length( name::text ) AS t FROM srt;
+SELECT lower( name ) = lower( name::text ) AS t FROM srt;
+SELECT octet_length( name ) = octet_length( name::text ) AS t FROM srt;
+SELECT overlay( name placing 'hom' from 2 for 4) = overlay( name::text placing 'hom' from 2 for 4) AS t FROM srt;
+SELECT position( 'a' IN name ) = position( 'a' IN name::text ) AS t FROM srt;
+
+SELECT substr('alphabet'::citext, 3)       = 'phabet' AS t;
+SELECT substr('alphabet'::citext, 3, 2)    = 'ph' AS t;
+
+SELECT substring('alphabet'::citext, 3)    = 'phabet' AS t;
+SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t;
+SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t;
+SELECT substring('Thomas'::citext from 2) = 'homas' AS t;
+SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
+SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+
+SELECT trim('    trim    '::citext)               = 'trim' AS t;
+SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t;
+SELECT trim('xxxxxxtrimxxxx'::text,  'x'::citext) = 'trim' AS t;
+SELECT trim('xxxxxtrimxxxx'::text,   'x'::citext) = 'trim' AS t;
+
+SELECT upper( name ) = upper( name::text ) AS t FROM srt;
+
+-- Table 9-6. Other String Functions.
+SELECT ascii( name ) = ascii( name::text ) AS t FROM srt;
+
+SELECT btrim('    trim'::citext                   ) = 'trim' AS t;
+SELECT btrim('xxxxxtrimxxxx'::citext, 'x'::citext ) = 'trim' AS t;
+SELECT btrim('xyxtrimyyx'::citext,    'xy'::citext) = 'trim' AS t;
+SELECT btrim('xyxtrimyyx'::text,      'xy'::citext) = 'trim' AS t;
+SELECT btrim('xyxtrimyyx'::citext,    'xy'::text  ) = 'trim' AS t;
+
+-- chr() takes an int and returns text.
+-- convert() and convert_from take bytea and return text.
+
+SELECT convert_to( name, 'ISO-8859-1' ) = convert_to( name::text, 'ISO-8859-1' ) AS t FROM srt;
+SELECT decode('MTIzAAE='::citext, 'base64') = decode('MTIzAAE='::text, 'base64') AS t;
+-- encode() takes bytea and returns text.
+SELECT initcap('hi THOMAS'::citext) = initcap('hi THOMAS'::text) AS t;
+SELECT length( name ) = length( name::text ) AS t FROM srt;
+
+SELECT lpad('hi'::citext, 5              ) = '   hi' AS t;
+SELECT lpad('hi'::citext, 5, 'xy'::citext) = 'xyxhi' AS t;
+SELECT lpad('hi'::text,   5, 'xy'::citext) = 'xyxhi' AS t;
+SELECT lpad('hi'::citext, 5, 'xy'::text  ) = 'xyxhi' AS t;
+
+SELECT ltrim('    trim'::citext               ) = 'trim' AS t;
+SELECT ltrim('zzzytrim'::citext, 'xyz'::citext) = 'trim' AS t;
+SELECT ltrim('zzzytrim'::text,   'xyz'::citext) = 'trim' AS t;
+SELECT ltrim('zzzytrim'::citext, 'xyz'::text  ) = 'trim' AS t;
+
+SELECT md5( name ) = md5( name::text ) AS t FROM srt;
+-- pg_client_encoding() takes no args and returns name.
+SELECT quote_ident( name ) = quote_ident( name::text ) AS t FROM srt;
+SELECT quote_literal( name ) = quote_literal( name::text ) AS t FROM srt;
+
+SELECT regexp_matches('foobarbequebaz'::citext, '(bar)(beque)') = ARRAY[ 'bar', 'beque' ] AS t;
+SELECT regexp_replace('Thomas'::citext, '.[mN]a.', 'M') = 'ThM' AS t;
+SELECT regexp_split_to_array('hello world'::citext, E'\\s+') = ARRAY[ 'hello', 'world' ] AS t;
+SELECT regexp_split_to_table('hello world'::citext, E'\\s+') AS words;
+
+SELECT repeat('Pg'::citext, 4) = 'PgPgPgPg' AS t;
+SELECT replace('abcdefabcdef'::citext, 'cd', 'XX') = 'abXXefabXXef' AS t;
+
+SELECT rpad('hi'::citext, 5              ) = 'hi   ' AS t;
+SELECT rpad('hi'::citext, 5, 'xy'::citext) = 'hixyx' AS t;
+SELECT rpad('hi'::text,   5, 'xy'::citext) = 'hixyx' AS t;
+SELECT rpad('hi'::citext, 5, 'xy'::text  ) = 'hixyx' AS t;
+
+SELECT rtrim('trim    '::citext             ) = 'trim' AS t;
+SELECT rtrim('trimxxxx'::citext, 'x'::citext) = 'trim' AS t;
+SELECT rtrim('trimxxxx'::text,   'x'::citext) = 'trim' AS t;
+SELECT rtrim('trimxxxx'::text,   'x'::text  ) = 'trim' AS t;
+
+SELECT split_part('abc~@~def~@~ghi'::citext, '~@~', 2) = 'def' AS t;
+SELECT strpos('high'::citext, 'ig'        ) = 2 AS t;
+SELECT strpos('high'::citext, 'ig'::citext) = 2 AS t;
+-- to_ascii() does not support UTF-8.
+-- to_hex() takes a numeric argument.
+SELECT substr('alphabet', 3, 2) = 'ph' AS t;
+SELECT translate('abcdefabcdef'::citext, 'cd', 'XX') = 'abXXefabXXef' AS t;
+
+-- TODO These functions should work case-insensitively, but don't.
+SELECT regexp_matches('foobarbequebaz'::citext, '(BAR)(BEQUE)') = ARRAY[ 'bar', 'beque' ] AS "t TODO";
+SELECT regexp_replace('Thomas'::citext, '.[MN]A.', 'M') = 'THM' AS "t TODO";
+SELECT regexp_split_to_array('helloTworld'::citext, 't') = ARRAY[ 'hello', 'world' ] AS "t TODO";
+SELECT regexp_split_to_table('helloTworld'::citext, 't') AS "words TODO";
+SELECT replace('abcdefabcdef'::citext, 'CD', 'XX') = 'abXXefabXXef' AS "t TODO";
+SELECT split_part('abcTdefTghi'::citext, 't', 2) = 'def' AS "t TODO";
+SELECT strpos('high'::citext, 'IG'::citext) = 2 AS "t TODO";
+SELECT translate('abcdefabcdef'::citext, 'CD', 'XX') = 'abXXefabXXef' AS "t TODO";
+
+-- Table 9-20. Formatting Functions
+SELECT to_date('05 Dec 2000'::citext, 'DD Mon YYYY'::citext)
+     = to_date('05 Dec 2000',         'DD Mon YYYY') AS t;
+SELECT to_date('05 Dec 2000'::citext, 'DD Mon YYYY')
+     = to_date('05 Dec 2000',         'DD Mon YYYY') AS t;
+SELECT to_date('05 Dec 2000',         'DD Mon YYYY'::citext)
+     = to_date('05 Dec 2000',         'DD Mon YYYY') AS t;
+
+SELECT to_number('12,454.8-'::citext, '99G999D9S'::citext)
+     = to_number('12,454.8-',         '99G999D9S') AS t;
+SELECT to_number('12,454.8-'::citext, '99G999D9S')
+     = to_number('12,454.8-',         '99G999D9S') AS t;
+SELECT to_number('12,454.8-',         '99G999D9S'::citext)
+     = to_number('12,454.8-',         '99G999D9S') AS t;
+
+SELECT to_timestamp('05 Dec 2000'::citext, 'DD Mon YYYY'::citext)
+     = to_timestamp('05 Dec 2000',         'DD Mon YYYY') AS t;
+SELECT to_timestamp('05 Dec 2000'::citext, 'DD Mon YYYY')
+     = to_timestamp('05 Dec 2000',         'DD Mon YYYY') AS t;
+SELECT to_timestamp('05 Dec 2000',         'DD Mon YYYY'::citext)
+     = to_timestamp('05 Dec 2000',         'DD Mon YYYY') AS t;
+
+-- Try assigning function results to a column.
+SELECT COUNT(*) = 8::bigint AS t FROM try;
+INSERT INTO try
+VALUES ( to_char(  now()::timestamp,          'HH12:MI:SS') ),
+       ( to_char(  now() + '1 sec'::interval, 'HH12:MI:SS') ), -- timetamptz
+       ( to_char(  '15h 2m 12s'::interval,    'HH24:MI:SS') ),
+       ( to_char(  current_date,              '999') ),
+       ( to_char(  125::int,                  '999') ),
+       ( to_char(  127::int4,                 '999') ),
+       ( to_char(  126::int8,                 '999') ),
+       ( to_char(  128.8::real,               '999D9') ),
+       ( to_char(  125.7::float4,             '999D9') ),
+       ( to_char(  125.9::float8,             '999D9') ),
+       ( to_char( -125.8::numeric,            '999D99S') );
+
+SELECT COUNT(*) = 19::bigint AS t FROM try;
+
+SELECT like_escape( name, '' ) = like_escape( name::text, '' ) AS t FROM srt;
+SELECT like_escape( name::text, ''::citext ) =like_escape( name::text, '' ) AS t FROM srt;
+
+--- TODO: Get citext working with magic cast functions?
+SELECT cidr( '192.168.1.2'::citext ) = cidr( '192.168.1.2'::text ) AS "t TODO";
+SELECT '192.168.1.2'::cidr::citext = '192.168.1.2'::cidr::text AS "t TODO";
--- a/contrib/citext/uninstall_citext.sql
+++ b/contrib/citext/uninstall_citext.sql
@ -0,0 +1,69 @@
+/* $PostgreSQL: pgsql/contrib/citext/uninstall_citext.sql,v 1.1 2008/07/29 18:31:20 tgl Exp $ */
+
+-- Adjust this setting to control where the objects get dropped.
+SET search_path = public;
+
+DROP OPERATOR CLASS citext_ops USING btree CASCADE;
+DROP OPERATOR CLASS citext_ops USING hash CASCADE;
+
+DROP AGGREGATE min(citext);
+DROP AGGREGATE max(citext);
+
+DROP OPERATOR = (citext, citext);
+DROP OPERATOR <> (citext, citext);
+DROP OPERATOR < (citext, citext);
+DROP OPERATOR <= (citext, citext);
+DROP OPERATOR >= (citext, citext);
+DROP OPERATOR > (citext, citext);
+DROP OPERATOR || (citext, citext);
+
+DROP OPERATOR ~ (citext, citext);
+DROP OPERATOR ~* (citext, citext);
+DROP OPERATOR !~ (citext, citext);
+DROP OPERATOR !~* (citext, citext);
+DROP OPERATOR ~~ (citext, citext);
+DROP OPERATOR ~~* (citext, citext);
+DROP OPERATOR !~~ (citext, citext);
+DROP OPERATOR !~~* (citext, citext);
+
+DROP OPERATOR ~ (citext, text);
+DROP OPERATOR ~* (citext, text);
+DROP OPERATOR !~ (citext, text);
+DROP OPERATOR !~* (citext, text);
+DROP OPERATOR ~~ (citext, text);
+DROP OPERATOR ~~* (citext, text);
+DROP OPERATOR !~~ (citext, text);
+DROP OPERATOR !~~* (citext, text);
+
+DROP CAST (citext AS text);
+DROP CAST (citext AS varchar);
+DROP CAST (citext AS bpchar);
+DROP CAST (text AS citext);
+DROP CAST (varchar AS citext);
+DROP CAST (bpchar AS citext);
+
+DROP FUNCTION citext(bpchar);
+DROP FUNCTION citext_eq(citext, citext);
+DROP FUNCTION citext_ne(citext, citext);
+DROP FUNCTION citext_lt(citext, citext);
+DROP FUNCTION citext_le(citext, citext);
+DROP FUNCTION citext_gt(citext, citext);
+DROP FUNCTION citext_ge(citext, citext);
+DROP FUNCTION textcat(citext, citext);
+DROP FUNCTION citext_cmp(citext, citext);
+DROP FUNCTION citext_hash(citext);
+DROP FUNCTION citext_smaller(citext, citext);
+DROP FUNCTION citext_larger(citext, citext);
+DROP FUNCTION lower(citext);
+DROP FUNCTION upper(citext);
+DROP FUNCTION quote_literal(citext);
+DROP FUNCTION texticlike(citext, citext);
+DROP FUNCTION texticnlike(citext, citext);
+DROP FUNCTION texticregexeq(citext, citext);
+DROP FUNCTION texticregexne(citext, citext);
+DROP FUNCTION texticlike(citext, text);
+DROP FUNCTION texticnlike(citext, text);
+DROP FUNCTION texticregexeq(citext, text);
+DROP FUNCTION texticregexne(citext, text);
+
+DROP TYPE citext CASCADE;
--- a/doc/src/sgml/citext.sgml
+++ b/doc/src/sgml/citext.sgml
@ -0,0 +1,222 @@
+<!-- $PostgreSQL: pgsql/doc/src/sgml/citext.sgml,v 1.1 2008/07/29 18:31:20 tgl Exp $ -->
+
+<sect1 id="citext">
+ <title>citext</title>
+
+ <indexterm zone="citext">
+  <primary>citext</primary>
+ </indexterm>
+
+ <para>
+  The <filename>citext</> module provides a case-insensitive
+  character string type, <type>citext</>. Essentially, it internally calls
+  <function>lower</> when comparing values. Otherwise, it behaves almost
+  exactly like <type>text</>.
+ </para>
+
+ <sect2>
+  <title>Rationale</title>
+
+  <para>
+   The standard approach to doing case-insensitive matches
+   in <productname>PostgreSQL</> has been to use the <function>lower</>
+   function when comparing values, for example
+
+  <programlisting>
+   SELECT * FROM tab WHERE lower(col) = LOWER(?);
+  </programlisting>
+  </para>
+
+  <para>
+   This works reasonably well, but has a number of drawbacks:
+  </para>
+
+   <itemizedlist>
+    <listitem>
+     <para>
+      It makes your SQL statements verbose, and you always have to remember to
+      use <function>lower</> on both the column and the query value.
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      It won't use an index, unless you create a functional index using
+      <function>lower</>.
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      If you declare a column as <literal>UNIQUE</> or <literal>PRIMARY
+      KEY</>, the implicitly generated index is case-sensitive.  So it's
+      useless for case-insensitive searches, and it won't enforce
+      uniqueness case-insensitively.
+     </para>
+    </listitem>
+   </itemizedlist>
+
+   <para>
+    The <type>citext</> data type allows you to eliminate calls
+    to <function>lower</> in SQL queries, and allows a primary key to
+    be case-insensitive. <type>citext</> is locale-aware, just
+    like <type>text</>, which means that the comparison of uppercase and
+    lowercase characters is dependent on the rules of
+    the <literal>LC_CTYPE</> locale setting. Again, this behavior is
+    identical to the use of <function>lower</> in queries. But because it's
+    done transparently by the datatype, you don't have to remember to do
+    anything special in your queries.
+   </para>
+
+ </sect2>
+
+ <sect2>
+  <title>How to Use It</title>
+
+  <para>
+   Here's a simple example of usage:
+
+  <programlisting>
+   CREATE TABLE users (
+       nick CITEXT PRIMARY KEY,
+       pass TEXT   NOT NULL
+   );
+
+   INSERT INTO users VALUES ( 'larry',  md5(random()::text) );
+   INSERT INTO users VALUES ( 'Tom',    md5(random()::text) );
+   INSERT INTO users VALUES ( 'Damian', md5(random()::text) );
+   INSERT INTO users VALUES ( 'NEAL',   md5(random()::text) );
+   INSERT INTO users VALUES ( 'Bj&oslash;rn',  md5(random()::text) );
+
+   SELECT * FROM users WHERE nick = 'Larry';
+  </programlisting>
+
+   The <command>SELECT</> statement will return one tuple, even though
+   the <structfield>nick</> column was set to <quote>larry</> and the query
+   was for <quote>Larry</>.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Limitations</title>
+
+   <itemizedlist>
+    <listitem>
+     <para>
+      <type>citext</>'s behavior depends on
+      the <literal>LC_CTYPE</> setting of your database. How it compares
+      values is therefore determined when
+      <application>initdb</> is run to create the cluster. It is not truly
+      case-insensitive in the terms defined by the Unicode standard.
+      Effectively, what this means is that, as long as you're happy with your
+      collation, you should be happy with <type>citext</>'s comparisons. But
+      if you have data in different languages stored in your database, users
+      of one language may find their query results are not as expected if the
+      collation is for another language.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+      The pattern-matching comparison operators, such as <literal>LIKE</>,
+      <literal>~</>, <literal>~~</>, <literal>!~</>, <literal>!~~</>, etc.,
+      have been overloaded to make case-insensitive comparisons when their
+      left-hand argument is of type <type>citext</>.  However, related
+      functions have not been changed, including:
+     </para>
+
+     <itemizedlist>
+      <listitem>
+       <para>
+         <function>regexp_replace()</>
+       </para>
+      </listitem>
+      <listitem>
+       <para>
+         <function>regexp_split_to_array()</>
+       </para>
+      </listitem>
+      <listitem>
+       <para>
+         <function>regexp_split_to_table()</>
+       </para>
+      </listitem>
+      <listitem>
+       <para>
+         <function>replace()</>
+       </para>
+      </listitem>
+      <listitem>
+       <para>
+         <function>split_part()</>
+       </para>
+      </listitem>
+      <listitem>
+       <para>
+         <function>strpos()</>
+       </para>
+      </listitem>
+      <listitem>
+       <para>
+         <function>translate()</>
+       </para>
+      </listitem>
+     </itemizedlist>
+
+     <para>
+      Of course, for the regular expression functions, you can specify
+      case-insensitive comparisons in their <parameter>flags</> arguments, but
+      the same cannot be done for the the non-regexp functions.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+       <type>citext</> is not as efficient as <type>text</> because the
+       operator functions and the btree comparison functions must make copies
+       of the data and convert it to lower case for comparisons. It is,
+       however, slightly more efficient than using <function>lower</> to get
+       case-insensitive matching.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+      <productname>PostgreSQL</> supports casting between <type>text</>
+      and any other type (even non-string types) by using the other type's
+      I/O functions.  This doesn't work with <type>citext</>.  However,
+      you can cast via I/O functions in two steps, for example
+      <literal><replaceable>somevalue</>::text::citext</literal> or
+      <literal><replaceable>citextvalue</>::text::<replaceable>sometype</></literal>.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+      <type>citext</> doesn't help much if you need data to compare
+      case-sensitively in some contexts and case-insensitively in other
+      contexts.  The standard answer is to use the <type>text</> type and
+      manually use the <function>lower</> function when you need to compare
+      case-insensitively; this works all right if case-insensitive comparison
+      is needed only infrequently.  If you need case-insensitive most of
+      the time and case-sensitive infrequently, consider storing the data
+      as <type>citext</> and explicitly casting the column to <type>text</>
+      when you want case-sensitive comparison.  In either situation, you
+      will need two indexes if you want both types of searches to be fast.
+    </para>
+    </listitem>
+   </itemizedlist>
+ </sect2>
+
+ <sect2>
+  <title>Author</title>
+
+  <para>
+   David E. Wheeler <email>david@kineticode.com</email>
+  </para>
+
+  <para>
+    Inspired by the original <type>citext</> module by Donald Fraser.
+  </para>
+
+ </sect2>
+
+</sect1>
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.8 2007/12/06 04:12:09 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.9 2008/07/29 18:31:20 tgl Exp $ -->

 <appendix id="contrib">
 <title>Additional Supplied Modules</title>
@ -81,6 +81,7 @@ psql -d dbname -f <replaceable>SHAREDIR</>/contrib/<replaceable>module</>.sql
 &adminpack;
 &btree-gist;
 &chkpass;
+ &citext;
 &cube;
 &dblink;
 &dict-int;
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.56 2007/12/03 04:18:47 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.57 2008/07/29 18:31:20 tgl Exp $ -->

 <!entity history    SYSTEM "history.sgml">
 <!entity info       SYSTEM "info.sgml">
@ -94,6 +94,7 @@
 <!entity adminpack       SYSTEM "adminpack.sgml">
 <!entity btree-gist      SYSTEM "btree-gist.sgml">
 <!entity chkpass         SYSTEM "chkpass.sgml">
+<!entity citext          SYSTEM "citext.sgml">
 <!entity cube            SYSTEM "cube.sgml">
 <!entity dblink          SYSTEM "dblink.sgml">
 <!entity dict-int        SYSTEM "dict-int.sgml">