diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index afdda69720..368673c66e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ - + Functions and Operators @@ -7857,11 +7857,11 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple - ts_debug( config regconfig, document text) - setof ts_debug + ts_debug( config regconfig, document text, OUT alias text, OUT description text, OUT token text, OUT dictionaries regdictionary[], OUT dictionary regdictionary, OUT lexemes text[]) + setof record test a configuration ts_debug('english', 'The Brightest supernovaes') - (lword,"Latin word",The,{english_stem},"english_stem: {}") ... + (lword,"Latin word",The,{english_stem},english_stem,{}) ... ts_lexize(dict regdictionary, token text) diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 03625b41a5..81b54d8e17 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -1,4 +1,4 @@ - + Full Text Search @@ -1699,18 +1699,18 @@ ON messages FOR EACH ROW EXECUTE PROCEDURE messages_trigger(); - word text — the value of a lexeme + word text — the value of a lexeme - ndoc integer — number of documents + ndoc integer — number of documents (tsvectors) the word occurred in - nentry integer — total number of + nentry integer — total number of occurrences of the word @@ -1901,8 +1901,8 @@ LIMIT 10; as the entire word and as each component: -SELECT "Alias", "Description", "Token" FROM ts_debug('foo-bar-beta1'); - Alias | Description | Token +SELECT alias, description, token FROM ts_debug('foo-bar-beta1'); + alias | description | token -------------+-------------------------------+--------------- hword | Hyphenated word | foo-bar-beta1 lpart_hword | Latin part of hyphenated word | foo @@ -1917,8 +1917,8 @@ SELECT "Alias", "Description", "Token" FROM ts_debug('foo-bar-beta1'); instructive example: -SELECT "Alias", "Description", "Token" FROM 
ts_debug('http://foo.com/stuff/index.html'); - Alias | Description | Token +SELECT alias, description, token FROM ts_debug('http://foo.com/stuff/index.html'); + alias | description | token ----------+---------------+-------------------------- protocol | Protocol head | http:// url | URL | foo.com/stuff/index.html @@ -2186,25 +2186,23 @@ SELECT ts_lexize('public.simple_dict','The'); synonym dictionary and put it before the english_stem dictionary: -SELECT * FROM ts_debug('english','Paris'); - Alias | Description | Token | Dictionaries | Lexized token --------+-------------+-------+----------------+---------------------- - lword | Latin word | Paris | {english_stem} | english_stem: {pari} -(1 row) +SELECT * FROM ts_debug('english', 'Paris'); + alias | description | token | dictionaries | dictionary | lexemes +-------+-------------+-------+----------------+--------------+--------- + lword | Latin word | Paris | {english_stem} | english_stem | {pari} -CREATE TEXT SEARCH DICTIONARY synonym ( +CREATE TEXT SEARCH DICTIONARY my_synonym ( TEMPLATE = synonym, SYNONYMS = my_synonyms ); ALTER TEXT SEARCH CONFIGURATION english - ALTER MAPPING FOR lword WITH synonym, english_stem; + ALTER MAPPING FOR lword WITH my_synonym, english_stem; -SELECT * FROM ts_debug('english','Paris'); - Alias | Description | Token | Dictionaries | Lexized token --------+-------------+-------+------------------------+------------------ - lword | Latin word | Paris | {synonym,english_stem} | synonym: {paris} -(1 row) +SELECT * FROM ts_debug('english', 'Paris'); + alias | description | token | dictionaries | dictionary | lexemes +-------+-------------+-------+---------------------------+------------+--------- + lword | Latin word | Paris | {my_synonym,english_stem} | my_synonym | {paris} @@ -2711,7 +2709,14 @@ SHOW default_text_search_config; - ts_debug( config regconfig, document text) returns setof ts_debug + ts_debug( config regconfig, document text, + OUT alias text, + OUT description text, + OUT 
token text, + OUT dictionaries regdictionary[], + OUT dictionary regdictionary, + OUT lexemes text[]) + returns setof record @@ -2725,23 +2730,47 @@ SHOW default_text_search_config; - ts_debug's result row type is defined as: + ts_debug returns one row for each token identified in the text + by the parser. The columns returned are - -CREATE TYPE ts_debug AS ( - "Alias" text, - "Description" text, - "Token" text, - "Dictionaries" regdictionary[], - "Lexized token" text -); - - - One row is produced for each token identified by the parser. - The first three columns describe the token, and the fourth lists - the dictionaries selected by the configuration for that token's type. - The last column shows the result of dictionary processing: which - dictionary (if any) recognized the token, and what it produced. + + + + alias text — short name of the token type + + + + + description text — description of the + token type + + + + + token text — text of the token + + + + + dictionaries regdictionary[] — the + dictionaries selected by the configuration for this token type + + + + + dictionary regdictionary — the dictionary + that recognized the token, or NULL if none did + + + + + lexemes text[] — the lexeme(s) produced + by the dictionary that recognized the token, or NULL if + none did; an empty array ({}) means it was recognized as a + stop word + + + @@ -2749,33 +2778,32 @@ CREATE TYPE ts_debug AS ( SELECT * FROM ts_debug('english','a fat cat sat on a mat - it ate a fat rats'); - Alias | Description | Token | Dictionaries | Lexized token --------+---------------+-------+--------------+---------------- - lword | Latin word | a | {english} | english: {} - blank | Space symbols | | | - lword | Latin word | fat | {english} | english: {fat} - blank | Space symbols | | | - lword | Latin word | cat | {english} | english: {cat} - blank | Space symbols | | | - lword | Latin word | sat | {english} | english: {sat} - blank | Space symbols | | | - lword | Latin word | on | {english} 
| english: {} - blank | Space symbols | | | - lword | Latin word | a | {english} | english: {} - blank | Space symbols | | | - lword | Latin word | mat | {english} | english: {mat} - blank | Space symbols | | | - blank | Space symbols | - | | - lword | Latin word | it | {english} | english: {} - blank | Space symbols | | | - lword | Latin word | ate | {english} | english: {ate} - blank | Space symbols | | | - lword | Latin word | a | {english} | english: {} - blank | Space symbols | | | - lword | Latin word | fat | {english} | english: {fat} - blank | Space symbols | | | - lword | Latin word | rats | {english} | english: {rat} - (24 rows) + alias | description | token | dictionaries | dictionary | lexemes +-------+---------------+-------+----------------+--------------+--------- + lword | Latin word | a | {english_stem} | english_stem | {} + blank | Space symbols | | {} | | + lword | Latin word | fat | {english_stem} | english_stem | {fat} + blank | Space symbols | | {} | | + lword | Latin word | cat | {english_stem} | english_stem | {cat} + blank | Space symbols | | {} | | + lword | Latin word | sat | {english_stem} | english_stem | {sat} + blank | Space symbols | | {} | | + lword | Latin word | on | {english_stem} | english_stem | {} + blank | Space symbols | | {} | | + lword | Latin word | a | {english_stem} | english_stem | {} + blank | Space symbols | | {} | | + lword | Latin word | mat | {english_stem} | english_stem | {mat} + blank | Space symbols | | {} | | + blank | Space symbols | - | {} | | + lword | Latin word | it | {english_stem} | english_stem | {} + blank | Space symbols | | {} | | + lword | Latin word | ate | {english_stem} | english_stem | {ate} + blank | Space symbols | | {} | | + lword | Latin word | a | {english_stem} | english_stem | {} + blank | Space symbols | | {} | | + lword | Latin word | fat | {english_stem} | english_stem | {fat} + blank | Space symbols | | {} | | + lword | Latin word | rats | {english_stem} | english_stem | {rat} @@ 
-2801,34 +2829,33 @@ ALTER TEXT SEARCH CONFIGURATION public.english SELECT * FROM ts_debug('public.english','The Brightest supernovaes'); - Alias | Description | Token | Dictionaries | Lexized token --------+---------------+-------------+-------------------------------------------------+------------------------------------- - lword | Latin word | The | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {} - blank | Space symbols | | | - lword | Latin word | Brightest | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {bright} - blank | Space symbols | | | - lword | Latin word | supernovaes | {public.english_ispell,pg_catalog.english_stem} | pg_catalog.english_stem: {supernova} -(5 rows) + alias | description | token | dictionaries | dictionary | lexemes +-------+---------------+-------------+-------------------------------+----------------+------------- + lword | Latin word | The | {english_ispell,english_stem} | english_ispell | {} + blank | Space symbols | | {} | | + lword | Latin word | Brightest | {english_ispell,english_stem} | english_ispell | {bright} + blank | Space symbols | | {} | | + lword | Latin word | supernovaes | {english_ispell,english_stem} | english_stem | {supernova} In this example, the word Brightest was recognized by the parser as a Latin word (alias lword). For this token type the dictionary list is - public.english_ispell and - pg_catalog.english_stem. The word was recognized by - public.english_ispell, which reduced it to the noun + english_ispell and + english_stem. The word was recognized by + english_ispell, which reduced it to the noun bright. 
The word supernovaes is - unknown to the public.english_ispell dictionary so it + unknown to the english_ispell dictionary so it was passed to the next dictionary, and, fortunately, was recognized (in - fact, public.english_stem is a Snowball dictionary which + fact, english_stem is a Snowball dictionary which recognizes everything; that is why it was placed at the end of the dictionary list). The word The was recognized by the - public.english_ispell dictionary as a stop word (english_ispell dictionary as a stop word () and will not be indexed. The spaces are discarded too, since the configuration provides no dictionaries at all for them. @@ -2839,16 +2866,15 @@ SELECT * FROM ts_debug('public.english','The Brightest supernovaes'); you want to see: -SELECT "Alias", "Token", "Lexized token" +SELECT alias, token, dictionary, lexemes FROM ts_debug('public.english','The Brightest supernovaes'); - Alias | Token | Lexized token --------+-------------+-------------------------------------- - lword | The | public.english_ispell: {} - blank | | - lword | Brightest | public.english_ispell: {bright} - blank | | - lword | supernovaes | pg_catalog.english_stem: {supernova} -(5 rows) + alias | token | dictionary | lexemes +-------+-------------+----------------+------------- + lword | The | english_ispell | {} + blank | | | + lword | Brightest | english_ispell | {bright} + blank | | | + lword | supernovaes | english_stem | {supernova} diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 5e557efef4..1f1d983573 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -3,7 +3,7 @@ * * Copyright (c) 1996-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.46 2007/09/25 20:03:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.47 2007/10/22 20:13:37 tgl Exp $ */ CREATE VIEW pg_roles AS @@ -386,41 +386,39 @@ CREATE VIEW 
pg_stat_bgwriter AS pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_alloc() AS buffers_alloc; --- Tsearch debug function. Defined here because it'd be pretty unwieldy +-- Tsearch debug function. Defined here because it'd be pretty unwieldy -- to put it into pg_proc.h -CREATE TYPE ts_debug AS ( - "Alias" text, - "Description" text, - "Token" text, - "Dictionaries" regdictionary[], - "Lexized token" text -); - -COMMENT ON TYPE ts_debug IS 'type returned from ts_debug() function'; - -CREATE FUNCTION ts_debug(regconfig, text) -RETURNS SETOF ts_debug AS +CREATE FUNCTION ts_debug(IN config regconfig, IN document text, + OUT alias text, + OUT description text, + OUT token text, + OUT dictionaries regdictionary[], + OUT dictionary regdictionary, + OUT lexemes text[]) +RETURNS SETOF record AS $$ SELECT - tt.alias AS "Alias", - tt.description AS "Description", - parse.token AS "Token", + tt.alias AS alias, + tt.description AS description, + parse.token AS token, ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary FROM pg_catalog.pg_ts_config_map AS m WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid ORDER BY m.mapseqno ) - AS "Dictionaries", - ( - SELECT - dl.mapdict::pg_catalog.regdictionary || ': ' || dl.lex::pg_catalog.text - FROM - ( SELECT mapdict, pg_catalog.ts_lexize(mapdict, parse.token) AS lex - FROM pg_catalog.pg_ts_config_map AS m - WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid - ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno ) dl - LIMIT 1 - ) AS "Lexized token" + AS dictionaries, + ( SELECT mapdict::pg_catalog.regdictionary /* first mapped dictionary, in mapseqno order, whose ts_lexize result is non-NULL; yields NULL when no dictionary recognizes the token */ + FROM pg_catalog.pg_ts_config_map AS m + WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid AND pg_catalog.ts_lexize(mapdict, parse.token) IS NOT NULL + ORDER BY m.mapseqno + LIMIT 1 + ) AS dictionary, + ( SELECT pg_catalog.ts_lexize(mapdict, parse.token) + FROM pg_catalog.pg_ts_config_map AS m + WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid + ORDER BY pg_catalog.ts_lexize(mapdict,
parse.token) IS NULL, m.mapseqno + LIMIT 1 + ) AS lexemes FROM pg_catalog.ts_parse( (SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ), $2 ) AS parse, @@ -434,8 +432,14 @@ LANGUAGE SQL STRICT STABLE; COMMENT ON FUNCTION ts_debug(regconfig,text) IS 'debug function for text search configuration'; -CREATE FUNCTION ts_debug(text) -RETURNS SETOF ts_debug AS +CREATE FUNCTION ts_debug(IN document text, + OUT alias text, + OUT description text, + OUT token text, + OUT dictionaries regdictionary[], + OUT dictionary regdictionary, + OUT lexemes text[]) +RETURNS SETOF record AS $$ SELECT * FROM pg_catalog.ts_debug( pg_catalog.get_current_ts_config(), $1); $$ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 467277d8ad..1fa5428a96 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.434 2007/10/19 22:01:45 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.435 2007/10/22 20:13:37 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200710192 +#define CATALOG_VERSION_NO 200710221 #endif