diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index 0d1ab50002..5124bd80ae 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -210,9 +210,9 @@ SELECT 'a:1 fat:2 cat:3 sat:4 on:5 a:6 mat:7 and:8 ate:9 a:10 fat:11 rat:12'::ts
'a':1,6,10 'on':5 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'rat':12 'sat':4
-Each lexeme position also can be labeled as 'A',
-'B', 'C', 'D',
-where 'D' is the default. These labels can be used to group
+Each lexeme position can also be labeled A,
+B, C, or D,
+where D is the default. These labels can be used to group
lexemes into different importance or
rankings, for example to reflect document structure.
Actual values can be assigned at search time and used during the calculation
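As a quick illustration (not part of this patch), weight labels can be written
directly in tsvector input syntax; positions without an explicit label get the
default D, which is not displayed on output:

SELECT 'cat:3A sat:4B mat:7'::tsvector;
 'cat':3A 'mat':7 'sat':4B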
@@ -668,9 +668,9 @@ setweight(vector TSVECTOR,
This function returns a copy of the input vector in which every location
-has been labeled with either the letter 'A',
-'B', or 'C', or the default label
-'D' (which is the default for new vectors
+has been labeled with the letter A,
+B, or C, or with the label
+D (which is the default for new vectors
and as such is usually not displayed). These labels are retained
when vectors are concatenated, allowing words from different parts of a
document to be weighted differently by ranking functions.
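A typical use, sketched here with assumed table and column names (apod, title,
body, vector): give title words a higher label than body words before
concatenating, so ranking functions can favor title matches:

UPDATE apod SET vector =
    setweight(to_tsvector(coalesce(title,'')), 'A') ||
    setweight(to_tsvector(coalesce(body,'')), 'D');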
@@ -807,13 +807,12 @@ to be made.
-stat
+ts_stat
-stat(sqlquery text , weight text ) returns SETOF statinfo
-
+ts_stat(sqlquery text [, weights text]) returns SETOF statinfo
@@ -821,27 +820,27 @@ stat(sqlquery text
Here statinfo is a type, defined as:
-CREATE TYPE statinfo AS (word text, ndoc int4, nentry int4);
+CREATE TYPE statinfo AS (word text, ndoc integer, nentry integer);
-and sqlquery is a query which returns a
-tsvector column's contents. stat> returns
-statistics about a tsvector column, i.e., the number of
-documents, ndoc>, and the total number of words in the
-collection, nentry>. It is useful for checking your
-configuration and to find stop word candidates. For example, to find
-the ten most frequent words:
+and sqlquery is a text value containing an SQL query
+which returns a single tsvector column. ts_stat
+executes the query and returns statistics about the resulting
+tsvector data, i.e., the number of documents, ndoc,
+and the total number of words in the collection, nentry. It is
+useful for checking your configuration and for finding stop word candidates.
+For example, to find the ten most frequent words:
-SELECT * FROM stat('SELECT vector from apod')
+SELECT * FROM ts_stat('SELECT vector FROM apod')
ORDER BY ndoc DESC, nentry DESC, word
LIMIT 10;
-Optionally, one can specify weight to obtain
+Optionally, one can specify weights to obtain
statistics about words with a specific weight:
-SELECT * FROM stat('SELECT vector FROM apod','a')
+SELECT * FROM ts_stat('SELECT vector FROM apod','a')
ORDER BY ndoc DESC, nentry DESC, word
LIMIT 10;
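The query passed to ts_stat can also compute the tsvector on the fly, for
example from an assumed text column body; note that this recomputes every
vector and can be slow on large tables:

SELECT * FROM ts_stat('SELECT to_tsvector(''english'', body) FROM apod')
ORDER BY ndoc DESC, nentry DESC, word
LIMIT 10;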
@@ -1146,9 +1145,9 @@ topic.
-The rewrite() function changes the original query by
+The ts_rewrite() function changes the original query by
replacing part of the query with some other string of type tsquery,
-as defined by the rewrite rule. Arguments to rewrite()
+as defined by the rewrite rule. Arguments to ts_rewrite()
can be names of columns of type tsquery.
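The array and SELECT-based forms below read their rewrite rules from a table;
a minimal setup, consistent with the aliases table used in the examples, would
be:

CREATE TABLE aliases (t tsquery PRIMARY KEY, s tsquery);
INSERT INTO aliases VALUES('a', 'c');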
@@ -1161,20 +1160,20 @@ INSERT INTO aliases VALUES('a', 'c');
-rewrite - 1
+ts_rewrite
-rewrite (query TSQUERY, target TSQUERY, sample TSQUERY) returns TSQUERY
+ts_rewrite (query TSQUERY, target TSQUERY, sample TSQUERY) returns TSQUERY
-SELECT rewrite('a & b'::tsquery, 'a'::tsquery, 'c'::tsquery);
- rewrite
+SELECT ts_rewrite('a & b'::tsquery, 'a'::tsquery, 'c'::tsquery);
+ ts_rewrite
-----------
'b' & 'c'
@@ -1184,21 +1183,17 @@ SELECT rewrite('a & b'::tsquery, 'a'::tsquery, 'c'::tsquery);
-
-rewrite - 2
-
-
-rewrite(ARRAY[query TSQUERY, target TSQUERY, sample TSQUERY]) returns TSQUERY
+ts_rewrite(ARRAY[query TSQUERY, target TSQUERY, sample TSQUERY]) returns TSQUERY
-SELECT rewrite(ARRAY['a & b'::tsquery, t,s]) FROM aliases;
- rewrite
+SELECT ts_rewrite(ARRAY['a & b'::tsquery, t,s]) FROM aliases;
+ ts_rewrite
-----------
'b' & 'c'
@@ -1208,21 +1203,17 @@ SELECT rewrite(ARRAY['a & b'::tsquery, t,s]) FROM aliases;
-
-rewrite - 3
-
-
-rewrite (query> TSQUERY,'SELECT target ,sample FROM test'::text) returns TSQUERY
+ts_rewrite (query TSQUERY, 'SELECT target, sample FROM test'::text) returns TSQUERY
-SELECT rewrite('a & b'::tsquery, 'SELECT t,s FROM aliases');
- rewrite
+SELECT ts_rewrite('a & b'::tsquery, 'SELECT t,s FROM aliases');
+ ts_rewrite
-----------
'b' & 'c'
@@ -1246,12 +1237,12 @@ SELECT * FROM aliases;
This ambiguity can be resolved by specifying a sort order:
-SELECT rewrite('a & b', 'SELECT t, s FROM aliases ORDER BY t DESC');
- rewrite
+SELECT ts_rewrite('a & b', 'SELECT t, s FROM aliases ORDER BY t DESC');
+ ts_rewrite
---------
'cc'
-SELECT rewrite('a & b', 'SELECT t, s FROM aliases ORDER BY t ASC');
- rewrite
+SELECT ts_rewrite('a & b', 'SELECT t, s FROM aliases ORDER BY t ASC');
+ ts_rewrite
-----------
'b' & 'c'
@@ -1263,7 +1254,7 @@ Let's consider a real-life astronomical example. We'll expand query
CREATE TABLE aliases (t tsquery primary key, s tsquery);
INSERT INTO aliases VALUES(to_tsquery('supernovae'), to_tsquery('supernovae|sn'));
-SELECT rewrite(to_tsquery('supernovae'), 'SELECT * FROM aliases') && to_tsquery('crab');
+SELECT ts_rewrite(to_tsquery('supernovae'), 'SELECT * FROM aliases') && to_tsquery('crab');
?column?
---------------------------------
( 'supernova' | 'sn' ) & 'crab'
@@ -1271,7 +1262,7 @@ SELECT rewrite(to_tsquery('supernovae'), 'SELECT * FROM aliases') && to
Notice that we can change the rewriting rule on the fly:
UPDATE aliases SET s=to_tsquery('supernovae|sn & !nebulae') WHERE t=to_tsquery('supernovae');
-SELECT rewrite(to_tsquery('supernovae'), 'SELECT * FROM aliases') && to_tsquery('crab');
+SELECT ts_rewrite(to_tsquery('supernovae'), 'SELECT * FROM aliases') && to_tsquery('crab');
?column?
---------------------------------------------
( 'supernova' | 'sn' & !'nebula' ) & 'crab'
@@ -1288,10 +1279,10 @@ for a possible hit. To filter out obvious non-candidate rules there are containm
operators for the tsquery type. In the example below, we select only those
rules which might contain the original query:
-SELECT rewrite(ARRAY['a & b'::tsquery, t,s])
+SELECT ts_rewrite(ARRAY['a & b'::tsquery, t,s])
FROM aliases
WHERE 'a & b' @> t;
- rewrite
+ ts_rewrite
-----------
'b' & 'c'
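As a standalone sketch of the containment operator itself: @> returns true
when the left tsquery contains the right one, which is why it can prune rules
that cannot possibly apply:

SELECT 'a & b'::tsquery @> 'a'::tsquery;
 ?column?
----------
 t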
@@ -1525,7 +1516,7 @@ SELECT * FROM ts_parse('default','123 - a number');
-token_type
+ts_token_type
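For reference, a minimal call (output omitted here): ts_token_type lists the
token types a parser can produce, as (tokid, alias, description) rows:

SELECT * FROM ts_token_type('default');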
@@ -1894,11 +1885,13 @@ configuration config_name is realized by
superimposed coding (Knuth, 1973) of signatures, i.e., a parent is the
result of 'OR'-ing the bit-strings of all children. This is a second
factor of lossiness. It is clear that parents tend to be full of
-'1'>s (degenerates) and become quite useless because of the
+1s (degenerates) and become quite useless because of the
limited selectivity. Searching is performed as a bit comparison of a
signature representing the query and an RD-tree entry.
-If all '1'>s of both signatures are in the same position we
+If all 1s of both signatures are in the same position we
say that this branch probably matches the query, but if there is even one
discrepancy we can definitely reject this branch.
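A minimal sketch of putting this to use, with assumed table and column names
(apod, vector): create a GiST index over the tsvector column so queries are
evaluated against the signature tree described above:

CREATE INDEX apod_vector_gist_idx ON apod USING gist(vector);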
@@ -2870,13 +2863,15 @@ The current limitations of Full Text Searching are:
For comparison, the PostgreSQL 8.1 documentation
-consists of 10,441 unique words, a total of 335,420 words, and the most frequent word
-'postgresql' is mentioned 6,127 times in 655 documents.
+contained 10,441 unique words, a total of 335,420 words, and the most frequent
+word postgresql was mentioned 6,127 times in 655 documents.
+
-Another example - the PostgreSQL mailing list archives
-consists of 910,989 unique words with 57,491,343 lexemes in 461,020 messages.
+Another example — the PostgreSQL mailing list
+archives contained 910,989 unique words with 57,491,343 lexemes in 461,020
+messages.
@@ -2942,28 +2937,27 @@ names and object names. The following examples illustrate this:
=> \dF+ russian
Configuration "pg_catalog.russian"
Parser name: "pg_catalog.default"
-Locale: 'ru_RU.UTF-8' (default)
Token | Dictionaries
--------------+-------------------------
email | pg_catalog.simple
file | pg_catalog.simple
float | pg_catalog.simple
host | pg_catalog.simple
- hword | pg_catalog.ru_stem_utf8
+ hword | pg_catalog.russian_stem
int | pg_catalog.simple
lhword | public.tz_simple
lpart_hword | public.tz_simple
lword | public.tz_simple
- nlhword | pg_catalog.ru_stem_utf8
- nlpart_hword | pg_catalog.ru_stem_utf8
- nlword | pg_catalog.ru_stem_utf8
+ nlhword | pg_catalog.russian_stem
+ nlpart_hword | pg_catalog.russian_stem
+ nlword | pg_catalog.russian_stem
part_hword | pg_catalog.simple
sfloat | pg_catalog.simple
uint | pg_catalog.simple
uri | pg_catalog.simple
url | pg_catalog.simple
version | pg_catalog.simple
- word | pg_catalog.ru_stem_utf8
+ word | pg_catalog.russian_stem
@@ -3112,43 +3106,43 @@ play with the standard english configuration.
CREATE TEXT SEARCH CONFIGURATION public.english ( COPY = pg_catalog.english );
-CREATE TEXT SEARCH DICTIONARY en_ispell (
+CREATE TEXT SEARCH DICTIONARY english_ispell (
TEMPLATE = ispell,
- DictFile = english-utf8,
- AffFile = english-utf8,
+ DictFile = english,
+ AffFile = english,
StopWords = english
);
ALTER TEXT SEARCH CONFIGURATION public.english
- ALTER MAPPING FOR lword WITH en_ispell, en_stem;
+ ALTER MAPPING FOR lword WITH english_ispell, english_stem;
SELECT * FROM ts_debug('public.english','The Brightest supernovaes');
Alias | Description | Token | Dicts list | Lexized token
-------+---------------+-------------+---------------------------------------+---------------------------------
- lword | Latin word | The | {public.en_ispell,pg_catalog.en_stem} | public.en_ispell: {}
+ lword | Latin word | The | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {}
blank | Space symbols | | |
- lword | Latin word | Brightest | {public.en_ispell,pg_catalog.en_stem} | public.en_ispell: {bright}
+ lword | Latin word | Brightest | {public.english_ispell,pg_catalog.english_stem} | public.english_ispell: {bright}
blank | Space symbols | | |
- lword | Latin word | supernovaes | {public.en_ispell,pg_catalog.en_stem} | pg_catalog.en_stem: {supernova}
+ lword | Latin word | supernovaes | {public.english_ispell,pg_catalog.english_stem} | pg_catalog.english_stem: {supernova}
(5 rows)
-In this example, the word 'Brightest'> was recognized by a
+In this example, the word Brightest was recognized by a
parser as a Latin word (alias lword)
-and came through the dictionaries public.en_ispell> and
-pg_catalog.en_stem. It was recognized by
-public.en_ispell, which reduced it to the noun
+and came through the dictionaries public.english_ispell and
+pg_catalog.english_stem. It was recognized by
+public.english_ispell, which reduced it to the noun
bright. The word supernovaes is unknown
-by the public.en_ispell dictionary so it was passed to
+by the public.english_ispell dictionary, so it was passed to
the next dictionary, and, fortunately, was recognized (in fact,
-public.en_stem is a stemming dictionary and recognizes
+public.english_stem is a stemming dictionary and recognizes
everything; that is why it was placed at the end of the dictionary stack).
-The word The was recognized by public.en_ispell
+The word The was recognized by the public.english_ispell
dictionary as a stop word () and will not be indexed.
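Given the mapping above, a quick check with to_tsvector (a sketch, not from
the patch) shows the stop word dropped and the other tokens normalized exactly
as ts_debug reports:

SELECT to_tsvector('public.english', 'The Brightest supernovaes');
 'bright':2 'supernova':3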
@@ -3159,11 +3153,11 @@ SELECT "Alias", "Token", "Lexized token"
FROM ts_debug('public.english','The Brightest supernovaes');
Alias | Token | Lexized token
-------+-------------+---------------------------------
- lword | The | public.en_ispell: {}
+ lword | The | public.english_ispell: {}
blank | |
- lword | Brightest | public.en_ispell: {bright}
+ lword | Brightest | public.english_ispell: {bright}
blank | |
- lword | supernovaes | pg_catalog.en_stem: {supernova}
+ lword | supernovaes | pg_catalog.english_stem: {supernova}
(5 rows)