Tsvector editing functions
Adds several tsvector editing functions: convert tsvector to/from text array, set weight for given lexemes, delete lexeme(s), unnest, filter lexemes with given weights Author: Stas Kelvich with some editorization by me Reviewers: Tomas Vondra, Teodor Sigaev
This commit is contained in:
parent
49635d7b3e
commit
6943a946c7
@ -9211,13 +9211,26 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
|
|||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>setweight</primary>
|
<primary>setweight</primary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
<literal><function>setweight(<type>tsvector</>, <type>"char"</>)</function></literal>
|
<literal><function>setweight(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">weight</replaceable> <type>"char"</>)</function></literal>
|
||||||
</entry>
|
</entry>
|
||||||
<entry><type>tsvector</type></entry>
|
<entry><type>tsvector</type></entry>
|
||||||
<entry>assign weight to each element of <type>tsvector</></entry>
|
<entry>assign <replaceable class="PARAMETER">weight</replaceable> to each element of <replaceable class="PARAMETER">vector</replaceable></entry>
|
||||||
<entry><literal>setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A')</literal></entry>
|
<entry><literal>setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A')</literal></entry>
|
||||||
<entry><literal>'cat':3A 'fat':2A,4A 'rat':5A</literal></entry>
|
<entry><literal>'cat':3A 'fat':2A,4A 'rat':5A</literal></entry>
|
||||||
</row>
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>setweight</primary>
|
||||||
|
<secondary>setweight by filter</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<literal><function>setweight(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">weight</replaceable> <type>"char"</>, <replaceable class="PARAMETER">lexemes</replaceable> <type>"text"[]</>)</function></literal>
|
||||||
|
</entry>
|
||||||
|
<entry><type>tsvector</type></entry>
|
||||||
|
<entry>assign <replaceable class="PARAMETER">weight</replaceable> to elements of <replaceable class="PARAMETER">vector</replaceable> that are listed in <replaceable class="PARAMETER">lexemes</replaceable> array</entry>
|
||||||
|
<entry><literal>setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A', '{cat,rat}')</literal></entry>
|
||||||
|
<entry><literal>'cat':3A 'fat':2,4 'rat':5A</literal></entry>
|
||||||
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry>
|
<entry>
|
||||||
<indexterm>
|
<indexterm>
|
||||||
@ -9230,6 +9243,80 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
|
|||||||
<entry><literal>strip('fat:2,4 cat:3 rat:5A'::tsvector)</literal></entry>
|
<entry><literal>strip('fat:2,4 cat:3 rat:5A'::tsvector)</literal></entry>
|
||||||
<entry><literal>'cat' 'fat' 'rat'</literal></entry>
|
<entry><literal>'cat' 'fat' 'rat'</literal></entry>
|
||||||
</row>
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>delete</primary>
|
||||||
|
<secondary>delete lexeme</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<literal><function>delete(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">lexeme</replaceable> <type>text</>)</function></literal>
|
||||||
|
</entry>
|
||||||
|
<entry><type>tsvector</type></entry>
|
||||||
|
<entry>remove given <replaceable class="PARAMETER">lexeme</replaceable> from <replaceable class="PARAMETER">vector</replaceable></entry>
|
||||||
|
<entry><literal>delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat')</literal></entry>
|
||||||
|
<entry><literal>'cat':3 'rat':5A</literal></entry>
|
||||||
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>delete</primary>
|
||||||
|
<secondary>delete lexemes array</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<literal><function>delete(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">lexemes</replaceable> <type>text[]</>)</function></literal>
|
||||||
|
</entry>
|
||||||
|
<entry><type>tsvector</type></entry>
|
||||||
|
<entry>remove any occurrence of lexemes in <replaceable class="PARAMETER">lexemes</replaceable> array from <replaceable class="PARAMETER">vector</replaceable></entry>
|
||||||
|
<entry><literal>delete('fat:2,4 cat:3 rat:5A'::tsvector, ARRAY['fat','rat'])</literal></entry>
|
||||||
|
<entry><literal>'cat':3</literal></entry>
|
||||||
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>unnest</primary>
|
||||||
|
</indexterm>
|
||||||
|
<literal><function>unnest(<type>tsvector</>, OUT <replaceable class="PARAMETER">lexeme</> <type>text</>, OUT <replaceable class="PARAMETER">positions</> <type>smallint[]</>, OUT <replaceable class="PARAMETER">weights</> <type>text[]</>)</function></literal>
|
||||||
|
</entry>
|
||||||
|
<entry><type>setof record</type></entry>
|
||||||
|
<entry>expand a tsvector to a set of rows</entry>
|
||||||
|
<entry><literal>unnest('fat:2,4 cat:3 rat:5A'::tsvector)</literal></entry>
|
||||||
|
<entry><literal>(cat,{3},{D}) ...</literal></entry>
|
||||||
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>tsvector_to_array</primary>
|
||||||
|
</indexterm>
|
||||||
|
<literal><function>tsvector_to_array(<type>tsvector</>)</function></literal>
|
||||||
|
</entry>
|
||||||
|
<entry><type>text[]</type></entry>
|
||||||
|
<entry>convert <type>tsvector</> to array of lexemes</entry>
|
||||||
|
<entry><literal>tsvector_to_array('fat:2,4 cat:3 rat:5A'::tsvector)</literal></entry>
|
||||||
|
<entry><literal>{cat,fat,rat}</literal></entry>
|
||||||
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>array_to_tsvector</primary>
|
||||||
|
</indexterm>
|
||||||
|
<literal><function>array_to_tsvector(<type>text[]</>)</function></literal>
|
||||||
|
</entry>
|
||||||
|
<entry><type>tsvector</type></entry>
|
||||||
|
<entry>convert array of lexemes to <type>tsvector</type></entry>
|
||||||
|
<entry><literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal></entry>
|
||||||
|
<entry><literal>'fat' 'cat' 'rat'</literal></entry>
|
||||||
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry>
|
||||||
|
<indexterm>
|
||||||
|
<primary>filter</primary>
|
||||||
|
</indexterm>
|
||||||
|
<literal><function>filter(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">weights</replaceable> <type>"char"[]</>)</function></literal>
|
||||||
|
</entry>
|
||||||
|
<entry><type>tsvector</type></entry>
|
||||||
|
<entry>Select only elements with given <replaceable class="PARAMETER">weights</replaceable> from <replaceable class="PARAMETER">vector</replaceable></entry>
|
||||||
|
<entry><literal>filter('fat:2,4 cat:3b rat:5A'::tsvector, '{a,b}')</literal></entry>
|
||||||
|
<entry><literal>'cat':3B 'rat':5A</literal></entry>
|
||||||
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry>
|
<entry>
|
||||||
<indexterm>
|
<indexterm>
|
||||||
|
@ -1326,6 +1326,10 @@ FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank
|
|||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The full list of <type>tsvector</>-related functions is available in <xref linkend="textsearch-functions-table">.
|
||||||
|
</para>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
<sect2 id="textsearch-manipulate-tsquery">
|
<sect2 id="textsearch-manipulate-tsquery">
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
|
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "access/htup_details.h"
|
||||||
#include "catalog/namespace.h"
|
#include "catalog/namespace.h"
|
||||||
#include "catalog/pg_type.h"
|
#include "catalog/pg_type.h"
|
||||||
#include "commands/trigger.h"
|
#include "commands/trigger.h"
|
||||||
@ -65,6 +66,7 @@ typedef struct
|
|||||||
#define STATHDRSIZE (offsetof(TSVectorStat, data))
|
#define STATHDRSIZE (offsetof(TSVectorStat, data))
|
||||||
|
|
||||||
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
|
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
|
||||||
|
static int tsvector_bsearch(TSVector tsin, char *lexin, int lexin_len);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Order: haspos, len, word, for all positions (pos, weight)
|
* Order: haspos, len, word, for all positions (pos, weight)
|
||||||
@ -251,6 +253,90 @@ tsvector_setweight(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_POINTER(out);
|
PG_RETURN_POINTER(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* setweight(tsin tsvector, char_weight "char", lexemes "text"[])
|
||||||
|
*
|
||||||
|
* Assign weight w to elements of tsin that are listed in lexemes.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TSVector tsin = PG_GETARG_TSVECTOR(0);
|
||||||
|
char char_weight = PG_GETARG_CHAR(1);
|
||||||
|
ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
|
||||||
|
|
||||||
|
TSVector tsout;
|
||||||
|
int i,
|
||||||
|
j,
|
||||||
|
nlexemes,
|
||||||
|
weight;
|
||||||
|
WordEntry *entry;
|
||||||
|
Datum *dlexemes;
|
||||||
|
bool *nulls;
|
||||||
|
|
||||||
|
switch (char_weight)
|
||||||
|
{
|
||||||
|
case 'A': case 'a':
|
||||||
|
weight = 3;
|
||||||
|
break;
|
||||||
|
case 'B': case 'b':
|
||||||
|
weight = 2;
|
||||||
|
break;
|
||||||
|
case 'C': case 'c':
|
||||||
|
weight = 1;
|
||||||
|
break;
|
||||||
|
case 'D': case 'd':
|
||||||
|
weight = 0;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
/* internal error */
|
||||||
|
elog(ERROR, "unrecognized weight: %c", char_weight);
|
||||||
|
}
|
||||||
|
|
||||||
|
tsout = (TSVector) palloc(VARSIZE(tsin));
|
||||||
|
memcpy(tsout, tsin, VARSIZE(tsin));
|
||||||
|
entry = ARRPTR(tsout);
|
||||||
|
|
||||||
|
deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
|
||||||
|
&dlexemes, &nulls, &nlexemes);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Assuming that lexemes array is significantly shorter than tsvector
|
||||||
|
* we can iterate through lexemes performing binary search
|
||||||
|
* of each lexeme from lexemes in tsvector.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < nlexemes; i++)
|
||||||
|
{
|
||||||
|
char *lex;
|
||||||
|
int lex_len,
|
||||||
|
lex_pos;
|
||||||
|
|
||||||
|
if (nulls[i])
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("lexeme array may not contain nulls")));
|
||||||
|
|
||||||
|
lex = VARDATA(dlexemes[i]);
|
||||||
|
lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
|
||||||
|
lex_pos = tsvector_bsearch(tsout, lex, lex_len);
|
||||||
|
|
||||||
|
if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
|
||||||
|
{
|
||||||
|
WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
|
||||||
|
while (j--)
|
||||||
|
{
|
||||||
|
WEP_SETWEIGHT(*p, weight);
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_FREE_IF_COPY(tsin, 0);
|
||||||
|
PG_FREE_IF_COPY(lexemes, 2);
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(tsout);
|
||||||
|
}
|
||||||
|
|
||||||
#define compareEntry(pa, a, pb, b) \
|
#define compareEntry(pa, a, pb, b) \
|
||||||
tsCompareString((pa) + (a)->pos, (a)->len, \
|
tsCompareString((pa) + (a)->pos, (a)->len, \
|
||||||
(pb) + (b)->pos, (b)->len, \
|
(pb) + (b)->pos, (b)->len, \
|
||||||
@ -291,6 +377,483 @@ add_pos(TSVector src, WordEntry *srcptr,
|
|||||||
return *clen - startlen;
|
return *clen - startlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform binary search of given lexeme in TSVector.
|
||||||
|
* Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
|
||||||
|
* found.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
|
||||||
|
{
|
||||||
|
WordEntry *arrin = ARRPTR(tsv);
|
||||||
|
int StopLow = 0,
|
||||||
|
StopHigh = tsv->size,
|
||||||
|
StopMiddle,
|
||||||
|
cmp;
|
||||||
|
|
||||||
|
while (StopLow < StopHigh)
|
||||||
|
{
|
||||||
|
StopMiddle = (StopLow + StopHigh)/2;
|
||||||
|
|
||||||
|
cmp = tsCompareString(lexeme, lexeme_len,
|
||||||
|
STRPTR(tsv) + arrin[StopMiddle].pos,
|
||||||
|
arrin[StopMiddle].len,
|
||||||
|
false);
|
||||||
|
|
||||||
|
if (cmp < 0)
|
||||||
|
StopHigh = StopMiddle;
|
||||||
|
else if (cmp > 0)
|
||||||
|
StopLow = StopMiddle + 1;
|
||||||
|
else /* found it */
|
||||||
|
return StopMiddle;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
compareint(const void *va, const void *vb)
|
||||||
|
{
|
||||||
|
int32 a = *((const int32 *) va);
|
||||||
|
int32 b = *((const int32 *) vb);
|
||||||
|
|
||||||
|
if (a == b)
|
||||||
|
return 0;
|
||||||
|
return (a > b) ? 1 : -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Internal routine to delete lexemes from TSVector by array of offsets.
|
||||||
|
*
|
||||||
|
* int *indices_to_delete -- array of lexeme offsets to delete
|
||||||
|
* int indices_count -- size of that array
|
||||||
|
*
|
||||||
|
* Returns new TSVector without given lexemes along with their positions
|
||||||
|
* and weights.
|
||||||
|
*/
|
||||||
|
static TSVector
|
||||||
|
tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
|
||||||
|
int indices_count)
|
||||||
|
{
|
||||||
|
TSVector tsout;
|
||||||
|
WordEntry *arrin = ARRPTR(tsv),
|
||||||
|
*arrout;
|
||||||
|
char *data = STRPTR(tsv),
|
||||||
|
*dataout;
|
||||||
|
int i, j, k,
|
||||||
|
curoff;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Here we overestimates tsout size, since we don't know exact size
|
||||||
|
* occupied by positions and weights. We will set exact size later
|
||||||
|
* after a pass through TSVector.
|
||||||
|
*/
|
||||||
|
tsout = (TSVector) palloc0(VARSIZE(tsv));
|
||||||
|
arrout = ARRPTR(tsout);
|
||||||
|
tsout->size = tsv->size - indices_count;
|
||||||
|
|
||||||
|
/* Sort our filter array to simplify membership check later. */
|
||||||
|
if (indices_count > 1)
|
||||||
|
qsort(indices_to_delete, indices_count, sizeof(int), compareint);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy tsv to tsout skipping lexemes that enlisted in indices_to_delete.
|
||||||
|
*/
|
||||||
|
curoff = 0;
|
||||||
|
dataout = STRPTR(tsout);
|
||||||
|
for (i = j = k = 0; i < tsv->size; i++)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Here we should check whether current i is present in
|
||||||
|
* indices_to_delete or not. Since indices_to_delete is already
|
||||||
|
* sorted we can advance it index only when we have match.
|
||||||
|
*/
|
||||||
|
if (k < indices_count && i == indices_to_delete[k]){
|
||||||
|
k++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy lexeme, it's positions and weights */
|
||||||
|
memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
|
||||||
|
arrout[j].haspos = arrin[i].haspos;
|
||||||
|
arrout[j].len = arrin[i].len;
|
||||||
|
arrout[j].pos = curoff;
|
||||||
|
curoff += arrin[i].len;
|
||||||
|
if (arrin[i].haspos)
|
||||||
|
{
|
||||||
|
int len = POSDATALEN(tsv, arrin+i) * sizeof(WordEntryPos) +
|
||||||
|
sizeof(uint16);
|
||||||
|
curoff = SHORTALIGN(curoff);
|
||||||
|
memcpy(dataout + curoff,
|
||||||
|
STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
|
||||||
|
len);
|
||||||
|
curoff += len;
|
||||||
|
}
|
||||||
|
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* After the pass through TSVector k should equals exactly to indices_count.
|
||||||
|
* If it isn't then the caller provided us with indices outside of
|
||||||
|
* [0, tsv->size) range and estimation of tsout's size is wrong.
|
||||||
|
*/
|
||||||
|
Assert(k == indices_count);
|
||||||
|
|
||||||
|
SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
|
||||||
|
return tsout;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Delete given lexeme from tsvector.
|
||||||
|
* Implementation of user-level delete(tsvector, text).
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsvector_delete_str(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TSVector tsin = PG_GETARG_TSVECTOR(0),
|
||||||
|
tsout;
|
||||||
|
text *tlexeme = PG_GETARG_TEXT_P(1);
|
||||||
|
char *lexeme = VARDATA(tlexeme);
|
||||||
|
int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
|
||||||
|
skip_index;
|
||||||
|
|
||||||
|
if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
|
||||||
|
PG_RETURN_POINTER(tsin);
|
||||||
|
|
||||||
|
tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
|
||||||
|
|
||||||
|
PG_FREE_IF_COPY(tsin, 0);
|
||||||
|
PG_FREE_IF_COPY(tlexeme, 1);
|
||||||
|
PG_RETURN_POINTER(tsout);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Delete given array of lexemes from tsvector.
|
||||||
|
* Implementation of user-level delete(tsvector, text[]).
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsvector_delete_arr(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TSVector tsin = PG_GETARG_TSVECTOR(0),
|
||||||
|
tsout;
|
||||||
|
ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
|
||||||
|
int i, nlex,
|
||||||
|
skip_count,
|
||||||
|
*skip_indices;
|
||||||
|
Datum *dlexemes;
|
||||||
|
bool *nulls;
|
||||||
|
|
||||||
|
deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
|
||||||
|
&dlexemes, &nulls, &nlex);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In typical use case array of lexemes to delete is relatively small.
|
||||||
|
* So here we optimizing things for that scenario: iterate through lexarr
|
||||||
|
* performing binary search of each lexeme from lexarr in tsvector.
|
||||||
|
*/
|
||||||
|
skip_indices = palloc0(nlex * sizeof(int));
|
||||||
|
for (i = skip_count = 0; i < nlex; i++)
|
||||||
|
{
|
||||||
|
char *lex;
|
||||||
|
int lex_len,
|
||||||
|
lex_pos;
|
||||||
|
|
||||||
|
if (nulls[i])
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("lexeme array may not contain nulls")));
|
||||||
|
|
||||||
|
lex = VARDATA(dlexemes[i]);
|
||||||
|
lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
|
||||||
|
lex_pos = tsvector_bsearch(tsin, lex, lex_len);
|
||||||
|
|
||||||
|
if (lex_pos >= 0)
|
||||||
|
skip_indices[skip_count++] = lex_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
|
||||||
|
|
||||||
|
pfree(skip_indices);
|
||||||
|
PG_FREE_IF_COPY(tsin, 0);
|
||||||
|
PG_FREE_IF_COPY(lexemes, 1);
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(tsout);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Expand tsvector as table with following columns:
|
||||||
|
* lexeme: lexeme text
|
||||||
|
* positions: integer array of lexeme positions
|
||||||
|
* weights: char array of weights corresponding to positions
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsvector_unnest(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
FuncCallContext *funcctx;
|
||||||
|
TSVector tsin;
|
||||||
|
|
||||||
|
if (SRF_IS_FIRSTCALL())
|
||||||
|
{
|
||||||
|
MemoryContext oldcontext;
|
||||||
|
TupleDesc tupdesc;
|
||||||
|
|
||||||
|
funcctx = SRF_FIRSTCALL_INIT();
|
||||||
|
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||||
|
|
||||||
|
tupdesc = CreateTemplateTupleDesc(3, false);
|
||||||
|
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
|
||||||
|
TEXTOID, -1, 0);
|
||||||
|
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
|
||||||
|
INT2ARRAYOID, -1, 0);
|
||||||
|
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
|
||||||
|
TEXTARRAYOID, -1, 0);
|
||||||
|
funcctx->tuple_desc = BlessTupleDesc(tupdesc);
|
||||||
|
|
||||||
|
funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
|
||||||
|
|
||||||
|
MemoryContextSwitchTo(oldcontext);
|
||||||
|
}
|
||||||
|
|
||||||
|
funcctx = SRF_PERCALL_SETUP();
|
||||||
|
tsin = (TSVector) funcctx->user_fctx;
|
||||||
|
|
||||||
|
if (funcctx->call_cntr < tsin->size)
|
||||||
|
{
|
||||||
|
WordEntry *arrin = ARRPTR(tsin);
|
||||||
|
char *data = STRPTR(tsin);
|
||||||
|
HeapTuple tuple;
|
||||||
|
int j,
|
||||||
|
i = funcctx->call_cntr;
|
||||||
|
bool nulls[] = {false, false, false};
|
||||||
|
Datum values[3];
|
||||||
|
|
||||||
|
values[0] = PointerGetDatum(
|
||||||
|
cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (arrin[i].haspos)
|
||||||
|
{
|
||||||
|
WordEntryPosVector *posv;
|
||||||
|
Datum *positions;
|
||||||
|
Datum *weights;
|
||||||
|
char weight;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Internally tsvector stores position and weight in the same
|
||||||
|
* uint16 (2 bits for weight, 14 for position). Here we extract that
|
||||||
|
* in two separate arrays.
|
||||||
|
*/
|
||||||
|
posv = _POSVECPTR(tsin, arrin + i);
|
||||||
|
positions = palloc(posv->npos * sizeof(Datum));
|
||||||
|
weights = palloc(posv->npos * sizeof(Datum));
|
||||||
|
for (j = 0; j < posv->npos; j++)
|
||||||
|
{
|
||||||
|
positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
|
||||||
|
weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
|
||||||
|
weights[j] = PointerGetDatum(
|
||||||
|
cstring_to_text_with_len(&weight, 1)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
values[1] = PointerGetDatum(
|
||||||
|
construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
|
||||||
|
values[2] = PointerGetDatum(
|
||||||
|
construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
nulls[1] = nulls[2] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
|
||||||
|
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pfree(tsin);
|
||||||
|
SRF_RETURN_DONE(funcctx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert tsvector to array of lexemes.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsvector_to_array(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TSVector tsin = PG_GETARG_TSVECTOR(0);
|
||||||
|
WordEntry *arrin = ARRPTR(tsin);
|
||||||
|
Datum elements[tsin->size];
|
||||||
|
int i;
|
||||||
|
ArrayType *array;
|
||||||
|
|
||||||
|
for (i = 0; i < tsin->size; i++)
|
||||||
|
{
|
||||||
|
elements[i] = PointerGetDatum(
|
||||||
|
cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
|
||||||
|
PG_FREE_IF_COPY(tsin, 0);
|
||||||
|
PG_RETURN_POINTER(array);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Build tsvector from array of lexemes.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
array_to_tsvector(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
|
||||||
|
TSVector tsout;
|
||||||
|
Datum *dlexemes;
|
||||||
|
WordEntry *arrout;
|
||||||
|
bool *nulls;
|
||||||
|
int nitems,
|
||||||
|
i,
|
||||||
|
tslen,
|
||||||
|
datalen = 0;
|
||||||
|
char *cur;
|
||||||
|
|
||||||
|
deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
|
||||||
|
|
||||||
|
for (i = 0; i < nitems; i++)
|
||||||
|
{
|
||||||
|
if (nulls[i])
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("lexeme array may not contain nulls")));
|
||||||
|
|
||||||
|
datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
tslen = CALCDATASIZE(nitems, datalen);
|
||||||
|
tsout = (TSVector) palloc0(tslen);
|
||||||
|
SET_VARSIZE(tsout, tslen);
|
||||||
|
tsout->size = nitems;
|
||||||
|
arrout = ARRPTR(tsout);
|
||||||
|
cur = STRPTR(tsout);
|
||||||
|
|
||||||
|
for (i = 0; i < nitems; i++)
|
||||||
|
{
|
||||||
|
char *lex = VARDATA(dlexemes[i]);
|
||||||
|
int lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
|
||||||
|
|
||||||
|
memcpy(cur, lex, lex_len);
|
||||||
|
arrout[i].haspos = 0;
|
||||||
|
arrout[i].len = lex_len;
|
||||||
|
arrout[i].pos = cur - STRPTR(tsout);
|
||||||
|
cur += lex_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_FREE_IF_COPY(v, 0);
|
||||||
|
PG_RETURN_POINTER(tsout);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Leave only elements with given weights from tsvector.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
tsvector_filter(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
TSVector tsin = PG_GETARG_TSVECTOR(0),
|
||||||
|
tsout;
|
||||||
|
ArrayType *weights = PG_GETARG_ARRAYTYPE_P(1);
|
||||||
|
WordEntry *arrin = ARRPTR(tsin),
|
||||||
|
*arrout;
|
||||||
|
char *datain = STRPTR(tsin),
|
||||||
|
*dataout;
|
||||||
|
Datum *dweights;
|
||||||
|
bool *nulls;
|
||||||
|
int nweigths;
|
||||||
|
int i, j;
|
||||||
|
char mask = 0,
|
||||||
|
cur_pos = 0;
|
||||||
|
|
||||||
|
deconstruct_array(weights, CHAROID, 1, true, 'c',
|
||||||
|
&dweights, &nulls, &nweigths);
|
||||||
|
|
||||||
|
for (i = 0; i < nweigths; i++)
|
||||||
|
{
|
||||||
|
char char_weight;
|
||||||
|
|
||||||
|
if (nulls[i])
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("weight array may not contain nulls")));
|
||||||
|
|
||||||
|
char_weight = DatumGetChar(dweights[i]);
|
||||||
|
switch (char_weight)
|
||||||
|
{
|
||||||
|
case 'A': case 'a':
|
||||||
|
mask = mask | 8;
|
||||||
|
break;
|
||||||
|
case 'B': case 'b':
|
||||||
|
mask = mask | 4;
|
||||||
|
break;
|
||||||
|
case 'C': case 'c':
|
||||||
|
mask = mask | 2;
|
||||||
|
break;
|
||||||
|
case 'D': case 'd':
|
||||||
|
mask = mask | 1;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
/* internal error */
|
||||||
|
elog(ERROR, "unrecognized weight: %c", char_weight);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tsout = (TSVector) palloc0(VARSIZE(tsin));
|
||||||
|
tsout->size = tsin->size;
|
||||||
|
arrout = ARRPTR(tsout);
|
||||||
|
dataout = STRPTR(tsout);
|
||||||
|
|
||||||
|
for (i = j = 0; i < tsin->size; i++)
|
||||||
|
{
|
||||||
|
WordEntryPosVector *posvin,
|
||||||
|
*posvout;
|
||||||
|
int npos = 0;
|
||||||
|
int k;
|
||||||
|
|
||||||
|
if (!arrin[i].haspos)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
posvin = _POSVECPTR(tsin, arrin + i);
|
||||||
|
posvout = (WordEntryPosVector *)
|
||||||
|
(dataout + SHORTALIGN(cur_pos + arrin[i].len));
|
||||||
|
|
||||||
|
for (k = 0; k < posvin->npos; k++)
|
||||||
|
{
|
||||||
|
if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
|
||||||
|
posvout->pos[npos++] = posvin->pos[k];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!npos) /* no satisfactory positions found, so skip that lexeme */
|
||||||
|
continue;
|
||||||
|
|
||||||
|
arrout[j].haspos = true;
|
||||||
|
arrout[j].len = arrin[i].len;
|
||||||
|
arrout[j].pos = cur_pos;
|
||||||
|
|
||||||
|
memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
|
||||||
|
posvout->npos = npos;
|
||||||
|
cur_pos += SHORTALIGN(arrin[i].len);
|
||||||
|
cur_pos += POSDATALEN(tsout, arrout+j) * sizeof(WordEntryPos) +
|
||||||
|
sizeof(uint16);
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
tsout->size = j;
|
||||||
|
if (dataout != STRPTR(tsout))
|
||||||
|
memmove(STRPTR(tsout), dataout, cur_pos);
|
||||||
|
|
||||||
|
SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
|
||||||
|
|
||||||
|
PG_FREE_IF_COPY(tsin, 0);
|
||||||
|
PG_RETURN_POINTER(tsout);
|
||||||
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
tsvector_concat(PG_FUNCTION_ARGS)
|
tsvector_concat(PG_FUNCTION_ARGS)
|
||||||
|
@ -4498,8 +4498,22 @@ DESCR("number of lexemes");
|
|||||||
DATA(insert OID = 3623 ( strip PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3614 "3614" _null_ _null_ _null_ _null_ _null_ tsvector_strip _null_ _null_ _null_ ));
|
DATA(insert OID = 3623 ( strip PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3614 "3614" _null_ _null_ _null_ _null_ _null_ tsvector_strip _null_ _null_ _null_ ));
|
||||||
DESCR("strip position information");
|
DESCR("strip position information");
|
||||||
DATA(insert OID = 3624 ( setweight PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 18" _null_ _null_ _null_ _null_ _null_ tsvector_setweight _null_ _null_ _null_ ));
|
DATA(insert OID = 3624 ( setweight PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 18" _null_ _null_ _null_ _null_ _null_ tsvector_setweight _null_ _null_ _null_ ));
|
||||||
DESCR("set weight of lexeme's entries");
|
DESCR("set given weight for whole tsvector");
|
||||||
|
DATA(insert OID = 3320 ( setweight PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 3614 "3614 18 1009" _null_ _null_ _null_ _null_ _null_ tsvector_setweight_by_filter _null_ _null_ _null_ ));
|
||||||
|
DESCR("set given weight for given lexemes");
|
||||||
DATA(insert OID = 3625 ( tsvector_concat PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 3614" _null_ _null_ _null_ _null_ _null_ tsvector_concat _null_ _null_ _null_ ));
|
DATA(insert OID = 3625 ( tsvector_concat PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 3614" _null_ _null_ _null_ _null_ _null_ tsvector_concat _null_ _null_ _null_ ));
|
||||||
|
DATA(insert OID = 3321 ( delete PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 25" _null_ _null_ _null_ _null_ _null_ tsvector_delete_str _null_ _null_ _null_ ));
|
||||||
|
DESCR("delete lexeme");
|
||||||
|
DATA(insert OID = 3323 ( delete PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 1009" _null_ _null_ _null_ _null_ _null_ tsvector_delete_arr _null_ _null_ _null_ ));
|
||||||
|
DESCR("delete given lexemes");
|
||||||
|
DATA(insert OID = 3322 ( unnest PGNSP PGUID 12 1 10 0 0 f f f f t t i s 1 0 2249 "3614" "{3614,25,1005,1009}" "{i,o,o,o}" "{tsvector,lexeme,positions,weights}" _null_ _null_ tsvector_unnest _null_ _null_ _null_ ));
|
||||||
|
DESCR("expand tsvector to set of rows");
|
||||||
|
DATA(insert OID = 3326 ( tsvector_to_array PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 1009 "3614" _null_ _null_ _null_ _null_ _null_ tsvector_to_array _null_ _null_ _null_ ));
|
||||||
|
DESCR("convert to lexeme's array");
|
||||||
|
DATA(insert OID = 3327 ( array_to_tsvector PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3614 "1009" _null_ _null_ _null_ _null_ _null_ array_to_tsvector _null_ _null_ _null_ ));
|
||||||
|
DESCR("build tsvector from lexeme's array");
|
||||||
|
DATA(insert OID = 3319 ( filter PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 1002" _null_ _null_ _null_ _null_ _null_ tsvector_filter _null_ _null_ _null_ ));
|
||||||
|
DESCR("returns tsvector that contain only postings with given weights");
|
||||||
|
|
||||||
DATA(insert OID = 3634 ( ts_match_vq PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3614 3615" _null_ _null_ _null_ _null_ _null_ ts_match_vq _null_ _null_ _null_ ));
|
DATA(insert OID = 3634 ( ts_match_vq PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3614 3615" _null_ _null_ _null_ _null_ _null_ ts_match_vq _null_ _null_ _null_ ));
|
||||||
DATA(insert OID = 3635 ( ts_match_qv PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3614" _null_ _null_ _null_ _null_ _null_ ts_match_qv _null_ _null_ _null_ ));
|
DATA(insert OID = 3635 ( ts_match_qv PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3614" _null_ _null_ _null_ _null_ _null_ ts_match_qv _null_ _null_ _null_ ));
|
||||||
|
@ -141,7 +141,14 @@ extern Datum tsvector_cmp(PG_FUNCTION_ARGS);
|
|||||||
extern Datum tsvector_length(PG_FUNCTION_ARGS);
|
extern Datum tsvector_length(PG_FUNCTION_ARGS);
|
||||||
extern Datum tsvector_strip(PG_FUNCTION_ARGS);
|
extern Datum tsvector_strip(PG_FUNCTION_ARGS);
|
||||||
extern Datum tsvector_setweight(PG_FUNCTION_ARGS);
|
extern Datum tsvector_setweight(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS);
|
||||||
extern Datum tsvector_concat(PG_FUNCTION_ARGS);
|
extern Datum tsvector_concat(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum tsvector_delete_str(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum tsvector_delete_arr(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum tsvector_unnest(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum tsvector_to_array(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum array_to_tsvector(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum tsvector_filter(PG_FUNCTION_ARGS);
|
||||||
extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS);
|
extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS);
|
||||||
extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS);
|
extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
@ -83,18 +83,6 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
|
|||||||
'a':3A,4B 'b':2A 'ba':1237
|
'a':3A,4B 'b':2A 'ba':1237
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
|
||||||
setweight
|
|
||||||
----------------------------------------------------------
|
|
||||||
'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
|
|
||||||
strip
|
|
||||||
---------------
|
|
||||||
'a' 'asd' 'w'
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
--Base tsquery test
|
--Base tsquery test
|
||||||
SELECT '1'::tsquery;
|
SELECT '1'::tsquery;
|
||||||
tsquery
|
tsquery
|
||||||
@ -625,3 +613,212 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
|
|||||||
0.1
|
0.1
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- tsvector editing operations
|
||||||
|
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
|
||||||
|
strip
|
||||||
|
---------------
|
||||||
|
'a' 'asd' 'w'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT strip('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
strip
|
||||||
|
----------------------------------------------
|
||||||
|
'base' 'hidden' 'rebel' 'spaceship' 'strike'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT strip('base hidden rebel spaceship strike'::tsvector);
|
||||||
|
strip
|
||||||
|
----------------------------------------------
|
||||||
|
'base' 'hidden' 'rebel' 'spaceship' 'strike'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete(to_tsvector('english', 'Rebel spaceships, striking from a hidden base'), 'spaceship');
|
||||||
|
delete
|
||||||
|
------------------------------------------
|
||||||
|
'base':7 'hidden':6 'rebel':1 'strike':3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');
|
||||||
|
delete
|
||||||
|
--------------------------------------------------------------
|
||||||
|
'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bas');
|
||||||
|
delete
|
||||||
|
-----------------------------------------------------------------------
|
||||||
|
'base':7 'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bases');
|
||||||
|
delete
|
||||||
|
-----------------------------------------------------------------------
|
||||||
|
'base':7 'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship');
|
||||||
|
delete
|
||||||
|
------------------------------------------
|
||||||
|
'base':7 'hidden':6 'rebel':1 'strike':3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base hidden rebel spaceship strike'::tsvector, 'spaceship');
|
||||||
|
delete
|
||||||
|
----------------------------------
|
||||||
|
'base' 'hidden' 'rebel' 'strike'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','rebel']);
|
||||||
|
delete
|
||||||
|
--------------------------------
|
||||||
|
'base':7 'hidden':6 'strike':3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceships','rebel']);
|
||||||
|
delete
|
||||||
|
-------------------------------------------------------------
|
||||||
|
'base':7 'hidden':6 'spaceship':2,33A,34B,35C,36 'strike':3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel']);
|
||||||
|
delete
|
||||||
|
-------------------------------------------------------------
|
||||||
|
'base':7 'hidden':6 'spaceship':2,33A,34B,35C,36 'strike':3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','leya','rebel']);
|
||||||
|
delete
|
||||||
|
--------------------------------
|
||||||
|
'base':7 'hidden':6 'strike':3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
|
||||||
|
delete
|
||||||
|
--------------------------
|
||||||
|
'base' 'hidden' 'strike'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
|
||||||
|
ERROR: lexeme array may not contain nulls
|
||||||
|
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
unnest
|
||||||
|
---------------------------------------------
|
||||||
|
(base,{7},{D})
|
||||||
|
(hidden,{6},{D})
|
||||||
|
(rebel,{1},{D})
|
||||||
|
(spaceship,"{2,33,34,35,36}","{D,A,B,C,D}")
|
||||||
|
(strike,{3},{D})
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
SELECT unnest('base hidden rebel spaceship strike'::tsvector);
|
||||||
|
unnest
|
||||||
|
---------------
|
||||||
|
(base,,)
|
||||||
|
(hidden,,)
|
||||||
|
(rebel,,)
|
||||||
|
(spaceship,,)
|
||||||
|
(strike,,)
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
lexeme | positions | weights
|
||||||
|
-----------+-----------------+-------------
|
||||||
|
base | {7} | {D}
|
||||||
|
hidden | {6} | {D}
|
||||||
|
rebel | {1} | {D}
|
||||||
|
spaceship | {2,33,34,35,36} | {D,A,B,C,D}
|
||||||
|
strike | {3} | {D}
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
SELECT * FROM unnest('base hidden rebel spaceship strike'::tsvector);
|
||||||
|
lexeme | positions | weights
|
||||||
|
-----------+-----------+---------
|
||||||
|
base | |
|
||||||
|
hidden | |
|
||||||
|
rebel | |
|
||||||
|
spaceship | |
|
||||||
|
strike | |
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
SELECT lexeme, positions[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
lexeme | positions
|
||||||
|
-----------+-----------
|
||||||
|
base | 7
|
||||||
|
hidden | 6
|
||||||
|
rebel | 1
|
||||||
|
spaceship | 2
|
||||||
|
strike | 3
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
SELECT tsvector_to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
tsvector_to_array
|
||||||
|
--------------------------------------
|
||||||
|
{base,hidden,rebel,spaceship,strike}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
|
||||||
|
tsvector_to_array
|
||||||
|
--------------------------------------
|
||||||
|
{base,hidden,rebel,spaceship,strike}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
|
||||||
|
array_to_tsvector
|
||||||
|
----------------------------------------------
|
||||||
|
'base' 'hidden' 'rebel' 'spaceship' 'strike'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
|
||||||
|
ERROR: lexeme array may not contain nulls
|
||||||
|
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
||||||
|
setweight
|
||||||
|
----------------------------------------------------------
|
||||||
|
'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');
|
||||||
|
setweight
|
||||||
|
----------------------------------------------------------
|
||||||
|
'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
|
||||||
|
setweight
|
||||||
|
------------------------------------------------------
|
||||||
|
'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81,222A,567
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
|
||||||
|
setweight
|
||||||
|
------------------------------------------------------
|
||||||
|
'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81,222A,567
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}');
|
||||||
|
setweight
|
||||||
|
--------------------------------------------------------
|
||||||
|
'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81C,222C,567C
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
|
||||||
|
setweight
|
||||||
|
---------------------------------
|
||||||
|
'a' 'asd' 'w':5,6,12B,13A 'zxc'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
|
||||||
|
ERROR: lexeme array may not contain nulls
|
||||||
|
SELECT filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
|
||||||
|
filter
|
||||||
|
-------------------------------------------------------------
|
||||||
|
'base':7A 'hidden':6A 'rebel':1A 'spaceship':2A 'strike':3A
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a}');
|
||||||
|
filter
|
||||||
|
--------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a,b,NULL}');
|
||||||
|
ERROR: weight array may not contain nulls
|
||||||
|
@ -14,8 +14,6 @@ SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
|
|||||||
SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
|
SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
|
||||||
SELECT '''w'':4A,3B,2C,1D,5 a:8';
|
SELECT '''w'':4A,3B,2C,1D,5 a:8';
|
||||||
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
|
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
|
||||||
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
|
||||||
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
|
|
||||||
|
|
||||||
--Base tsquery test
|
--Base tsquery test
|
||||||
SELECT '1'::tsquery;
|
SELECT '1'::tsquery;
|
||||||
@ -115,3 +113,48 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a | s');
|
|||||||
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
|
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
|
||||||
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
|
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
|
||||||
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
|
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
|
||||||
|
|
||||||
|
-- tsvector editing operations
|
||||||
|
|
||||||
|
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
|
||||||
|
SELECT strip('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
SELECT strip('base hidden rebel spaceship strike'::tsvector);
|
||||||
|
|
||||||
|
SELECT delete(to_tsvector('english', 'Rebel spaceships, striking from a hidden base'), 'spaceship');
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bas');
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bases');
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship');
|
||||||
|
SELECT delete('base hidden rebel spaceship strike'::tsvector, 'spaceship');
|
||||||
|
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','rebel']);
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceships','rebel']);
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel']);
|
||||||
|
SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','leya','rebel']);
|
||||||
|
SELECT delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
|
||||||
|
SELECT delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
|
||||||
|
|
||||||
|
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
SELECT unnest('base hidden rebel spaceship strike'::tsvector);
|
||||||
|
SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
SELECT * FROM unnest('base hidden rebel spaceship strike'::tsvector);
|
||||||
|
SELECT lexeme, positions[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
|
||||||
|
SELECT tsvector_to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
|
||||||
|
SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
|
||||||
|
|
||||||
|
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
|
||||||
|
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
|
||||||
|
|
||||||
|
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
||||||
|
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');
|
||||||
|
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
|
||||||
|
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
|
||||||
|
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}');
|
||||||
|
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
|
||||||
|
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
|
||||||
|
|
||||||
|
SELECT filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
|
||||||
|
SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a}');
|
||||||
|
SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a,b,NULL}');
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user