Extend GIN to support partial-match searches, and extend tsquery to support
prefix matching using this facility. Teodor Sigaev and Oleg Bartunov
This commit is contained in:
parent
e1bdd07c3c
commit
e6dbcb72fa
@ -1,4 +1,4 @@
|
|||||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.226 2008/03/30 04:08:14 neilc Exp $ -->
|
<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.227 2008/05/16 16:31:01 tgl Exp $ -->
|
||||||
|
|
||||||
<chapter id="datatype">
|
<chapter id="datatype">
|
||||||
<title id="datatype-title">Data Types</title>
|
<title id="datatype-title">Data Types</title>
|
||||||
@ -3298,18 +3298,17 @@ SELECT * FROM test;
|
|||||||
SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector;
|
SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector;
|
||||||
tsvector
|
tsvector
|
||||||
----------------------------------------------------
|
----------------------------------------------------
|
||||||
'a' 'on' 'and' 'ate' 'cat' 'fat' 'mat' 'rat' 'sat'
|
'a' 'and' 'ate' 'cat' 'fat' 'mat' 'on' 'rat' 'sat'
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
(As the example shows, the sorting is first by length and then
|
To represent
|
||||||
alphabetically, but that detail is seldom important.) To represent
|
|
||||||
lexemes containing whitespace or punctuation, surround them with quotes:
|
lexemes containing whitespace or punctuation, surround them with quotes:
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
SELECT $$the lexeme ' ' contains spaces$$::tsvector;
|
SELECT $$the lexeme ' ' contains spaces$$::tsvector;
|
||||||
tsvector
|
tsvector
|
||||||
-------------------------------------------
|
-------------------------------------------
|
||||||
'the' ' ' 'lexeme' 'spaces' 'contains'
|
' ' 'contains' 'lexeme' 'spaces' 'the'
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
(We use dollar-quoted string literals in this example and the next one,
|
(We use dollar-quoted string literals in this example and the next one,
|
||||||
@ -3320,7 +3319,7 @@ SELECT $$the lexeme ' ' contains spaces$$::tsvector;
|
|||||||
SELECT $$the lexeme 'Joe''s' contains a quote$$::tsvector;
|
SELECT $$the lexeme 'Joe''s' contains a quote$$::tsvector;
|
||||||
tsvector
|
tsvector
|
||||||
------------------------------------------------
|
------------------------------------------------
|
||||||
'a' 'the' 'Joe''s' 'quote' 'lexeme' 'contains'
|
'Joe''s' 'a' 'contains' 'lexeme' 'quote' 'the'
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
Optionally, integer <firstterm>position(s)</>
|
Optionally, integer <firstterm>position(s)</>
|
||||||
@ -3330,7 +3329,7 @@ SELECT $$the lexeme 'Joe''s' contains a quote$$::tsvector;
|
|||||||
SELECT 'a:1 fat:2 cat:3 sat:4 on:5 a:6 mat:7 and:8 ate:9 a:10 fat:11 rat:12'::tsvector;
|
SELECT 'a:1 fat:2 cat:3 sat:4 on:5 a:6 mat:7 and:8 ate:9 a:10 fat:11 rat:12'::tsvector;
|
||||||
tsvector
|
tsvector
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
'a':1,6,10 'on':5 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'rat':12 'sat':4
|
'a':1,6,10 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'on':5 'rat':12 'sat':4
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
A position normally indicates the source word's location in the
|
A position normally indicates the source word's location in the
|
||||||
@ -3369,7 +3368,7 @@ SELECT 'a:1A fat:2B,4C cat:5D'::tsvector;
|
|||||||
select 'The Fat Rats'::tsvector;
|
select 'The Fat Rats'::tsvector;
|
||||||
tsvector
|
tsvector
|
||||||
--------------------
|
--------------------
|
||||||
'Fat' 'The' 'Rats'
|
'Fat' 'Rats' 'The'
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
For most English-text-searching applications the above words would
|
For most English-text-searching applications the above words would
|
||||||
@ -3439,6 +3438,19 @@ SELECT 'fat:ab & cat'::tsquery;
|
|||||||
</programlisting>
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Also, lexemes in a <type>tsquery</type> can be labeled with <literal>*</>
|
||||||
|
to specify prefix matching:
|
||||||
|
<programlisting>
|
||||||
|
SELECT 'super:*'::tsquery;
|
||||||
|
tsquery
|
||||||
|
-----------
|
||||||
|
'super':*
|
||||||
|
</programlisting>
|
||||||
|
This query will match any word in a <type>tsvector</> that begins
|
||||||
|
with <quote>super</>.
|
||||||
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Quoting rules for lexemes are the same as described above for
|
Quoting rules for lexemes are the same as described above for
|
||||||
lexemes in <type>tsvector</>; and, as with <type>tsvector</>,
|
lexemes in <type>tsvector</>; and, as with <type>tsvector</>,
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/gin.sgml,v 2.14 2008/04/14 17:05:32 tgl Exp $ -->
|
<!-- $PostgreSQL: pgsql/doc/src/sgml/gin.sgml,v 2.15 2008/05/16 16:31:01 tgl Exp $ -->
|
||||||
|
|
||||||
<chapter id="GIN">
|
<chapter id="GIN">
|
||||||
<title>GIN Indexes</title>
|
<title>GIN Indexes</title>
|
||||||
@ -52,15 +52,15 @@
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
All it takes to get a <acronym>GIN</acronym> access method working
|
All it takes to get a <acronym>GIN</acronym> access method working is to
|
||||||
is to implement four user-defined methods, which define the behavior of
|
implement four (or five) user-defined methods, which define the behavior of
|
||||||
keys in the tree and the relationships between keys, indexed values,
|
keys in the tree and the relationships between keys, indexed values,
|
||||||
and indexable queries. In short, <acronym>GIN</acronym> combines
|
and indexable queries. In short, <acronym>GIN</acronym> combines
|
||||||
extensibility with generality, code reuse, and a clean interface.
|
extensibility with generality, code reuse, and a clean interface.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The four methods that an index operator class for
|
The four methods that an operator class for
|
||||||
<acronym>GIN</acronym> must provide are:
|
<acronym>GIN</acronym> must provide are:
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
@ -88,7 +88,7 @@
|
|||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term>Datum *extractQuery(Datum query, int32 *nkeys,
|
<term>Datum *extractQuery(Datum query, int32 *nkeys,
|
||||||
StrategyNumber n)</term>
|
StrategyNumber n, bool **pmatch)</term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Returns an array of keys given a value to be queried; that is,
|
Returns an array of keys given a value to be queried; that is,
|
||||||
@ -100,13 +100,22 @@
|
|||||||
to consult <literal>n</> to determine the data type of
|
to consult <literal>n</> to determine the data type of
|
||||||
<literal>query</> and the key values that need to be extracted.
|
<literal>query</> and the key values that need to be extracted.
|
||||||
The number of returned keys must be stored into <literal>*nkeys</>.
|
The number of returned keys must be stored into <literal>*nkeys</>.
|
||||||
If number of keys is equal to zero then <function>extractQuery</>
|
If the query contains no keys then <function>extractQuery</>
|
||||||
should store 0 or -1 into <literal>*nkeys</>. 0 means that any
|
should store 0 or -1 into <literal>*nkeys</>, depending on the
|
||||||
row matches the <literal>query</> and sequence scan should be
|
semantics of the operator. 0 means that every
|
||||||
produced. -1 means nothing can satisfy <literal>query</>.
|
value matches the <literal>query</> and a sequential scan should be
|
||||||
Choice of value should be based on semantics meaning of operation with
|
produced. -1 means nothing can match the <literal>query</>.
|
||||||
given strategy number.
|
<literal>pmatch</> is an output argument for use when partial match
|
||||||
|
is supported. To use it, <function>extractQuery</> must allocate
|
||||||
|
an array of <literal>*nkeys</> booleans and store its address at
|
||||||
|
<literal>*pmatch</>. Each element of the array should be set to TRUE
|
||||||
|
if the corresponding key requires partial match, FALSE if not.
|
||||||
|
If <literal>*pmatch</> is set to NULL then GIN assumes partial match
|
||||||
|
is not required. The variable is initialized to NULL before call,
|
||||||
|
so this argument can simply be ignored by operator classes that do
|
||||||
|
not support partial match.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
@ -133,6 +142,39 @@
|
|||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Optionally, an operator class for
|
||||||
|
<acronym>GIN</acronym> can supply a fifth method:
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term>int comparePartial(Datum partial_key, Datum key, StrategyNumber n)</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Compare a partial-match query to an index key. Returns an integer
|
||||||
|
whose sign indicates the result: less than zero means the index key
|
||||||
|
does not match the query, but the index scan should continue; zero
|
||||||
|
means that the index key does match the query; greater than zero
|
||||||
|
indicates that the index scan should stop because no more matches
|
||||||
|
are possible. The strategy number <literal>n</> of the operator
|
||||||
|
that generated the partial match query is provided, in case its
|
||||||
|
semantics are needed to determine when to end the scan.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
To support <quote>partial match</> queries, an operator class must
|
||||||
|
provide the <function>comparePartial</> method, and its
|
||||||
|
<function>extractQuery</> method must set the <literal>pmatch</>
|
||||||
|
parameter when a partial-match query is encountered. See
|
||||||
|
<xref linkend="gin-partial-match"> for details.
|
||||||
|
</para>
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="gin-implementation">
|
<sect1 id="gin-implementation">
|
||||||
@ -146,6 +188,33 @@
|
|||||||
list of heap pointers (PL, posting list) if the list is small enough.
|
list of heap pointers (PL, posting list) if the list is small enough.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<sect2 id="gin-partial-match">
|
||||||
|
<title>Partial match algorithm</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
GIN can support <quote>partial match</> queries, in which the query
|
||||||
|
does not determine an exact match for one or more keys, but the possible
|
||||||
|
matches fall within a reasonably narrow range of key values (within the
|
||||||
|
key sorting order determined by the <function>compare</> support method).
|
||||||
|
The <function>extractQuery</> method, instead of returning a key value
|
||||||
|
to be matched exactly, returns a key value that is the lower bound of
|
||||||
|
the range to be searched, and sets the <literal>pmatch</> flag true.
|
||||||
|
The key range is then searched using the <function>comparePartial</>
|
||||||
|
method. <function>comparePartial</> must return zero for an actual
|
||||||
|
match, less than zero for a non-match that is still within the range
|
||||||
|
to be searched, or greater than zero if the index key is past the range
|
||||||
|
that could match.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
During a partial-match scan, all <literal>itemPointer</>s for matching keys
|
||||||
|
are OR'ed into a <literal>TIDBitmap</>.
|
||||||
|
The scan fails if the <literal>TIDBitmap</> becomes lossy.
|
||||||
|
In this case an error message will be reported with advice
|
||||||
|
to increase <literal>work_mem</>.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="gin-tips">
|
<sect1 id="gin-tips">
|
||||||
@ -236,8 +305,14 @@
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<acronym>GIN</acronym> searches keys only by equality matching. This might
|
It is possible for an operator class to circumvent the restriction against
|
||||||
be improved in future.
|
full index scan. To do that, <function>extractValue</> must return at least
|
||||||
|
one (possibly dummy) key for every indexed value, and
|
||||||
|
<function>extractQuery</function> must convert an unrestricted search into
|
||||||
|
a partial-match query that will scan the whole index. This is inefficient
|
||||||
|
but might be necessary to avoid corner-case failures with operators such
|
||||||
|
as LIKE. Note however that failure could still occur if the intermediate
|
||||||
|
<literal>TIDBitmap</> becomes lossy.
|
||||||
</para>
|
</para>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
@ -247,9 +322,11 @@
|
|||||||
<para>
|
<para>
|
||||||
The <productname>PostgreSQL</productname> source distribution includes
|
The <productname>PostgreSQL</productname> source distribution includes
|
||||||
<acronym>GIN</acronym> operator classes for <type>tsvector</> and
|
<acronym>GIN</acronym> operator classes for <type>tsvector</> and
|
||||||
for one-dimensional arrays of all internal types. The following
|
for one-dimensional arrays of all internal types. Prefix searching in
|
||||||
<filename>contrib</> modules also contain <acronym>GIN</acronym>
|
<type>tsvector</> is implemented using the <acronym>GIN</> partial match
|
||||||
operator classes:
|
feature.
|
||||||
|
The following <filename>contrib</> modules also contain
|
||||||
|
<acronym>GIN</acronym> operator classes:
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.43 2008/04/14 17:05:32 tgl Exp $ -->
|
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.44 2008/05/16 16:31:01 tgl Exp $ -->
|
||||||
|
|
||||||
<chapter id="textsearch">
|
<chapter id="textsearch">
|
||||||
<title id="textsearch-title">Full Text Search</title>
|
<title id="textsearch-title">Full Text Search</title>
|
||||||
@ -754,6 +754,20 @@ SELECT to_tsquery('english', 'Fat | Rats:AB');
|
|||||||
'fat' | 'rat':AB
|
'fat' | 'rat':AB
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
|
Also, <literal>*</> can be attached to a lexeme to specify prefix matching:
|
||||||
|
|
||||||
|
<programlisting>
|
||||||
|
SELECT to_tsquery('supern:*A & star:A*B');
|
||||||
|
to_tsquery
|
||||||
|
--------------------------
|
||||||
|
'supern':*A & 'star':*AB
|
||||||
|
</programlisting>
|
||||||
|
|
||||||
|
Such a lexeme will match any word in a <type>tsvector</> that begins
|
||||||
|
with the given string.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
<function>to_tsquery</function> can also accept single-quoted
|
<function>to_tsquery</function> can also accept single-quoted
|
||||||
phrases. This is primarily useful when the configuration includes a
|
phrases. This is primarily useful when the configuration includes a
|
||||||
thesaurus dictionary that may trigger on such phrases.
|
thesaurus dictionary that may trigger on such phrases.
|
||||||
@ -798,7 +812,8 @@ SELECT to_tsquery('''supernovae stars'' & !crab');
|
|||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
Note that <function>plainto_tsquery</> cannot
|
Note that <function>plainto_tsquery</> cannot
|
||||||
recognize either Boolean operators or weight labels in its input:
|
recognize Boolean operators, weight labels, or prefix-match labels
|
||||||
|
in its input:
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
SELECT plainto_tsquery('english', 'The Fat & Rats:C');
|
SELECT plainto_tsquery('english', 'The Fat & Rats:C');
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/xindex.sgml,v 1.62 2008/04/14 17:05:32 tgl Exp $ -->
|
<!-- $PostgreSQL: pgsql/doc/src/sgml/xindex.sgml,v 1.63 2008/05/16 16:31:01 tgl Exp $ -->
|
||||||
|
|
||||||
<sect1 id="xindex">
|
<sect1 id="xindex">
|
||||||
<title>Interfacing Extensions To Indexes</title>
|
<title>Interfacing Extensions To Indexes</title>
|
||||||
@ -444,6 +444,13 @@
|
|||||||
<entry>consistent - determine whether value matches query condition</entry>
|
<entry>consistent - determine whether value matches query condition</entry>
|
||||||
<entry>4</entry>
|
<entry>4</entry>
|
||||||
</row>
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry>comparePartial - (optional method) compare partial key from
|
||||||
|
query and key from index, and return an integer less than zero, zero,
|
||||||
|
or greater than zero, indicating whether GIN should ignore this index
|
||||||
|
entry, treat the entry as a match, or stop the index scan</entry>
|
||||||
|
<entry>5</entry>
|
||||||
|
</row>
|
||||||
</tbody>
|
</tbody>
|
||||||
</tgroup>
|
</tgroup>
|
||||||
</table>
|
</table>
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.15 2008/05/12 00:00:44 alvherre Exp $
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.16 2008/05/16 16:31:01 tgl Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -18,8 +18,13 @@
|
|||||||
#include "catalog/index.h"
|
#include "catalog/index.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
|
#include "utils/datum.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tries to refind previously taken ItemPointer on page.
|
||||||
|
*/
|
||||||
static bool
|
static bool
|
||||||
findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
|
findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
|
||||||
{
|
{
|
||||||
@ -46,8 +51,204 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Start* functions setup state of searches: find correct buffer and locks it,
|
* Goes to the next page if current offset is outside of bounds
|
||||||
* Stop* functions unlock buffer (but don't release!)
|
*/
|
||||||
|
static bool
|
||||||
|
moveRightIfItNeeded( GinBtreeData *btree, GinBtreeStack *stack )
|
||||||
|
{
|
||||||
|
Page page = BufferGetPage(stack->buffer);
|
||||||
|
|
||||||
|
if ( stack->off > PageGetMaxOffsetNumber(page) )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We scanned the whole page, so we should take right page
|
||||||
|
*/
|
||||||
|
stack->blkno = GinPageGetOpaque(page)->rightlink;
|
||||||
|
|
||||||
|
if ( GinPageRightMost(page) )
|
||||||
|
return false; /* no more pages */
|
||||||
|
|
||||||
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
||||||
|
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
|
||||||
|
LockBuffer(stack->buffer, GIN_SHARE);
|
||||||
|
stack->off = FirstOffsetNumber;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Does a full scan of the posting tree and saves ItemPointers
|
||||||
|
* in scanEntry->partialMatch TIDBitmap
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
scanForItems( Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree )
|
||||||
|
{
|
||||||
|
GinPostingTreeScan *gdi;
|
||||||
|
Buffer buffer;
|
||||||
|
Page page;
|
||||||
|
BlockNumber blkno;
|
||||||
|
|
||||||
|
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
||||||
|
|
||||||
|
buffer = scanBeginPostingTree(gdi);
|
||||||
|
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
|
||||||
|
|
||||||
|
freeGinBtreeStack(gdi->stack);
|
||||||
|
pfree(gdi);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Goes through all leaves
|
||||||
|
*/
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
page = BufferGetPage(buffer);
|
||||||
|
|
||||||
|
if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber )
|
||||||
|
{
|
||||||
|
tbm_add_tuples( scanEntry->partialMatch,
|
||||||
|
(ItemPointer)GinDataPageGetItem(page, FirstOffsetNumber),
|
||||||
|
GinPageGetOpaque(page)->maxoff, false);
|
||||||
|
scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
|
||||||
|
}
|
||||||
|
|
||||||
|
blkno = GinPageGetOpaque(page)->rightlink;
|
||||||
|
if ( GinPageRightMost(page) )
|
||||||
|
{
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
return; /* no more pages */
|
||||||
|
}
|
||||||
|
|
||||||
|
LockBuffer(buffer, GIN_UNLOCK);
|
||||||
|
buffer = ReleaseAndReadBuffer(buffer, index, blkno);
|
||||||
|
LockBuffer(buffer, GIN_SHARE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Collects all ItemPointer into the TIDBitmap struct
|
||||||
|
* for entries partially matched to search entry.
|
||||||
|
*
|
||||||
|
* Returns true if done, false if the scan must be restarted from scratch
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
computePartialMatchList( GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry )
|
||||||
|
{
|
||||||
|
Page page;
|
||||||
|
IndexTuple itup;
|
||||||
|
Datum idatum;
|
||||||
|
bool isnull;
|
||||||
|
int32 cmp;
|
||||||
|
|
||||||
|
scanEntry->partialMatch = tbm_create( work_mem * 1024L );
|
||||||
|
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* stack->off points to the interested entry, buffer is already locked
|
||||||
|
*/
|
||||||
|
if ( moveRightIfItNeeded(btree, stack) == false )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
page = BufferGetPage(stack->buffer);
|
||||||
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
|
||||||
|
idatum = index_getattr(itup, 1, btree->ginstate->tupdesc, &isnull);
|
||||||
|
Assert(!isnull);
|
||||||
|
|
||||||
|
/*----------
|
||||||
|
* Check of partial match.
|
||||||
|
* case cmp == 0 => match
|
||||||
|
* case cmp > 0 => not match and finish scan
|
||||||
|
* case cmp < 0 => not match and continue scan
|
||||||
|
*----------
|
||||||
|
*/
|
||||||
|
cmp = DatumGetInt32(FunctionCall3(&btree->ginstate->comparePartialFn,
|
||||||
|
scanEntry->entry,
|
||||||
|
idatum,
|
||||||
|
UInt16GetDatum(scanEntry->strategy)));
|
||||||
|
|
||||||
|
if ( cmp > 0 )
|
||||||
|
return true;
|
||||||
|
else if ( cmp < 0 )
|
||||||
|
{
|
||||||
|
stack->off++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( GinIsPostingTree(itup) )
|
||||||
|
{
|
||||||
|
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
||||||
|
Datum newDatum,
|
||||||
|
savedDatum = datumCopy (
|
||||||
|
idatum,
|
||||||
|
btree->ginstate->tupdesc->attrs[0]->attbyval,
|
||||||
|
btree->ginstate->tupdesc->attrs[0]->attlen
|
||||||
|
);
|
||||||
|
/*
|
||||||
|
* We should unlock current page (but not unpin) during
|
||||||
|
* tree scan to prevent deadlock with vacuum processes.
|
||||||
|
*
|
||||||
|
* We save current entry value (savedDatum) to be able to refind
|
||||||
|
* our tuple after re-locking
|
||||||
|
*/
|
||||||
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
||||||
|
scanForItems( btree->index, scanEntry, rootPostingTree );
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We lock again the entry page and while it was unlocked
|
||||||
|
* insert might occured, so we need to refind our position
|
||||||
|
*/
|
||||||
|
LockBuffer(stack->buffer, GIN_SHARE);
|
||||||
|
page = BufferGetPage(stack->buffer);
|
||||||
|
if ( !GinPageIsLeaf(page) )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Root page becomes non-leaf while we unlock it. We
|
||||||
|
* will start again, this situation doesn't cause
|
||||||
|
* often - root can became a non-leaf only one per
|
||||||
|
* life of index.
|
||||||
|
*/
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
if ( moveRightIfItNeeded(btree, stack) == false )
|
||||||
|
elog(ERROR, "lost saved point in index"); /* must not happen !!! */
|
||||||
|
|
||||||
|
page = BufferGetPage(stack->buffer);
|
||||||
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
|
||||||
|
newDatum = index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull);
|
||||||
|
|
||||||
|
if ( compareEntries(btree->ginstate, newDatum, savedDatum) == 0 )
|
||||||
|
{
|
||||||
|
/* Found! */
|
||||||
|
if ( btree->ginstate->tupdesc->attrs[0]->attbyval == false )
|
||||||
|
pfree( DatumGetPointer(savedDatum) );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
stack->off++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
tbm_add_tuples( scanEntry->partialMatch, GinGetPosting(itup), GinGetNPosting(itup), false);
|
||||||
|
scanEntry->predictNumberResult += GinGetNPosting(itup);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ok, we save ItemPointers, go to the next entry
|
||||||
|
*/
|
||||||
|
stack->off++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start* functions set up the beginning state of searches: find the correct buffer and pin it.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
|
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
|
||||||
@ -78,10 +279,45 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
|
|||||||
entry->offset = InvalidOffsetNumber;
|
entry->offset = InvalidOffsetNumber;
|
||||||
entry->list = NULL;
|
entry->list = NULL;
|
||||||
entry->nlist = 0;
|
entry->nlist = 0;
|
||||||
|
entry->partialMatch = NULL;
|
||||||
|
entry->partialMatchResult = NULL;
|
||||||
entry->reduceResult = FALSE;
|
entry->reduceResult = FALSE;
|
||||||
entry->predictNumberResult = 0;
|
entry->predictNumberResult = 0;
|
||||||
|
|
||||||
if (btreeEntry.findItem(&btreeEntry, stackEntry))
|
if ( entry->isPartialMatch )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* btreeEntry.findItem points to the first equal or greater value
|
||||||
|
* than needed. So we will scan further and collect all
|
||||||
|
* ItemPointers
|
||||||
|
*/
|
||||||
|
btreeEntry.findItem(&btreeEntry, stackEntry);
|
||||||
|
if ( computePartialMatchList( &btreeEntry, stackEntry, entry ) == false )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* GIN tree was seriously restructured, so we will
|
||||||
|
* cleanup all found data and rescan. See comments near
|
||||||
|
* 'return false' in computePartialMatchList()
|
||||||
|
*/
|
||||||
|
if ( entry->partialMatch )
|
||||||
|
{
|
||||||
|
tbm_free( entry->partialMatch );
|
||||||
|
entry->partialMatch = NULL;
|
||||||
|
}
|
||||||
|
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||||
|
freeGinBtreeStack(stackEntry);
|
||||||
|
|
||||||
|
startScanEntry(index, ginstate, entry);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( entry->partialMatch && !tbm_is_empty(entry->partialMatch) )
|
||||||
|
{
|
||||||
|
tbm_begin_iterate(entry->partialMatch);
|
||||||
|
entry->isFinished = FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (btreeEntry.findItem(&btreeEntry, stackEntry))
|
||||||
{
|
{
|
||||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
|
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
|
||||||
|
|
||||||
@ -91,6 +327,13 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
|
|||||||
GinPostingTreeScan *gdi;
|
GinPostingTreeScan *gdi;
|
||||||
Page page;
|
Page page;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We should unlock the entry page before dealing with the
|
||||||
|
* posting tree to prevent deadlocks with vacuum processes.
|
||||||
|
* Because entry is never deleted from page and posting tree is
|
||||||
|
* never reduced to the posting list, we can unlock page after
|
||||||
|
* getting BlockNumber of root of posting tree.
|
||||||
|
*/
|
||||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||||
needUnlock = FALSE;
|
needUnlock = FALSE;
|
||||||
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
||||||
@ -142,7 +385,14 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
for (i = 0; i < key->nentries; i++)
|
for (i = 0; i < key->nentries; i++)
|
||||||
|
{
|
||||||
startScanEntry(index, ginstate, key->scanEntry + i);
|
startScanEntry(index, ginstate, key->scanEntry + i);
|
||||||
|
/*
|
||||||
|
* Copy strategy number to each entry of key to
|
||||||
|
* use in comparePartialFn call
|
||||||
|
*/
|
||||||
|
key->scanEntry[i].strategy = key->strategy;
|
||||||
|
}
|
||||||
|
|
||||||
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
|
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
|
||||||
key->isFinished = FALSE;
|
key->isFinished = FALSE;
|
||||||
@ -275,6 +525,38 @@ entryGetItem(Relation index, GinScanEntry entry)
|
|||||||
entry->isFinished = entry->master->isFinished;
|
entry->isFinished = entry->master->isFinished;
|
||||||
entry->curItem = entry->master->curItem;
|
entry->curItem = entry->master->curItem;
|
||||||
}
|
}
|
||||||
|
else if ( entry->partialMatch )
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples )
|
||||||
|
{
|
||||||
|
entry->partialMatchResult = tbm_iterate( entry->partialMatch );
|
||||||
|
|
||||||
|
if ( entry->partialMatchResult == NULL )
|
||||||
|
{
|
||||||
|
ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
|
||||||
|
entry->isFinished = TRUE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if ( entry->partialMatchResult->ntuples < 0 )
|
||||||
|
{
|
||||||
|
/* bitmap became lossy */
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||||
|
errmsg("not enough memory to store result of partial match operator" ),
|
||||||
|
errhint("Increase the \"work_mem\" parameter.")));
|
||||||
|
}
|
||||||
|
entry->offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ItemPointerSet(&entry->curItem,
|
||||||
|
entry->partialMatchResult->blockno,
|
||||||
|
entry->partialMatchResult->offsets[ entry->offset ]);
|
||||||
|
entry->offset ++;
|
||||||
|
|
||||||
|
} while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
|
||||||
|
}
|
||||||
else if (!BufferIsValid(entry->buffer))
|
else if (!BufferIsValid(entry->buffer))
|
||||||
{
|
{
|
||||||
entry->offset++;
|
entry->offset++;
|
||||||
@ -297,6 +579,54 @@ entryGetItem(Relation index, GinScanEntry entry)
|
|||||||
return entry->isFinished;
|
return entry->isFinished;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Restart a partial-match entry from its saved position (entry->curItem): the bitmap iterator is rewound and items are re-read until curItem is reached again. Slave entries and entries without a partial-match bitmap are left untouched.
|
||||||
|
* Reached from ginrestrpos(), which calls copyScanKeys() with its restart flag set to TRUE.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ginrestartentry(GinScanEntry entry)
|
||||||
|
{
|
||||||
|
ItemPointerData stopItem = entry->curItem; /* saved position that must be re-found */
|
||||||
|
bool savedReduceResult;
|
||||||
|
|
||||||
|
if ( entry->master || entry->partialMatch == NULL )
|
||||||
|
return; /* entry is a slave or has no partial-match bitmap: nothing to rewind */
|
||||||
|
|
||||||
|
if ( entry->isFinished )
|
||||||
|
return; /* entry was already finished when ginmarkpos() was called */
|
||||||
|
|
||||||
|
if ( ItemPointerGetBlockNumber(&stopItem) == InvalidBlockNumber )
|
||||||
|
return; /* entry had not been started yet when ginmarkpos() was called */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset the bitmap iterator and forget the current page result, so that the next entryGetItem() call starts over with tbm_iterate()
|
||||||
|
*/
|
||||||
|
tbm_begin_iterate( entry->partialMatch );
|
||||||
|
entry->partialMatchResult = NULL;
|
||||||
|
entry->offset = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Temporarily clear the reduceResult flag to guarantee refinding
|
||||||
|
* of curItem: while the flag is set, entryGetItem() may skip items through dropItem()
|
||||||
|
*/
|
||||||
|
savedReduceResult = entry->reduceResult;
|
||||||
|
entry->reduceResult = FALSE;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We can pass NULL instead of the index because
|
||||||
|
* the partial-match path doesn't use it. NOTE(review): entryGetItem() returns isFinished, so a false result means an item WAS fetched; the error test below looks inverted -- confirm.
|
||||||
|
*/
|
||||||
|
if ( entryGetItem( NULL, entry ) == false )
|
||||||
|
elog(ERROR, "cannot refind scan position"); /* should never happen */
|
||||||
|
} while( compareItemPointers( &stopItem, &entry->curItem ) != 0 );
|
||||||
|
|
||||||
|
Assert( entry->isFinished == FALSE );
|
||||||
|
|
||||||
|
entry->reduceResult = savedReduceResult;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sets key->curItem to new found heap item pointer for one scan key
|
* Sets key->curItem to new found heap item pointer for one scan key
|
||||||
* Returns isFinished, ie TRUE means we did NOT get a new item pointer!
|
* Returns isFinished, ie TRUE means we did NOT get a new item pointer!
|
||||||
@ -494,7 +824,7 @@ gingettuple(PG_FUNCTION_ARGS)
|
|||||||
bool res;
|
bool res;
|
||||||
|
|
||||||
if (dir != ForwardScanDirection)
|
if (dir != ForwardScanDirection)
|
||||||
elog(ERROR, "Gin doesn't support other scan directions than forward");
|
elog(ERROR, "GIN doesn't support other scan directions than forward");
|
||||||
|
|
||||||
if (GinIsNewKey(scan))
|
if (GinIsNewKey(scan))
|
||||||
newScanKey(scan);
|
newScanKey(scan);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.13 2008/05/12 00:00:44 alvherre Exp $
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.14 2008/05/16 16:31:01 tgl Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -36,7 +36,8 @@ ginbeginscan(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
|
fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
|
||||||
Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy)
|
Datum *entryValues, bool *partial_matches, uint32 nEntryValues,
|
||||||
|
StrategyNumber strategy)
|
||||||
{
|
{
|
||||||
uint32 i,
|
uint32 i,
|
||||||
j;
|
j;
|
||||||
@ -58,6 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
|
|||||||
key->scanEntry[i].buffer = InvalidBuffer;
|
key->scanEntry[i].buffer = InvalidBuffer;
|
||||||
key->scanEntry[i].list = NULL;
|
key->scanEntry[i].list = NULL;
|
||||||
key->scanEntry[i].nlist = 0;
|
key->scanEntry[i].nlist = 0;
|
||||||
|
key->scanEntry[i].isPartialMatch = ( ginstate->canPartialMatch && partial_matches )
|
||||||
|
? partial_matches[i] : false;
|
||||||
|
|
||||||
/* link to the equals entry in current scan key */
|
/* link to the equals entry in current scan key */
|
||||||
key->scanEntry[i].master = NULL;
|
key->scanEntry[i].master = NULL;
|
||||||
@ -98,6 +101,8 @@ resetScanKeys(GinScanKey keys, uint32 nkeys)
|
|||||||
key->scanEntry[j].buffer = InvalidBuffer;
|
key->scanEntry[j].buffer = InvalidBuffer;
|
||||||
key->scanEntry[j].list = NULL;
|
key->scanEntry[j].list = NULL;
|
||||||
key->scanEntry[j].nlist = 0;
|
key->scanEntry[j].nlist = 0;
|
||||||
|
key->scanEntry[j].partialMatch = NULL;
|
||||||
|
key->scanEntry[j].partialMatchResult = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -122,6 +127,8 @@ freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes)
|
|||||||
ReleaseBuffer(key->scanEntry[j].buffer);
|
ReleaseBuffer(key->scanEntry[j].buffer);
|
||||||
if (removeRes && key->scanEntry[j].list)
|
if (removeRes && key->scanEntry[j].list)
|
||||||
pfree(key->scanEntry[j].list);
|
pfree(key->scanEntry[j].list);
|
||||||
|
if (removeRes && key->scanEntry[j].partialMatch)
|
||||||
|
tbm_free(key->scanEntry[j].partialMatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (removeRes)
|
if (removeRes)
|
||||||
@ -153,19 +160,21 @@ newScanKey(IndexScanDesc scan)
|
|||||||
{
|
{
|
||||||
Datum *entryValues;
|
Datum *entryValues;
|
||||||
int32 nEntryValues;
|
int32 nEntryValues;
|
||||||
|
bool *partial_matches = NULL;
|
||||||
|
|
||||||
if (scankey[i].sk_flags & SK_ISNULL)
|
|
||||||
elog(ERROR, "Gin doesn't support NULL as scan key");
|
|
||||||
Assert(scankey[i].sk_attno == 1);
|
Assert(scankey[i].sk_attno == 1);
|
||||||
|
|
||||||
entryValues = (Datum *) DatumGetPointer(
|
/* XXX can't we treat nulls by just setting isVoidRes? */
|
||||||
FunctionCall3(
|
/* This would amount to assuming that all GIN operators are strict */
|
||||||
|
if (scankey[i].sk_flags & SK_ISNULL)
|
||||||
|
elog(ERROR, "GIN doesn't support NULL as scan key");
|
||||||
|
|
||||||
|
entryValues = (Datum *) DatumGetPointer(FunctionCall4(
|
||||||
&so->ginstate.extractQueryFn,
|
&so->ginstate.extractQueryFn,
|
||||||
scankey[i].sk_argument,
|
scankey[i].sk_argument,
|
||||||
PointerGetDatum(&nEntryValues),
|
PointerGetDatum(&nEntryValues),
|
||||||
UInt16GetDatum(scankey[i].sk_strategy)
|
UInt16GetDatum(scankey[i].sk_strategy),
|
||||||
)
|
PointerGetDatum(&partial_matches)));
|
||||||
);
|
|
||||||
if (nEntryValues < 0)
|
if (nEntryValues < 0)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -175,12 +184,16 @@ newScanKey(IndexScanDesc scan)
|
|||||||
so->isVoidRes = true;
|
so->isVoidRes = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* extractQueryFn signals that everything matches
|
||||||
|
*/
|
||||||
if (entryValues == NULL || nEntryValues == 0)
|
if (entryValues == NULL || nEntryValues == 0)
|
||||||
/* full scan... */
|
/* full scan... */
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
fillScanKey(&so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument,
|
fillScanKey(&so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument,
|
||||||
entryValues, nEntryValues, scankey[i].sk_strategy);
|
entryValues, partial_matches, nEntryValues, scankey[i].sk_strategy);
|
||||||
nkeys++;
|
nkeys++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -253,7 +266,7 @@ ginendscan(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static GinScanKey
|
static GinScanKey
|
||||||
copyScanKeys(GinScanKey keys, uint32 nkeys)
|
copyScanKeys(GinScanKey keys, uint32 nkeys, bool restart)
|
||||||
{
|
{
|
||||||
GinScanKey newkeys;
|
GinScanKey newkeys;
|
||||||
uint32 i,
|
uint32 i,
|
||||||
@ -277,6 +290,9 @@ copyScanKeys(GinScanKey keys, uint32 nkeys)
|
|||||||
|
|
||||||
newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN;
|
newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( restart )
|
||||||
|
ginrestartentry( &keys[i].scanEntry[j] );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -290,7 +306,7 @@ ginmarkpos(PG_FUNCTION_ARGS)
|
|||||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||||
|
|
||||||
freeScanKeys(so->markPos, so->nkeys, FALSE);
|
freeScanKeys(so->markPos, so->nkeys, FALSE);
|
||||||
so->markPos = copyScanKeys(so->keys, so->nkeys);
|
so->markPos = copyScanKeys(so->keys, so->nkeys, FALSE);
|
||||||
|
|
||||||
PG_RETURN_VOID();
|
PG_RETURN_VOID();
|
||||||
}
|
}
|
||||||
@ -302,7 +318,7 @@ ginrestrpos(PG_FUNCTION_ARGS)
|
|||||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||||
|
|
||||||
freeScanKeys(so->keys, so->nkeys, FALSE);
|
freeScanKeys(so->keys, so->nkeys, FALSE);
|
||||||
so->keys = copyScanKeys(so->markPos, so->nkeys);
|
so->keys = copyScanKeys(so->markPos, so->nkeys, TRUE);
|
||||||
|
|
||||||
PG_RETURN_VOID();
|
PG_RETURN_VOID();
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.14 2008/05/12 00:00:44 alvherre Exp $
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.15 2008/05/16 16:31:01 tgl Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -41,6 +41,22 @@ initGinState(GinState *state, Relation index)
|
|||||||
fmgr_info_copy(&(state->consistentFn),
|
fmgr_info_copy(&(state->consistentFn),
|
||||||
index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
|
index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
|
||||||
CurrentMemoryContext);
|
CurrentMemoryContext);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check opclass capability to do partial match.
|
||||||
|
*/
|
||||||
|
if ( index_getprocid(index, 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
|
||||||
|
{
|
||||||
|
fmgr_info_copy(&(state->comparePartialFn),
|
||||||
|
index_getprocinfo(index, 1, GIN_COMPARE_PARTIAL_PROC),
|
||||||
|
CurrentMemoryContext);
|
||||||
|
|
||||||
|
state->canPartialMatch = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
state->canPartialMatch = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.229 2008/04/13 20:51:20 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.230 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -2364,7 +2364,10 @@ expand_boolean_index_clause(Node *clause,
|
|||||||
* expand_indexqual_opclause --- expand a single indexqual condition
|
* expand_indexqual_opclause --- expand a single indexqual condition
|
||||||
* that is an operator clause
|
* that is an operator clause
|
||||||
*
|
*
|
||||||
* The input is a single RestrictInfo, the output a list of RestrictInfos
|
* The input is a single RestrictInfo, the output a list of RestrictInfos.
|
||||||
|
*
|
||||||
|
* In the base case this is just list_make1(), but we have to be prepared to
|
||||||
|
* expand special cases that were accepted by match_special_index_operator().
|
||||||
*/
|
*/
|
||||||
static List *
|
static List *
|
||||||
expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
|
expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
|
||||||
@ -2379,63 +2382,77 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
|
|||||||
Const *prefix = NULL;
|
Const *prefix = NULL;
|
||||||
Const *rest = NULL;
|
Const *rest = NULL;
|
||||||
Pattern_Prefix_Status pstatus;
|
Pattern_Prefix_Status pstatus;
|
||||||
List *result;
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* LIKE and regex operators are not members of any btree index opfamily,
|
||||||
|
* but they can be members of opfamilies for more exotic index types such
|
||||||
|
* as GIN. Therefore, we should only do expansion if the operator is
|
||||||
|
* actually not in the opfamily. But checking that requires a syscache
|
||||||
|
* lookup, so it's best to first see if the operator is one we are
|
||||||
|
* interested in.
|
||||||
|
*/
|
||||||
switch (expr_op)
|
switch (expr_op)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* LIKE and regex operators are not members of any index opfamily,
|
|
||||||
* so if we find one in an indexqual list we can assume that it
|
|
||||||
* was accepted by match_special_index_operator().
|
|
||||||
*/
|
|
||||||
case OID_TEXT_LIKE_OP:
|
case OID_TEXT_LIKE_OP:
|
||||||
case OID_BPCHAR_LIKE_OP:
|
case OID_BPCHAR_LIKE_OP:
|
||||||
case OID_NAME_LIKE_OP:
|
case OID_NAME_LIKE_OP:
|
||||||
case OID_BYTEA_LIKE_OP:
|
case OID_BYTEA_LIKE_OP:
|
||||||
|
if (!op_in_opfamily(expr_op, opfamily))
|
||||||
|
{
|
||||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
|
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
|
||||||
&prefix, &rest);
|
&prefix, &rest);
|
||||||
result = prefix_quals(leftop, opfamily, prefix, pstatus);
|
return prefix_quals(leftop, opfamily, prefix, pstatus);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OID_TEXT_ICLIKE_OP:
|
case OID_TEXT_ICLIKE_OP:
|
||||||
case OID_BPCHAR_ICLIKE_OP:
|
case OID_BPCHAR_ICLIKE_OP:
|
||||||
case OID_NAME_ICLIKE_OP:
|
case OID_NAME_ICLIKE_OP:
|
||||||
|
if (!op_in_opfamily(expr_op, opfamily))
|
||||||
|
{
|
||||||
/* the right-hand const is type text for all of these */
|
/* the right-hand const is type text for all of these */
|
||||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
|
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
|
||||||
&prefix, &rest);
|
&prefix, &rest);
|
||||||
result = prefix_quals(leftop, opfamily, prefix, pstatus);
|
return prefix_quals(leftop, opfamily, prefix, pstatus);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OID_TEXT_REGEXEQ_OP:
|
case OID_TEXT_REGEXEQ_OP:
|
||||||
case OID_BPCHAR_REGEXEQ_OP:
|
case OID_BPCHAR_REGEXEQ_OP:
|
||||||
case OID_NAME_REGEXEQ_OP:
|
case OID_NAME_REGEXEQ_OP:
|
||||||
|
if (!op_in_opfamily(expr_op, opfamily))
|
||||||
|
{
|
||||||
/* the right-hand const is type text for all of these */
|
/* the right-hand const is type text for all of these */
|
||||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
|
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
|
||||||
&prefix, &rest);
|
&prefix, &rest);
|
||||||
result = prefix_quals(leftop, opfamily, prefix, pstatus);
|
return prefix_quals(leftop, opfamily, prefix, pstatus);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OID_TEXT_ICREGEXEQ_OP:
|
case OID_TEXT_ICREGEXEQ_OP:
|
||||||
case OID_BPCHAR_ICREGEXEQ_OP:
|
case OID_BPCHAR_ICREGEXEQ_OP:
|
||||||
case OID_NAME_ICREGEXEQ_OP:
|
case OID_NAME_ICREGEXEQ_OP:
|
||||||
|
if (!op_in_opfamily(expr_op, opfamily))
|
||||||
|
{
|
||||||
/* the right-hand const is type text for all of these */
|
/* the right-hand const is type text for all of these */
|
||||||
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
|
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
|
||||||
&prefix, &rest);
|
&prefix, &rest);
|
||||||
result = prefix_quals(leftop, opfamily, prefix, pstatus);
|
return prefix_quals(leftop, opfamily, prefix, pstatus);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OID_INET_SUB_OP:
|
case OID_INET_SUB_OP:
|
||||||
case OID_INET_SUBEQ_OP:
|
case OID_INET_SUBEQ_OP:
|
||||||
result = network_prefix_quals(leftop, expr_op, opfamily,
|
if (!op_in_opfamily(expr_op, opfamily))
|
||||||
|
{
|
||||||
|
return network_prefix_quals(leftop, expr_op, opfamily,
|
||||||
patt->constvalue);
|
patt->constvalue);
|
||||||
break;
|
}
|
||||||
|
|
||||||
default:
|
|
||||||
result = list_make1(rinfo);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
/* Default case: just make a list of the unmodified indexqual */
|
||||||
|
return list_make1(rinfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.11 2008/03/25 22:42:43 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.12 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -32,24 +32,23 @@ get_current_ts_config(PG_FUNCTION_ARGS)
|
|||||||
static int
|
static int
|
||||||
compareWORD(const void *a, const void *b)
|
compareWORD(const void *a, const void *b)
|
||||||
{
|
{
|
||||||
if (((ParsedWord *) a)->len == ((ParsedWord *) b)->len)
|
int res;
|
||||||
{
|
|
||||||
int res = strncmp(
|
res = tsCompareString(
|
||||||
((ParsedWord *) a)->word,
|
((ParsedWord *) a)->word, ((ParsedWord *) a)->len,
|
||||||
((ParsedWord *) b)->word,
|
((ParsedWord *) b)->word, ((ParsedWord *) b)->len,
|
||||||
((ParsedWord *) b)->len);
|
false );
|
||||||
|
|
||||||
if (res == 0)
|
if (res == 0)
|
||||||
{
|
{
|
||||||
if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
|
if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
|
res = (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
return (((ParsedWord *) a)->len > ((ParsedWord *) b)->len) ? 1 : -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
uniqueWORD(ParsedWord *a, int4 l)
|
uniqueWORD(ParsedWord *a, int4 l)
|
||||||
@ -268,7 +267,7 @@ to_tsvector(PG_FUNCTION_ARGS)
|
|||||||
* and different variants are ORred together.
|
* and different variants are ORred together.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
|
pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
|
||||||
{
|
{
|
||||||
int4 count = 0;
|
int4 count = 0;
|
||||||
ParsedText prs;
|
ParsedText prs;
|
||||||
@ -302,7 +301,8 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
|
|||||||
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
|
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
|
||||||
{
|
{
|
||||||
|
|
||||||
pushValue(state, prs.words[count].word, prs.words[count].len, weight);
|
pushValue(state, prs.words[count].word, prs.words[count].len, weight,
|
||||||
|
( (prs.words[count].flags & TSL_PREFIX) || prefix ) ? true : false );
|
||||||
pfree(prs.words[count].word);
|
pfree(prs.words[count].word);
|
||||||
if (cnt)
|
if (cnt)
|
||||||
pushOperator(state, OP_AND);
|
pushOperator(state, OP_AND);
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.8 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -415,6 +415,7 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
|
|||||||
prs->words[prs->curwords].len = strlen(ptr->lexeme);
|
prs->words[prs->curwords].len = strlen(ptr->lexeme);
|
||||||
prs->words[prs->curwords].word = ptr->lexeme;
|
prs->words[prs->curwords].word = ptr->lexeme;
|
||||||
prs->words[prs->curwords].nvariant = ptr->nvariant;
|
prs->words[prs->curwords].nvariant = ptr->nvariant;
|
||||||
|
prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
|
||||||
prs->words[prs->curwords].alen = 0;
|
prs->words[prs->curwords].alen = 0;
|
||||||
prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
|
prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
|
||||||
ptr++;
|
ptr++;
|
||||||
@ -463,8 +464,8 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
|
|||||||
for (i = 0; i < query->size; i++)
|
for (i = 0; i < query->size; i++)
|
||||||
{
|
{
|
||||||
if (item->type == QI_VAL &&
|
if (item->type == QI_VAL &&
|
||||||
item->operand.length == buflen &&
|
tsCompareString( GETOPERAND(query) + item->operand.distance, item->operand.length,
|
||||||
strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
|
buf, buflen, item->operand.prefix ) == 0 )
|
||||||
{
|
{
|
||||||
if (word->item)
|
if (word->item)
|
||||||
{
|
{
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.11 2008/04/14 17:05:33 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.12 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -19,6 +19,46 @@
|
|||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
* gin_cmp_tslexeme: fmgr-callable comparison of two text lexemes.
* Hands the payload bytes of both arguments (VARDATA, with VARHDRSZ subtracted from the size) to tsCompareString() with its last argument false, and returns that result as int32.
* NOTE(review): the last tsCompareString() argument is presumably the prefix-match flag (hlfinditem() passes item->operand.prefix in that position) -- confirm.
*/
Datum
|
||||||
|
gin_cmp_tslexeme(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
text *a = PG_GETARG_TEXT_P(0);
|
||||||
|
text *b = PG_GETARG_TEXT_P(1);
|
||||||
|
int cmp;
|
||||||
|
|
||||||
|
cmp = tsCompareString(
|
||||||
|
VARDATA(a), VARSIZE(a) - VARHDRSZ,
|
||||||
|
VARDATA(b), VARSIZE(b) - VARHDRSZ,
|
||||||
|
false );
|
||||||
|
|
||||||
|
PG_FREE_IF_COPY(a,0);
|
||||||
|
PG_FREE_IF_COPY(b,1);
|
||||||
|
PG_RETURN_INT32( cmp );
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
* gin_cmp_prefix: fmgr-callable comparison of two text values with tsCompareString()'s last argument set to true.
* Mirrors gin_cmp_tslexeme() except for that flag and for the result mapping: any negative result is replaced by 1, so the function only ever returns zero or a positive value.
* The third fmgr argument (a StrategyNumber) is not read; its fetch is compiled out under NOT_USED.
* NOTE(review): presumably a positive result tells the GIN partial-match scan to stop, whereas a negative one would let it continue -- confirm against the comparePartial support-function contract.
*/
Datum
|
||||||
|
gin_cmp_prefix(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
text *a = PG_GETARG_TEXT_P(0);
|
||||||
|
text *b = PG_GETARG_TEXT_P(1);
|
||||||
|
#ifdef NOT_USED
|
||||||
|
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
||||||
|
#endif
|
||||||
|
int cmp;
|
||||||
|
|
||||||
|
cmp = tsCompareString(
|
||||||
|
VARDATA(a), VARSIZE(a) - VARHDRSZ,
|
||||||
|
VARDATA(b), VARSIZE(b) - VARHDRSZ,
|
||||||
|
true );
|
||||||
|
|
||||||
|
if ( cmp < 0 )
|
||||||
|
cmp = 1; /* never report "less than": prevents the scan from continuing */
|
||||||
|
|
||||||
|
PG_FREE_IF_COPY(a,0);
|
||||||
|
PG_FREE_IF_COPY(b,1);
|
||||||
|
PG_RETURN_INT32( cmp );
|
||||||
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
gin_extract_tsvector(PG_FUNCTION_ARGS)
|
gin_extract_tsvector(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -55,7 +95,9 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
|
|||||||
TSQuery query = PG_GETARG_TSQUERY(0);
|
TSQuery query = PG_GETARG_TSQUERY(0);
|
||||||
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||||
/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
|
/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
|
||||||
|
bool **ptr_partialmatch = (bool**) PG_GETARG_POINTER(3);
|
||||||
Datum *entries = NULL;
|
Datum *entries = NULL;
|
||||||
|
bool *partialmatch;
|
||||||
|
|
||||||
*nentries = 0;
|
*nentries = 0;
|
||||||
|
|
||||||
@ -65,12 +107,14 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
|
|||||||
j = 0,
|
j = 0,
|
||||||
len;
|
len;
|
||||||
QueryItem *item;
|
QueryItem *item;
|
||||||
|
bool use_fullscan=false;
|
||||||
|
|
||||||
item = clean_NOT(GETQUERY(query), &len);
|
item = clean_NOT(GETQUERY(query), &len);
|
||||||
if (!item)
|
if (!item)
|
||||||
ereport(ERROR,
|
{
|
||||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
use_fullscan = true;
|
||||||
errmsg("query requires full scan, which is not supported by GIN indexes")));
|
*nentries = 1;
|
||||||
|
}
|
||||||
|
|
||||||
item = GETQUERY(query);
|
item = GETQUERY(query);
|
||||||
|
|
||||||
@ -79,6 +123,7 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
|
|||||||
(*nentries)++;
|
(*nentries)++;
|
||||||
|
|
||||||
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
|
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
|
||||||
|
partialmatch = *ptr_partialmatch = (bool*) palloc(sizeof(bool) * (*nentries));
|
||||||
|
|
||||||
for (i = 0; i < query->size; i++)
|
for (i = 0; i < query->size; i++)
|
||||||
if (item[i].type == QI_VAL)
|
if (item[i].type == QI_VAL)
|
||||||
@ -88,8 +133,12 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
|
txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
|
||||||
val->length);
|
val->length);
|
||||||
|
partialmatch[j] = val->prefix;
|
||||||
entries[j++] = PointerGetDatum(txt);
|
entries[j++] = PointerGetDatum(txt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( use_fullscan )
|
||||||
|
entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
*nentries = -1; /* nothing can be found */
|
*nentries = -1; /* nothing can be found */
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.8 2008/04/14 17:05:33 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.9 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -307,6 +307,12 @@ checkcondition_arr(void *checkval, QueryOperand *val)
|
|||||||
|
|
||||||
/* Loop invariant: StopLow <= val < StopHigh */
|
/* Loop invariant: StopLow <= val < StopHigh */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* we are not able to find a prefix by hash value, so a prefix operand is always reported as a possible match
|
||||||
|
*/
|
||||||
|
if ( val->prefix )
|
||||||
|
return true;
|
||||||
|
|
||||||
while (StopLow < StopHigh)
|
while (StopLow < StopHigh)
|
||||||
{
|
{
|
||||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||||
@ -324,6 +330,11 @@ checkcondition_arr(void *checkval, QueryOperand *val)
|
|||||||
static bool
|
static bool
|
||||||
checkcondition_bit(void *checkval, QueryOperand *val)
|
checkcondition_bit(void *checkval, QueryOperand *val)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* we are not able to find a prefix in the signature tree, so a prefix operand is always reported as a possible match
|
||||||
|
*/
|
||||||
|
if ( val->prefix )
|
||||||
|
return true;
|
||||||
return GETBIT(checkval, HASHVAL(val->valcrc));
|
return GETBIT(checkval, HASHVAL(val->valcrc));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.17 2008/04/11 22:52:05 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.18 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -56,12 +56,14 @@ struct TSQueryParserStateData
|
|||||||
#define WAITSINGLEOPERAND 4
|
#define WAITSINGLEOPERAND 4
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* subroutine to parse the weight part, like ':1AB' of a query.
|
* subroutine to parse the modifiers (weight and prefix flag currently)
|
||||||
|
* part, like ':1AB' of a query.
|
||||||
*/
|
*/
|
||||||
static char *
|
static char *
|
||||||
get_weight(char *buf, int16 *weight)
|
get_modifiers(char *buf, int16 *weight, bool *prefix)
|
||||||
{
|
{
|
||||||
*weight = 0;
|
*weight = 0;
|
||||||
|
*prefix = false;
|
||||||
|
|
||||||
if (!t_iseq(buf, ':'))
|
if (!t_iseq(buf, ':'))
|
||||||
return buf;
|
return buf;
|
||||||
@ -87,6 +89,9 @@ get_weight(char *buf, int16 *weight)
|
|||||||
case 'D':
|
case 'D':
|
||||||
*weight |= 1;
|
*weight |= 1;
|
||||||
break;
|
break;
|
||||||
|
case '*':
|
||||||
|
*prefix = true;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
@ -118,8 +123,11 @@ typedef enum
|
|||||||
static ts_tokentype
|
static ts_tokentype
|
||||||
gettoken_query(TSQueryParserState state,
|
gettoken_query(TSQueryParserState state,
|
||||||
int8 *operator,
|
int8 *operator,
|
||||||
int *lenval, char **strval, int16 *weight)
|
int *lenval, char **strval, int16 *weight, bool *prefix)
|
||||||
{
|
{
|
||||||
|
*weight = 0;
|
||||||
|
*prefix = false;
|
||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
switch (state->state)
|
switch (state->state)
|
||||||
@ -157,7 +165,7 @@ gettoken_query(TSQueryParserState state,
|
|||||||
reset_tsvector_parser(state->valstate, state->buf);
|
reset_tsvector_parser(state->valstate, state->buf);
|
||||||
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
|
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
|
||||||
{
|
{
|
||||||
state->buf = get_weight(state->buf, weight);
|
state->buf = get_modifiers(state->buf, weight, prefix);
|
||||||
state->state = WAITOPERATOR;
|
state->state = WAITOPERATOR;
|
||||||
return PT_VAL;
|
return PT_VAL;
|
||||||
}
|
}
|
||||||
@ -232,7 +240,7 @@ pushOperator(TSQueryParserState state, int8 oper)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
|
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
|
||||||
{
|
{
|
||||||
QueryOperand *tmp;
|
QueryOperand *tmp;
|
||||||
|
|
||||||
@ -250,6 +258,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
|
|||||||
tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
|
tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
|
||||||
tmp->type = QI_VAL;
|
tmp->type = QI_VAL;
|
||||||
tmp->weight = weight;
|
tmp->weight = weight;
|
||||||
|
tmp->prefix = prefix;
|
||||||
tmp->valcrc = (int32) valcrc;
|
tmp->valcrc = (int32) valcrc;
|
||||||
tmp->length = lenval;
|
tmp->length = lenval;
|
||||||
tmp->distance = distance;
|
tmp->distance = distance;
|
||||||
@ -264,7 +273,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
|
|||||||
* of the string.
|
* of the string.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
|
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
|
||||||
{
|
{
|
||||||
pg_crc32 valcrc;
|
pg_crc32 valcrc;
|
||||||
|
|
||||||
@ -277,7 +286,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
|
|||||||
INIT_CRC32(valcrc);
|
INIT_CRC32(valcrc);
|
||||||
COMP_CRC32(valcrc, strval, lenval);
|
COMP_CRC32(valcrc, strval, lenval);
|
||||||
FIN_CRC32(valcrc);
|
FIN_CRC32(valcrc);
|
||||||
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
|
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
|
||||||
|
|
||||||
/* append the value string to state.op, enlarging buffer if needed first */
|
/* append the value string to state.op, enlarging buffer if needed first */
|
||||||
while (state->curop - state->op + lenval + 1 >= state->lenop)
|
while (state->curop - state->op + lenval + 1 >= state->lenop)
|
||||||
@ -330,16 +339,17 @@ makepol(TSQueryParserState state,
|
|||||||
int8 opstack[STACKDEPTH];
|
int8 opstack[STACKDEPTH];
|
||||||
int lenstack = 0;
|
int lenstack = 0;
|
||||||
int16 weight = 0;
|
int16 weight = 0;
|
||||||
|
bool prefix;
|
||||||
|
|
||||||
/* since this function recurses, it could be driven to stack overflow */
|
/* since this function recurses, it could be driven to stack overflow */
|
||||||
check_stack_depth();
|
check_stack_depth();
|
||||||
|
|
||||||
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
|
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case PT_VAL:
|
case PT_VAL:
|
||||||
pushval(opaque, state, strval, lenval, weight);
|
pushval(opaque, state, strval, lenval, weight, prefix);
|
||||||
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
|
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
|
||||||
opstack[lenstack - 1] == OP_NOT))
|
opstack[lenstack - 1] == OP_NOT))
|
||||||
{
|
{
|
||||||
@ -549,9 +559,9 @@ parse_tsquery(char *buf,
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
|
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
|
||||||
int16 weight)
|
int16 weight, bool prefix)
|
||||||
{
|
{
|
||||||
pushValue(state, strval, lenval, weight);
|
pushValue(state, strval, lenval, weight, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -605,7 +615,7 @@ infix(INFIX *in, bool first)
|
|||||||
char *op = in->op + curpol->distance;
|
char *op = in->op + curpol->distance;
|
||||||
int clen;
|
int clen;
|
||||||
|
|
||||||
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
|
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
|
||||||
*(in->cur) = '\'';
|
*(in->cur) = '\'';
|
||||||
in->cur++;
|
in->cur++;
|
||||||
while (*op)
|
while (*op)
|
||||||
@ -628,10 +638,15 @@ infix(INFIX *in, bool first)
|
|||||||
}
|
}
|
||||||
*(in->cur) = '\'';
|
*(in->cur) = '\'';
|
||||||
in->cur++;
|
in->cur++;
|
||||||
if (curpol->weight)
|
if (curpol->weight || curpol->prefix)
|
||||||
{
|
{
|
||||||
*(in->cur) = ':';
|
*(in->cur) = ':';
|
||||||
in->cur++;
|
in->cur++;
|
||||||
|
if ( curpol->prefix )
|
||||||
|
{
|
||||||
|
*(in->cur) = '*';
|
||||||
|
in->cur++;
|
||||||
|
}
|
||||||
if (curpol->weight & (1 << 3))
|
if (curpol->weight & (1 << 3))
|
||||||
{
|
{
|
||||||
*(in->cur) = 'A';
|
*(in->cur) = 'A';
|
||||||
@ -769,6 +784,7 @@ tsqueryout(PG_FUNCTION_ARGS)
|
|||||||
* uint8 type, QI_VAL
|
* uint8 type, QI_VAL
|
||||||
* uint8 weight
|
* uint8 weight
|
||||||
* operand text in client encoding, null-terminated
|
* operand text in client encoding, null-terminated
|
||||||
|
* uint8 prefix
|
||||||
*
|
*
|
||||||
* For each operator:
|
* For each operator:
|
||||||
* uint8 type, QI_OPR
|
* uint8 type, QI_OPR
|
||||||
@ -793,6 +809,7 @@ tsquerysend(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
case QI_VAL:
|
case QI_VAL:
|
||||||
pq_sendint(&buf, item->operand.weight, sizeof(uint8));
|
pq_sendint(&buf, item->operand.weight, sizeof(uint8));
|
||||||
|
pq_sendint(&buf, item->operand.prefix, sizeof(uint8));
|
||||||
pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
|
pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
|
||||||
break;
|
break;
|
||||||
case QI_OPR:
|
case QI_OPR:
|
||||||
@ -844,10 +861,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
size_t val_len; /* length after recoding to server encoding */
|
size_t val_len; /* length after recoding to server encoding */
|
||||||
uint8 weight;
|
uint8 weight;
|
||||||
|
uint8 prefix;
|
||||||
const char *val;
|
const char *val;
|
||||||
pg_crc32 valcrc;
|
pg_crc32 valcrc;
|
||||||
|
|
||||||
weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
|
weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
|
||||||
|
prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
|
||||||
val = pq_getmsgstring(buf);
|
val = pq_getmsgstring(buf);
|
||||||
val_len = strlen(val);
|
val_len = strlen(val);
|
||||||
|
|
||||||
@ -869,6 +888,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
|||||||
FIN_CRC32(valcrc);
|
FIN_CRC32(valcrc);
|
||||||
|
|
||||||
item->operand.weight = weight;
|
item->operand.weight = weight;
|
||||||
|
item->operand.prefix = (prefix) ? true : false;
|
||||||
item->operand.valcrc = (int32) valcrc;
|
item->operand.valcrc = (int32) valcrc;
|
||||||
item->operand.length = val_len;
|
item->operand.length = val_len;
|
||||||
item->operand.distance = datalen;
|
item->operand.distance = datalen;
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.8 2008/01/01 19:45:53 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.9 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -125,10 +125,7 @@ QTNodeCompare(QTNode *an, QTNode *bn)
|
|||||||
return (ao->valcrc > bo->valcrc) ? -1 : 1;
|
return (ao->valcrc > bo->valcrc) ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ao->length == bo->length)
|
return tsCompareString( an->word, ao->length, bn->word, bo->length, false);
|
||||||
return strncmp(an->word, bn->word, ao->length);
|
|
||||||
else
|
|
||||||
return (ao->length > bo->length) ? -1 : 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.12 2008/01/01 19:45:53 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.13 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -71,45 +71,60 @@ cnt_length(TSVector t)
|
|||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
|
|
||||||
{
|
|
||||||
if (ptr->len == item->length)
|
|
||||||
return strncmp(
|
|
||||||
eval + ptr->pos,
|
|
||||||
qval + item->distance,
|
|
||||||
item->length);
|
|
||||||
|
|
||||||
return (ptr->len > item->length) ? 1 : -1;
|
#define WordECompareQueryItem(e,q,p,i,m) \
|
||||||
}
|
tsCompareString((q) + (i)->distance, (i)->length, \
|
||||||
|
(e) + (p)->pos, (p)->len, (m))
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns a pointer to a WordEntry corresponding 'item' from tsvector 't'. 'q'
|
* Returns a pointer to a WordEntry's array corresponding to 'item' from
|
||||||
* is the TSQuery containing 'item'. Returns NULL if not found.
|
* tsvector 't'. 'q' is the TSQuery containing 'item'.
|
||||||
|
* Returns NULL if not found.
|
||||||
*/
|
*/
|
||||||
static WordEntry *
|
static WordEntry *
|
||||||
find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
|
find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
|
||||||
{
|
{
|
||||||
WordEntry *StopLow = ARRPTR(t);
|
WordEntry *StopLow = ARRPTR(t);
|
||||||
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
|
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
|
||||||
WordEntry *StopMiddle;
|
WordEntry *StopMiddle = StopHigh;
|
||||||
int difference;
|
int difference;
|
||||||
|
|
||||||
/* Loop invariant: StopLow <= item < StopHigh */
|
*nitem=0;
|
||||||
|
|
||||||
|
/* Loop invariant: StopLow <= item < StopHigh */
|
||||||
while (StopLow < StopHigh)
|
while (StopLow < StopHigh)
|
||||||
{
|
{
|
||||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||||
difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
|
||||||
if (difference == 0)
|
if (difference == 0)
|
||||||
return StopMiddle;
|
{
|
||||||
else if (difference < 0)
|
StopHigh = StopMiddle;
|
||||||
|
*nitem=1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (difference > 0)
|
||||||
StopLow = StopMiddle + 1;
|
StopLow = StopMiddle + 1;
|
||||||
else
|
else
|
||||||
StopHigh = StopMiddle;
|
StopHigh = StopMiddle;
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
if ( item->prefix == true )
|
||||||
|
{
|
||||||
|
if ( StopLow >= StopHigh )
|
||||||
|
StopMiddle = StopHigh;
|
||||||
|
|
||||||
|
*nitem=0;
|
||||||
|
|
||||||
|
while( StopMiddle < (WordEntry *) STRPTR(t) &&
|
||||||
|
WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0 )
|
||||||
|
{
|
||||||
|
(*nitem)++;
|
||||||
|
StopMiddle++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ( *nitem > 0 ) ? StopHigh : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -123,12 +138,9 @@ compareQueryOperand(const void *a, const void *b, void *arg)
|
|||||||
QueryOperand *qa = (*(QueryOperand **) a);
|
QueryOperand *qa = (*(QueryOperand **) a);
|
||||||
QueryOperand *qb = (*(QueryOperand **) b);
|
QueryOperand *qb = (*(QueryOperand **) b);
|
||||||
|
|
||||||
if (qa->length == qb->length)
|
return tsCompareString(operand + qa->distance, qa->length,
|
||||||
return strncmp(operand + qa->distance,
|
operand + qb->distance, qb->length,
|
||||||
operand + qb->distance,
|
false);
|
||||||
qb->length);
|
|
||||||
|
|
||||||
return (qa->length > qb->length) ? 1 : -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -198,12 +210,14 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
|
|||||||
k,
|
k,
|
||||||
l,
|
l,
|
||||||
p;
|
p;
|
||||||
WordEntry *entry;
|
WordEntry *entry,
|
||||||
|
*firstentry;
|
||||||
WordEntryPos *post,
|
WordEntryPos *post,
|
||||||
*ct;
|
*ct;
|
||||||
int4 dimt,
|
int4 dimt,
|
||||||
lenct,
|
lenct,
|
||||||
dist;
|
dist,
|
||||||
|
nitem;
|
||||||
float res = -1.0;
|
float res = -1.0;
|
||||||
QueryOperand **item;
|
QueryOperand **item;
|
||||||
int size = q->size;
|
int size = q->size;
|
||||||
@ -219,16 +233,17 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
|
|||||||
|
|
||||||
for (i = 0; i < size; i++)
|
for (i = 0; i < size; i++)
|
||||||
{
|
{
|
||||||
entry = find_wordentry(t, q, item[i]);
|
firstentry = entry = find_wordentry(t, q, item[i], &nitem);
|
||||||
if (!entry)
|
if (!entry)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
while( entry - firstentry < nitem )
|
||||||
|
{
|
||||||
if (entry->haspos)
|
if (entry->haspos)
|
||||||
pos[i] = _POSVECPTR(t, entry);
|
pos[i] = _POSVECPTR(t, entry);
|
||||||
else
|
else
|
||||||
pos[i] = &POSNULL;
|
pos[i] = &POSNULL;
|
||||||
|
|
||||||
|
|
||||||
dimt = pos[i]->npos;
|
dimt = pos[i]->npos;
|
||||||
post = pos[i]->pos;
|
post = pos[i]->pos;
|
||||||
for (k = 0; k < i; k++)
|
for (k = 0; k < i; k++)
|
||||||
@ -254,6 +269,9 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
entry++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pfree(pos);
|
pfree(pos);
|
||||||
pfree(item);
|
pfree(item);
|
||||||
@ -263,11 +281,13 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
|
|||||||
static float
|
static float
|
||||||
calc_rank_or(float *w, TSVector t, TSQuery q)
|
calc_rank_or(float *w, TSVector t, TSQuery q)
|
||||||
{
|
{
|
||||||
WordEntry *entry;
|
WordEntry *entry,
|
||||||
|
*firstentry;
|
||||||
WordEntryPos *post;
|
WordEntryPos *post;
|
||||||
int4 dimt,
|
int4 dimt,
|
||||||
j,
|
j,
|
||||||
i;
|
i,
|
||||||
|
nitem;
|
||||||
float res = 0.0;
|
float res = 0.0;
|
||||||
QueryOperand **item;
|
QueryOperand **item;
|
||||||
int size = q->size;
|
int size = q->size;
|
||||||
@ -280,10 +300,12 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
|
|||||||
wjm;
|
wjm;
|
||||||
int4 jm;
|
int4 jm;
|
||||||
|
|
||||||
entry = find_wordentry(t, q, item[i]);
|
firstentry = entry = find_wordentry(t, q, item[i], &nitem);
|
||||||
if (!entry)
|
if (!entry)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
while( entry - firstentry < nitem )
|
||||||
|
{
|
||||||
if (entry->haspos)
|
if (entry->haspos)
|
||||||
{
|
{
|
||||||
dimt = POSDATALEN(t, entry);
|
dimt = POSDATALEN(t, entry);
|
||||||
@ -315,6 +337,9 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
|
|||||||
Oleg Bartunov
|
Oleg Bartunov
|
||||||
*/
|
*/
|
||||||
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
|
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
|
||||||
|
|
||||||
|
entry++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (size > 0)
|
if (size > 0)
|
||||||
res = res / size;
|
res = res / size;
|
||||||
@ -594,11 +619,13 @@ static DocRepresentation *
|
|||||||
get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
||||||
{
|
{
|
||||||
QueryItem *item = GETQUERY(qr->query);
|
QueryItem *item = GETQUERY(qr->query);
|
||||||
WordEntry *entry;
|
WordEntry *entry,
|
||||||
|
*firstentry;
|
||||||
WordEntryPos *post;
|
WordEntryPos *post;
|
||||||
int4 dimt,
|
int4 dimt,
|
||||||
j,
|
j,
|
||||||
i;
|
i,
|
||||||
|
nitem;
|
||||||
int len = qr->query->size * 4,
|
int len = qr->query->size * 4,
|
||||||
cur = 0;
|
cur = 0;
|
||||||
DocRepresentation *doc;
|
DocRepresentation *doc;
|
||||||
@ -619,10 +646,12 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
|||||||
if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
|
if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
entry = find_wordentry(txt, qr->query, curoperand);
|
firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
|
||||||
if (!entry)
|
if (!entry)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
while( entry - firstentry < nitem )
|
||||||
|
{
|
||||||
if (entry->haspos)
|
if (entry->haspos)
|
||||||
{
|
{
|
||||||
dimt = POSDATALEN(txt, entry);
|
dimt = POSDATALEN(txt, entry);
|
||||||
@ -677,6 +706,9 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
|||||||
doc[cur].wclass = WEP_GETWEIGHT(post[j]);
|
doc[cur].wclass = WEP_GETWEIGHT(post[j]);
|
||||||
cur++;
|
cur++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
entry++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*doclen = cur;
|
*doclen = cur;
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.13 2008/03/10 12:57:05 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.14 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -85,14 +85,9 @@ compareentry(const void *va, const void *vb, void *arg)
|
|||||||
const WordEntryIN *b = (const WordEntryIN *) vb;
|
const WordEntryIN *b = (const WordEntryIN *) vb;
|
||||||
char *BufferStr = (char *) arg;
|
char *BufferStr = (char *) arg;
|
||||||
|
|
||||||
if (a->entry.len == b->entry.len)
|
return tsCompareString( &BufferStr[a->entry.pos], a->entry.len,
|
||||||
{
|
&BufferStr[b->entry.pos], b->entry.len,
|
||||||
return strncmp(&BufferStr[a->entry.pos],
|
false );
|
||||||
&BufferStr[b->entry.pos],
|
|
||||||
a->entry.len);
|
|
||||||
}
|
|
||||||
|
|
||||||
return (a->entry.len > b->entry.len) ? 1 : -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.15 2008/04/08 18:20:29 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.16 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -127,11 +127,7 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
|
|||||||
{
|
{
|
||||||
return (aptr->haspos > bptr->haspos) ? -1 : 1;
|
return (aptr->haspos > bptr->haspos) ? -1 : 1;
|
||||||
}
|
}
|
||||||
else if (aptr->len != bptr->len)
|
else if ( (res=tsCompareString( STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) !=0 )
|
||||||
{
|
|
||||||
return (aptr->len > bptr->len) ? -1 : 1;
|
|
||||||
}
|
|
||||||
else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
|
|
||||||
{
|
{
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -286,18 +282,10 @@ tsvector_setweight(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_POINTER(out);
|
PG_RETURN_POINTER(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
#define compareEntry(pa, a, pb, b) \
|
||||||
compareEntry(char *ptra, WordEntry *a, char *ptrb, WordEntry *b)
|
tsCompareString((pa) + (a)->pos, (a)->len, \
|
||||||
{
|
(pb) + (b)->pos, (b)->len, \
|
||||||
if (a->len == b->len)
|
false)
|
||||||
{
|
|
||||||
return strncmp(
|
|
||||||
ptra + a->pos,
|
|
||||||
ptrb + b->pos,
|
|
||||||
a->len);
|
|
||||||
}
|
|
||||||
return (a->len > b->len) ? 1 : -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add positions from src to dest after offsetting them by maxpos.
|
* Add positions from src to dest after offsetting them by maxpos.
|
||||||
@ -534,18 +522,46 @@ tsvector_concat(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* compare 2 string values
|
* Compare two strings by tsvector rules.
|
||||||
|
* If prefix = true, it returns zero if and only if b begins with a (a is a prefix of b)
|
||||||
*/
|
*/
|
||||||
static int4
|
int4
|
||||||
ValCompare(CHKVAL *chkval, WordEntry *ptr, QueryOperand *item)
|
tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
|
||||||
{
|
{
|
||||||
if (ptr->len == item->length)
|
int cmp;
|
||||||
return strncmp(
|
|
||||||
&(chkval->values[ptr->pos]),
|
|
||||||
&(chkval->operand[item->distance]),
|
|
||||||
item->length);
|
|
||||||
|
|
||||||
return (ptr->len > item->length) ? 1 : -1;
|
if ( lena == 0 )
|
||||||
|
{
|
||||||
|
if ( prefix )
|
||||||
|
cmp = 0; /* an empty string is a prefix of any string */
|
||||||
|
else
|
||||||
|
cmp = (lenb>0) ? -1 : 0;
|
||||||
|
}
|
||||||
|
else if ( lenb == 0 )
|
||||||
|
{
|
||||||
|
cmp = (lena>0) ? 1 : 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cmp = memcmp(a, b, Min(lena, lenb));
|
||||||
|
|
||||||
|
if ( prefix )
|
||||||
|
{
|
||||||
|
if ( cmp == 0 && lena > lenb )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* a is longer than b, so b cannot begin with a
|
||||||
|
*/
|
||||||
|
cmp=1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( (cmp == 0) && (lena != lenb) )
|
||||||
|
{
|
||||||
|
cmp = (lena < lenb) ? -1 : 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return cmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -582,25 +598,52 @@ checkcondition_str(void *checkval, QueryOperand *val)
|
|||||||
CHKVAL *chkval = (CHKVAL *) checkval;
|
CHKVAL *chkval = (CHKVAL *) checkval;
|
||||||
WordEntry *StopLow = chkval->arrb;
|
WordEntry *StopLow = chkval->arrb;
|
||||||
WordEntry *StopHigh = chkval->arre;
|
WordEntry *StopHigh = chkval->arre;
|
||||||
WordEntry *StopMiddle;
|
WordEntry *StopMiddle = StopHigh;
|
||||||
int difference;
|
int difference = -1;
|
||||||
|
bool res=false;
|
||||||
|
|
||||||
/* Loop invariant: StopLow <= val < StopHigh */
|
/* Loop invariant: StopLow <= val < StopHigh */
|
||||||
|
|
||||||
while (StopLow < StopHigh)
|
while (StopLow < StopHigh)
|
||||||
{
|
{
|
||||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||||
difference = ValCompare(chkval, StopMiddle, val);
|
difference = tsCompareString( chkval->operand + val->distance, val->length,
|
||||||
|
chkval->values + StopMiddle->pos, StopMiddle->len,
|
||||||
|
false);
|
||||||
|
|
||||||
if (difference == 0)
|
if (difference == 0)
|
||||||
return (val->weight && StopMiddle->haspos) ?
|
{
|
||||||
|
res = (val->weight && StopMiddle->haspos) ?
|
||||||
checkclass_str(chkval, StopMiddle, val) : true;
|
checkclass_str(chkval, StopMiddle, val) : true;
|
||||||
else if (difference < 0)
|
break;
|
||||||
|
}
|
||||||
|
else if (difference > 0)
|
||||||
StopLow = StopMiddle + 1;
|
StopLow = StopMiddle + 1;
|
||||||
else
|
else
|
||||||
StopHigh = StopMiddle;
|
StopHigh = StopMiddle;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (false);
|
if ( res == false && val->prefix == true )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* there was a failed exact search, so we should scan further to find
|
||||||
|
* a prefix match.
|
||||||
|
*/
|
||||||
|
if ( StopLow >= StopHigh )
|
||||||
|
StopMiddle = StopHigh;
|
||||||
|
|
||||||
|
while( res == false && StopMiddle < chkval->arre &&
|
||||||
|
tsCompareString( chkval->operand + val->distance, val->length,
|
||||||
|
chkval->values + StopMiddle->pos, StopMiddle->len,
|
||||||
|
true) == 0 )
|
||||||
|
{
|
||||||
|
res = (val->weight && StopMiddle->haspos) ?
|
||||||
|
checkclass_str(chkval, StopMiddle, val) : true;
|
||||||
|
|
||||||
|
StopMiddle++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -758,50 +801,38 @@ check_weight(TSVector txt, WordEntry *wptr, int8 weight)
|
|||||||
return num;
|
return num;
|
||||||
}
|
}
|
||||||
|
|
||||||
static WordEntry **
|
#define compareStatWord(a,e,s,t) \
|
||||||
SEI_realloc(WordEntry **in, uint32 *len)
|
tsCompareString(STATSTRPTR(s) + (a)->pos, (a)->len, \
|
||||||
{
|
STRPTR(t) + (e)->pos, (e)->len, \
|
||||||
if (*len == 0 || in == NULL)
|
false)
|
||||||
{
|
|
||||||
*len = 8;
|
|
||||||
in = palloc(sizeof(WordEntry *) * (*len));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
*len *= 2;
|
|
||||||
in = repalloc(in, sizeof(WordEntry *) * (*len));
|
|
||||||
}
|
|
||||||
return in;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
typedef struct WordEntryMark
|
||||||
compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, TSVector txt)
|
|
||||||
{
|
{
|
||||||
if (a->len == b->len)
|
WordEntry *newentry;
|
||||||
return strncmp(
|
StatEntry *pos;
|
||||||
STATSTRPTR(stat) + a->pos,
|
} WordEntryMark;
|
||||||
STRPTR(txt) + b->pos,
|
|
||||||
a->len
|
|
||||||
);
|
|
||||||
return (a->len > b->len) ? 1 : -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static tsstat *
|
static tsstat *
|
||||||
formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len)
|
formstat(tsstat *stat, TSVector txt, List *entries)
|
||||||
{
|
{
|
||||||
tsstat *newstat;
|
tsstat *newstat;
|
||||||
uint32 totallen,
|
uint32 totallen,
|
||||||
nentry;
|
nentry,
|
||||||
|
len = list_length(entries);
|
||||||
uint32 slen = 0;
|
uint32 slen = 0;
|
||||||
WordEntry **ptr = entry;
|
WordEntry *ptr;
|
||||||
char *curptr;
|
char *curptr;
|
||||||
StatEntry *sptr,
|
StatEntry *sptr,
|
||||||
*nptr;
|
*nptr;
|
||||||
|
ListCell *entry;
|
||||||
|
StatEntry *PosSE = STATPTR(stat),
|
||||||
|
*prevPosSE;
|
||||||
|
WordEntryMark *mark;
|
||||||
|
|
||||||
while (ptr - entry < len)
|
foreach( entry, entries )
|
||||||
{
|
{
|
||||||
slen += (*ptr)->len;
|
mark = (WordEntryMark*)lfirst(entry);
|
||||||
ptr++;
|
slen += mark->newentry->len;
|
||||||
}
|
}
|
||||||
|
|
||||||
nentry = stat->size + len;
|
nentry = stat->size + len;
|
||||||
@ -815,77 +846,45 @@ formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len)
|
|||||||
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
|
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
|
||||||
curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
|
curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
|
||||||
|
|
||||||
ptr = entry;
|
|
||||||
sptr = STATPTR(stat);
|
sptr = STATPTR(stat);
|
||||||
nptr = STATPTR(newstat);
|
nptr = STATPTR(newstat);
|
||||||
|
|
||||||
if (len == 1)
|
foreach(entry, entries)
|
||||||
{
|
{
|
||||||
StatEntry *StopLow = STATPTR(stat);
|
prevPosSE = PosSE;
|
||||||
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
|
||||||
|
|
||||||
while (StopLow < StopHigh)
|
mark = (WordEntryMark*)lfirst(entry);
|
||||||
|
ptr = mark->newentry;
|
||||||
|
PosSE = mark->pos;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy missed entries
|
||||||
|
*/
|
||||||
|
if ( PosSE > prevPosSE )
|
||||||
{
|
{
|
||||||
sptr = StopLow + (StopHigh - StopLow) / 2;
|
memcpy( nptr, prevPosSE, sizeof(StatEntry) * (PosSE-prevPosSE) );
|
||||||
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
nptr += PosSE-prevPosSE;
|
||||||
StopLow = sptr + 1;
|
|
||||||
else
|
|
||||||
StopHigh = sptr;
|
|
||||||
}
|
}
|
||||||
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
|
|
||||||
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
|
/*
|
||||||
if ((*ptr)->haspos)
|
* Copy new entry
|
||||||
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
*/
|
||||||
|
if (ptr->haspos)
|
||||||
|
nptr->nentry = (stat->weight) ? check_weight(txt, ptr, stat->weight) : POSDATALEN(txt, ptr);
|
||||||
else
|
else
|
||||||
nptr->nentry = 1;
|
nptr->nentry = 1;
|
||||||
nptr->ndoc = 1;
|
nptr->ndoc = 1;
|
||||||
nptr->len = (*ptr)->len;
|
nptr->len = ptr->len;
|
||||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
memcpy(curptr, STRPTR(txt) + ptr->pos, nptr->len);
|
||||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
|
||||||
memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
while (sptr - STATPTR(stat) < stat->size && ptr - entry < len)
|
|
||||||
{
|
|
||||||
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
|
||||||
{
|
|
||||||
memcpy(nptr, sptr, sizeof(StatEntry));
|
|
||||||
sptr++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if ((*ptr)->haspos)
|
|
||||||
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
|
||||||
else
|
|
||||||
nptr->nentry = 1;
|
|
||||||
nptr->ndoc = 1;
|
|
||||||
nptr->len = (*ptr)->len;
|
|
||||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
|
||||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||||
curptr += nptr->len;
|
curptr += nptr->len;
|
||||||
ptr++;
|
|
||||||
}
|
|
||||||
nptr++;
|
nptr++;
|
||||||
|
|
||||||
|
pfree(mark);
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat))));
|
if ( PosSE < (StatEntry *) STATSTRPTR(stat) )
|
||||||
|
memcpy(nptr, PosSE, sizeof(StatEntry) * (stat->size - (PosSE - STATPTR(stat))));
|
||||||
while (ptr - entry < len)
|
|
||||||
{
|
|
||||||
if ((*ptr)->haspos)
|
|
||||||
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
|
||||||
else
|
|
||||||
nptr->nentry = 1;
|
|
||||||
nptr->ndoc = 1;
|
|
||||||
nptr->len = (*ptr)->len;
|
|
||||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
|
||||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
|
||||||
curptr += nptr->len;
|
|
||||||
ptr++;
|
|
||||||
nptr++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return newstat;
|
return newstat;
|
||||||
}
|
}
|
||||||
@ -907,12 +906,11 @@ ts_accum(tsstat *stat, Datum data)
|
|||||||
{
|
{
|
||||||
tsstat *newstat;
|
tsstat *newstat;
|
||||||
TSVector txt = DatumGetTSVector(data);
|
TSVector txt = DatumGetTSVector(data);
|
||||||
WordEntry **newentry = NULL;
|
|
||||||
uint32 len = 0,
|
|
||||||
cur = 0;
|
|
||||||
StatEntry *sptr;
|
StatEntry *sptr;
|
||||||
WordEntry *wptr;
|
WordEntry *wptr;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
|
List *newentries=NIL;
|
||||||
|
StatEntry *StopLow;
|
||||||
|
|
||||||
if (stat == NULL)
|
if (stat == NULL)
|
||||||
{ /* Init in first */
|
{ /* Init in first */
|
||||||
@ -932,63 +930,18 @@ ts_accum(tsstat *stat, Datum data)
|
|||||||
|
|
||||||
sptr = STATPTR(stat);
|
sptr = STATPTR(stat);
|
||||||
wptr = ARRPTR(txt);
|
wptr = ARRPTR(txt);
|
||||||
|
StopLow = STATPTR(stat);
|
||||||
if (stat->size < 100 * txt->size)
|
|
||||||
{ /* merge */
|
|
||||||
while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size)
|
|
||||||
{
|
|
||||||
int cmp = compareStatWord(sptr, wptr, stat, txt);
|
|
||||||
|
|
||||||
if (cmp < 0)
|
|
||||||
sptr++;
|
|
||||||
else if (cmp == 0)
|
|
||||||
{
|
|
||||||
if (stat->weight == 0)
|
|
||||||
{
|
|
||||||
sptr->ndoc++;
|
|
||||||
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
|
|
||||||
}
|
|
||||||
else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
|
|
||||||
{
|
|
||||||
sptr->ndoc++;
|
|
||||||
sptr->nentry += n;
|
|
||||||
}
|
|
||||||
sptr++;
|
|
||||||
wptr++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
|
||||||
{
|
|
||||||
if (cur == len)
|
|
||||||
newentry = SEI_realloc(newentry, &len);
|
|
||||||
newentry[cur] = wptr;
|
|
||||||
cur++;
|
|
||||||
}
|
|
||||||
wptr++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
while (wptr - ARRPTR(txt) < txt->size)
|
while (wptr - ARRPTR(txt) < txt->size)
|
||||||
{
|
{
|
||||||
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
|
||||||
{
|
|
||||||
if (cur == len)
|
|
||||||
newentry = SEI_realloc(newentry, &len);
|
|
||||||
newentry[cur] = wptr;
|
|
||||||
cur++;
|
|
||||||
}
|
|
||||||
wptr++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{ /* search */
|
|
||||||
while (wptr - ARRPTR(txt) < txt->size)
|
|
||||||
{
|
|
||||||
StatEntry *StopLow = STATPTR(stat);
|
|
||||||
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
||||||
int cmp;
|
int cmp;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We do not set StopLow to the beginning of the array because tsvector is ordered
|
||||||
|
* with the same rule, so we can search from the last stopped position
|
||||||
|
*/
|
||||||
|
|
||||||
while (StopLow < StopHigh)
|
while (StopLow < StopHigh)
|
||||||
{
|
{
|
||||||
sptr = StopLow + (StopHigh - StopLow) / 2;
|
sptr = StopLow + (StopHigh - StopLow) / 2;
|
||||||
@ -1017,26 +970,26 @@ ts_accum(tsstat *stat, Datum data)
|
|||||||
{ /* not found */
|
{ /* not found */
|
||||||
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
||||||
{
|
{
|
||||||
if (cur == len)
|
WordEntryMark *mark = (WordEntryMark*)palloc(sizeof(WordEntryMark));
|
||||||
newentry = SEI_realloc(newentry, &len);
|
|
||||||
newentry[cur] = wptr;
|
mark->newentry = wptr;
|
||||||
cur++;
|
mark->pos = StopLow;
|
||||||
|
newentries = lappend( newentries, mark );
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wptr++;
|
wptr++;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
if (list_length(newentries) == 0)
|
||||||
if (cur == 0)
|
|
||||||
{ /* no new words */
|
{ /* no new words */
|
||||||
if (txt != (TSVector) DatumGetPointer(data))
|
if (txt != (TSVector) DatumGetPointer(data))
|
||||||
pfree(txt);
|
pfree(txt);
|
||||||
return stat;
|
return stat;
|
||||||
}
|
}
|
||||||
|
|
||||||
newstat = formstat(stat, txt, newentry, cur);
|
newstat = formstat(stat, txt, newentries);
|
||||||
pfree(newentry);
|
list_free(newentries);
|
||||||
|
|
||||||
if (txt != (TSVector) DatumGetPointer(data))
|
if (txt != (TSVector) DatumGetPointer(data))
|
||||||
pfree(txt);
|
pfree(txt);
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 2006-2008, PostgreSQL Global Development Group
|
* Copyright (c) 2006-2008, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.19 2008/05/12 00:00:53 alvherre Exp $
|
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.20 2008/05/16 16:31:01 tgl Exp $
|
||||||
*--------------------------------------------------------------------------
|
*--------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -15,6 +15,7 @@
|
|||||||
#include "access/itup.h"
|
#include "access/itup.h"
|
||||||
#include "access/relscan.h"
|
#include "access/relscan.h"
|
||||||
#include "fmgr.h"
|
#include "fmgr.h"
|
||||||
|
#include "nodes/tidbitmap.h"
|
||||||
#include "storage/block.h"
|
#include "storage/block.h"
|
||||||
#include "storage/buf.h"
|
#include "storage/buf.h"
|
||||||
#include "storage/off.h"
|
#include "storage/off.h"
|
||||||
@ -28,7 +29,8 @@
|
|||||||
#define GIN_EXTRACTVALUE_PROC 2
|
#define GIN_EXTRACTVALUE_PROC 2
|
||||||
#define GIN_EXTRACTQUERY_PROC 3
|
#define GIN_EXTRACTQUERY_PROC 3
|
||||||
#define GIN_CONSISTENT_PROC 4
|
#define GIN_CONSISTENT_PROC 4
|
||||||
#define GINNProcs 4
|
#define GIN_COMPARE_PARTIAL_PROC 5
|
||||||
|
#define GINNProcs 5
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Page opaque data in a inverted index page.
|
* Page opaque data in a inverted index page.
|
||||||
@ -141,7 +143,10 @@ typedef struct GinState
|
|||||||
FmgrInfo extractValueFn;
|
FmgrInfo extractValueFn;
|
||||||
FmgrInfo extractQueryFn;
|
FmgrInfo extractQueryFn;
|
||||||
FmgrInfo consistentFn;
|
FmgrInfo consistentFn;
|
||||||
|
FmgrInfo comparePartialFn; /* optional method */
|
||||||
|
|
||||||
|
bool canPartialMatch; /* can opclass perform partial
|
||||||
|
* match (prefix search)? */
|
||||||
TupleDesc tupdesc;
|
TupleDesc tupdesc;
|
||||||
} GinState;
|
} GinState;
|
||||||
|
|
||||||
@ -360,6 +365,12 @@ typedef struct GinScanEntryData
|
|||||||
/* current ItemPointer to heap */
|
/* current ItemPointer to heap */
|
||||||
ItemPointerData curItem;
|
ItemPointerData curItem;
|
||||||
|
|
||||||
|
/* partial match support */
|
||||||
|
bool isPartialMatch;
|
||||||
|
TIDBitmap *partialMatch;
|
||||||
|
TBMIterateResult *partialMatchResult;
|
||||||
|
StrategyNumber strategy;
|
||||||
|
|
||||||
/* used for Posting list and one page in Posting tree */
|
/* used for Posting list and one page in Posting tree */
|
||||||
ItemPointerData *list;
|
ItemPointerData *list;
|
||||||
uint32 nlist;
|
uint32 nlist;
|
||||||
@ -424,6 +435,7 @@ extern PGDLLIMPORT int GinFuzzySearchLimit;
|
|||||||
|
|
||||||
extern Datum gingetbitmap(PG_FUNCTION_ARGS);
|
extern Datum gingetbitmap(PG_FUNCTION_ARGS);
|
||||||
extern Datum gingettuple(PG_FUNCTION_ARGS);
|
extern Datum gingettuple(PG_FUNCTION_ARGS);
|
||||||
|
extern void ginrestartentry(GinScanEntry entry);
|
||||||
|
|
||||||
/* ginvacuum.c */
|
/* ginvacuum.c */
|
||||||
extern Datum ginbulkdelete(PG_FUNCTION_ARGS);
|
extern Datum ginbulkdelete(PG_FUNCTION_ARGS);
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.459 2008/05/15 00:17:40 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.460 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -53,6 +53,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* yyyymmddN */
|
/* yyyymmddN */
|
||||||
#define CATALOG_VERSION_NO 200805141
|
#define CATALOG_VERSION_NO 200805161
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.55 2008/04/10 22:25:25 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.56 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* the genbki.sh script reads this file and generates .bki
|
* the genbki.sh script reads this file and generates .bki
|
||||||
@ -114,7 +114,7 @@ DESCR("hash index access method");
|
|||||||
DATA(insert OID = 783 ( gist 0 7 f f t t t t t t gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
|
DATA(insert OID = 783 ( gist 0 7 f f t t t t t t gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
|
||||||
DESCR("GiST index access method");
|
DESCR("GiST index access method");
|
||||||
#define GIST_AM_OID 783
|
#define GIST_AM_OID 783
|
||||||
DATA(insert OID = 2742 ( gin 0 4 f f f f f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
|
DATA(insert OID = 2742 ( gin 0 5 f f f f f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
|
||||||
DESCR("GIN index access method");
|
DESCR("GIN index access method");
|
||||||
#define GIN_AM_OID 2742
|
#define GIN_AM_OID 2742
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.71 2008/03/27 03:57:34 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.72 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* the genbki.sh script reads this file and generates .bki
|
* the genbki.sh script reads this file and generates .bki
|
||||||
@ -321,10 +321,11 @@ DATA(insert ( 2745 1025 1025 1 381 ));
|
|||||||
DATA(insert ( 2745 1025 1025 2 2743 ));
|
DATA(insert ( 2745 1025 1025 2 2743 ));
|
||||||
DATA(insert ( 2745 1025 1025 3 2774 ));
|
DATA(insert ( 2745 1025 1025 3 2774 ));
|
||||||
DATA(insert ( 2745 1025 1025 4 2744 ));
|
DATA(insert ( 2745 1025 1025 4 2744 ));
|
||||||
DATA(insert ( 3659 3614 3614 1 360 ));
|
DATA(insert ( 3659 3614 3614 1 3724 ));
|
||||||
DATA(insert ( 3659 3614 3614 2 3656 ));
|
DATA(insert ( 3659 3614 3614 2 3656 ));
|
||||||
DATA(insert ( 3659 3614 3614 3 3657 ));
|
DATA(insert ( 3659 3614 3614 3 3657 ));
|
||||||
DATA(insert ( 3659 3614 3614 4 3658 ));
|
DATA(insert ( 3659 3614 3614 4 3658 ));
|
||||||
|
DATA(insert ( 3659 3614 3614 5 2700 ));
|
||||||
DATA(insert ( 3626 3614 3614 1 3622 ));
|
DATA(insert ( 3626 3614 3614 1 3622 ));
|
||||||
DATA(insert ( 3683 3615 3615 1 3668 ));
|
DATA(insert ( 3683 3615 3615 1 3668 ));
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.499 2008/05/15 00:17:40 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.500 2008/05/16 16:31:01 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* The script catalog/genbki.sh reads this file and generates .bki
|
* The script catalog/genbki.sh reads this file and generates .bki
|
||||||
@ -4018,7 +4018,7 @@ DESCR("gin(internal)");
|
|||||||
/* GIN array support */
|
/* GIN array support */
|
||||||
DATA(insert OID = 2743 ( ginarrayextract PGNSP PGUID 12 1 0 f f t f i 2 2281 "2277 2281" _null_ _null_ _null_ ginarrayextract - _null_ _null_ ));
|
DATA(insert OID = 2743 ( ginarrayextract PGNSP PGUID 12 1 0 f f t f i 2 2281 "2277 2281" _null_ _null_ _null_ ginarrayextract - _null_ _null_ ));
|
||||||
DESCR("GIN array support");
|
DESCR("GIN array support");
|
||||||
DATA(insert OID = 2774 ( ginqueryarrayextract PGNSP PGUID 12 1 0 f f t f i 3 2281 "2277 2281 21" _null_ _null_ _null_ ginqueryarrayextract - _null_ _null_ ));
|
DATA(insert OID = 2774 ( ginqueryarrayextract PGNSP PGUID 12 1 0 f f t f i 4 2281 "2277 2281 21 2281" _null_ _null_ _null_ ginqueryarrayextract - _null_ _null_ ));
|
||||||
DESCR("GIN array support");
|
DESCR("GIN array support");
|
||||||
DATA(insert OID = 2744 ( ginarrayconsistent PGNSP PGUID 12 1 0 f f t f i 4 16 "2281 21 2281 2281" _null_ _null_ _null_ ginarrayconsistent - _null_ _null_ ));
|
DATA(insert OID = 2744 ( ginarrayconsistent PGNSP PGUID 12 1 0 f f t f i 4 16 "2281 21 2281 2281" _null_ _null_ _null_ ginarrayconsistent - _null_ _null_ ));
|
||||||
DESCR("GIN array support");
|
DESCR("GIN array support");
|
||||||
@ -4253,10 +4253,14 @@ DESCR("GiST tsvector support");
|
|||||||
|
|
||||||
DATA(insert OID = 3656 ( gin_extract_tsvector PGNSP PGUID 12 1 0 f f t f i 2 2281 "3614 2281" _null_ _null_ _null_ gin_extract_tsvector - _null_ _null_ ));
|
DATA(insert OID = 3656 ( gin_extract_tsvector PGNSP PGUID 12 1 0 f f t f i 2 2281 "3614 2281" _null_ _null_ _null_ gin_extract_tsvector - _null_ _null_ ));
|
||||||
DESCR("GIN tsvector support");
|
DESCR("GIN tsvector support");
|
||||||
DATA(insert OID = 3657 ( gin_extract_tsquery PGNSP PGUID 12 1 0 f f t f i 3 2281 "3615 2281 21" _null_ _null_ _null_ gin_extract_tsquery - _null_ _null_ ));
|
DATA(insert OID = 3657 ( gin_extract_tsquery PGNSP PGUID 12 1 0 f f t f i 4 2281 "3615 2281 21 2281" _null_ _null_ _null_ gin_extract_tsquery - _null_ _null_ ));
|
||||||
DESCR("GIN tsvector support");
|
DESCR("GIN tsvector support");
|
||||||
DATA(insert OID = 3658 ( gin_tsquery_consistent PGNSP PGUID 12 1 0 f f t f i 4 16 "2281 21 3615 2281" _null_ _null_ _null_ gin_tsquery_consistent - _null_ _null_ ));
|
DATA(insert OID = 3658 ( gin_tsquery_consistent PGNSP PGUID 12 1 0 f f t f i 4 16 "2281 21 3615 2281" _null_ _null_ _null_ gin_tsquery_consistent - _null_ _null_ ));
|
||||||
DESCR("GIN tsvector support");
|
DESCR("GIN tsvector support");
|
||||||
|
DATA(insert OID = 3724 ( gin_cmp_tslexeme PGNSP PGUID 12 1 0 f f t f i 2 23 "25 25" _null_ _null_ _null_ gin_cmp_tslexeme - _null_ _null_ ));
|
||||||
|
DESCR("GIN tsvector support");
|
||||||
|
DATA(insert OID = 2700 ( gin_cmp_prefix PGNSP PGUID 12 1 0 f f t f i 3 23 "25 25 21" _null_ _null_ _null_ gin_cmp_prefix - _null_ _null_ ));
|
||||||
|
DESCR("GIN tsvector support");
|
||||||
|
|
||||||
DATA(insert OID = 3662 ( tsquery_lt PGNSP PGUID 12 1 0 f f t f i 2 16 "3615 3615" _null_ _null_ _null_ tsquery_lt - _null_ _null_ ));
|
DATA(insert OID = 3662 ( tsquery_lt PGNSP PGUID 12 1 0 f f t f i 2 16 "3615 3615" _null_ _null_ _null_ tsquery_lt - _null_ _null_ ));
|
||||||
DESCR("less-than");
|
DESCR("less-than");
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.8 2008/01/01 19:45:59 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -99,6 +99,7 @@ typedef struct
|
|||||||
} TSLexeme;
|
} TSLexeme;
|
||||||
|
|
||||||
#define TSL_ADDPOS 0x01
|
#define TSL_ADDPOS 0x01
|
||||||
|
#define TSL_PREFIX 0x02
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Struct for supporting complex dictionaries like thesaurus.
|
* Struct for supporting complex dictionaries like thesaurus.
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.10 2008/01/01 19:45:59 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.11 2008/05/16 16:31:02 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -179,6 +179,7 @@ typedef struct
|
|||||||
* bitmask of allowed weights. if it =0 then
|
* bitmask of allowed weights. if it =0 then
|
||||||
* any weight are allowed. Weights and bit
|
* any weight are allowed. Weights and bit
|
||||||
* map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
|
* map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
|
||||||
|
bool prefix; /* true if it's a prefix search */
|
||||||
int32 valcrc; /* XXX: pg_crc32 would be a more appropriate
|
int32 valcrc; /* XXX: pg_crc32 would be a more appropriate
|
||||||
* data type, but we use comparisons to signed
|
* data type, but we use comparisons to signed
|
||||||
* integers in the code. They would need to be
|
* integers in the code. They would need to be
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.14 2008/04/21 00:26:47 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.15 2008/05/16 16:31:02 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -42,9 +42,10 @@ typedef struct TSQueryParserStateData *TSQueryParserState;
|
|||||||
|
|
||||||
typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
|
typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
|
||||||
char *token, int tokenlen,
|
char *token, int tokenlen,
|
||||||
int2 tokenweights /* bitmap as described
|
int2 tokenweights, /* bitmap as described
|
||||||
* in QueryOperand
|
* in QueryOperand
|
||||||
struct */ );
|
* struct */
|
||||||
|
bool prefix);
|
||||||
|
|
||||||
extern TSQuery parse_tsquery(char *buf,
|
extern TSQuery parse_tsquery(char *buf,
|
||||||
PushFunction pushval,
|
PushFunction pushval,
|
||||||
@ -52,7 +53,7 @@ extern TSQuery parse_tsquery(char *buf,
|
|||||||
|
|
||||||
/* Functions for use by PushFunction implementations */
|
/* Functions for use by PushFunction implementations */
|
||||||
extern void pushValue(TSQueryParserState state,
|
extern void pushValue(TSQueryParserState state,
|
||||||
char *strval, int lenval, int2 weight);
|
char *strval, int lenval, int2 weight, bool prefix);
|
||||||
extern void pushStop(TSQueryParserState state);
|
extern void pushStop(TSQueryParserState state);
|
||||||
extern void pushOperator(TSQueryParserState state, int8 operator);
|
extern void pushOperator(TSQueryParserState state, int8 operator);
|
||||||
|
|
||||||
@ -74,6 +75,7 @@ typedef struct
|
|||||||
*/
|
*/
|
||||||
uint16 *apos;
|
uint16 *apos;
|
||||||
} pos;
|
} pos;
|
||||||
|
uint16 flags; /* currently, only TSL_PREFIX */
|
||||||
char *word;
|
char *word;
|
||||||
uint32 alen;
|
uint32 alen;
|
||||||
} ParsedWord;
|
} ParsedWord;
|
||||||
@ -110,6 +112,7 @@ extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
|
|||||||
* to_ts* - text transformation to tsvector, tsquery
|
* to_ts* - text transformation to tsvector, tsquery
|
||||||
*/
|
*/
|
||||||
extern TSVector make_tsvector(ParsedText *prs);
|
extern TSVector make_tsvector(ParsedText *prs);
|
||||||
|
extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
|
||||||
|
|
||||||
extern Datum to_tsvector_byid(PG_FUNCTION_ARGS);
|
extern Datum to_tsvector_byid(PG_FUNCTION_ARGS);
|
||||||
extern Datum to_tsvector(PG_FUNCTION_ARGS);
|
extern Datum to_tsvector(PG_FUNCTION_ARGS);
|
||||||
@ -142,6 +145,8 @@ extern Datum gtsvectorout(PG_FUNCTION_ARGS);
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
extern Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
|
extern Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum gin_cmp_tslexeme(PG_FUNCTION_ARGS);
|
||||||
|
extern Datum gin_cmp_prefix(PG_FUNCTION_ARGS);
|
||||||
extern Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
|
extern Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
|
||||||
extern Datum gin_tsquery_consistent(PG_FUNCTION_ARGS);
|
extern Datum gin_tsquery_consistent(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
@ -935,9 +935,11 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND
|
|||||||
|
|
||||||
-- Detect missing pg_amproc entries: should have as many support functions
|
-- Detect missing pg_amproc entries: should have as many support functions
|
||||||
-- as AM expects for each datatype combination supported by the opfamily.
|
-- as AM expects for each datatype combination supported by the opfamily.
|
||||||
|
-- GIN is a special case because it has an optional support function.
|
||||||
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
|
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
|
||||||
FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
|
FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
|
||||||
WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
|
WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
|
||||||
|
p1.amname <> 'gin' AND
|
||||||
p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4
|
p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4
|
||||||
WHERE p4.amprocfamily = p2.oid AND
|
WHERE p4.amprocfamily = p2.oid AND
|
||||||
p4.amproclefttype = p3.amproclefttype AND
|
p4.amproclefttype = p3.amproclefttype AND
|
||||||
@ -946,18 +948,43 @@ WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
|
|||||||
--------+---------+----------------+-----------------
|
--------+---------+----------------+-----------------
|
||||||
(0 rows)
|
(0 rows)
|
||||||
|
|
||||||
|
-- Similar check for GIN, allowing one optional proc
|
||||||
|
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
|
||||||
|
FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
|
||||||
|
WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
|
||||||
|
p1.amname = 'gin' AND
|
||||||
|
p1.amsupport - 1 > (SELECT count(*) FROM pg_amproc AS p4
|
||||||
|
WHERE p4.amprocfamily = p2.oid AND
|
||||||
|
p4.amproclefttype = p3.amproclefttype AND
|
||||||
|
p4.amprocrighttype = p3.amprocrighttype);
|
||||||
|
amname | opfname | amproclefttype | amprocrighttype
|
||||||
|
--------+---------+----------------+-----------------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
-- Also, check if there are any pg_opclass entries that don't seem to have
|
-- Also, check if there are any pg_opclass entries that don't seem to have
|
||||||
-- pg_amproc support.
|
-- pg_amproc support. Again, GIN has to be checked separately.
|
||||||
SELECT amname, opcname, count(*)
|
SELECT amname, opcname, count(*)
|
||||||
FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
|
FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
|
||||||
LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
|
LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
|
||||||
amproclefttype = amprocrighttype AND amproclefttype = opcintype
|
amproclefttype = amprocrighttype AND amproclefttype = opcintype
|
||||||
|
WHERE am.amname <> 'gin'
|
||||||
GROUP BY amname, amsupport, opcname, amprocfamily
|
GROUP BY amname, amsupport, opcname, amprocfamily
|
||||||
HAVING count(*) != amsupport OR amprocfamily IS NULL;
|
HAVING count(*) != amsupport OR amprocfamily IS NULL;
|
||||||
amname | opcname | count
|
amname | opcname | count
|
||||||
--------+---------+-------
|
--------+---------+-------
|
||||||
(0 rows)
|
(0 rows)
|
||||||
|
|
||||||
|
SELECT amname, opcname, count(*)
|
||||||
|
FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
|
||||||
|
LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
|
||||||
|
amproclefttype = amprocrighttype AND amproclefttype = opcintype
|
||||||
|
WHERE am.amname = 'gin'
|
||||||
|
GROUP BY amname, amsupport, opcname, amprocfamily
|
||||||
|
HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL;
|
||||||
|
amname | opcname | count
|
||||||
|
--------+---------+-------
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
-- Unfortunately, we can't check the amproc link very well because the
|
-- Unfortunately, we can't check the amproc link very well because the
|
||||||
-- signature of the function may be different for different support routines
|
-- signature of the function may be different for different support routines
|
||||||
-- or different base data types.
|
-- or different base data types.
|
||||||
|
@ -232,7 +232,7 @@ ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR
|
|||||||
SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
|
SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
|
||||||
to_tsvector
|
to_tsvector
|
||||||
----------------------------------------------------------------------------------------------------
|
----------------------------------------------------------------------------------------------------
|
||||||
'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
|
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT to_tsquery('ispell_tst', 'footballklubber');
|
SELECT to_tsquery('ispell_tst', 'footballklubber');
|
||||||
@ -256,7 +256,7 @@ ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
|
|||||||
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
|
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
|
||||||
to_tsvector
|
to_tsvector
|
||||||
----------------------------------------------------------------------------------------------------
|
----------------------------------------------------------------------------------------------------
|
||||||
'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
|
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT to_tsquery('hunspell_tst', 'footballklubber');
|
SELECT to_tsquery('hunspell_tst', 'footballklubber');
|
||||||
@ -287,7 +287,7 @@ SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgs
|
|||||||
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
|
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
|
||||||
to_tsvector
|
to_tsvector
|
||||||
----------------------------------------------------------
|
----------------------------------------------------------
|
||||||
'googl':7,10 'write':6 'common':2 'mistak':3 'instead':8
|
'common':2 'googl':7,10 'instead':8 'mistak':3 'write':6
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
-- test thesaurus in configuration
|
-- test thesaurus in configuration
|
||||||
@ -307,12 +307,12 @@ SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one');
|
|||||||
SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)');
|
SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)');
|
||||||
to_tsvector
|
to_tsvector
|
||||||
-------------------------------------------------------------
|
-------------------------------------------------------------
|
||||||
'sn':1,9,11 'new':4 'call':8 'star':5 'usual':7 'abbrev':10
|
'abbrev':10 'call':8 'new':4 'sn':1,9,11 'star':5 'usual':7
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
|
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
|
||||||
to_tsvector
|
to_tsvector
|
||||||
-------------------------------------------------------
|
-------------------------------------------------------
|
||||||
'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8
|
'card':3,10 'invit':2,9 'like':6 'look':5 'order':1,8
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
@ -92,6 +92,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|||||||
39
|
39
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
494
|
||||||
|
(1 row)
|
||||||
|
|
||||||
create index wowidx on test_tsvector using gist (a);
|
create index wowidx on test_tsvector using gist (a);
|
||||||
SET enable_seqscan=OFF;
|
SET enable_seqscan=OFF;
|
||||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
||||||
@ -130,6 +136,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|||||||
39
|
39
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
494
|
||||||
|
(1 row)
|
||||||
|
|
||||||
RESET enable_seqscan;
|
RESET enable_seqscan;
|
||||||
DROP INDEX wowidx;
|
DROP INDEX wowidx;
|
||||||
CREATE INDEX wowidx ON test_tsvector USING gin (a);
|
CREATE INDEX wowidx ON test_tsvector USING gin (a);
|
||||||
@ -170,6 +182,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
|||||||
39
|
39
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
494
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
|
||||||
RESET enable_seqscan;
|
RESET enable_seqscan;
|
||||||
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
|
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
|
||||||
@ -380,7 +398,7 @@ SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.
|
|||||||
<i <b> wow < jqw <> qwerty');
|
<i <b> wow < jqw <> qwerty');
|
||||||
to_tsvector
|
to_tsvector
|
||||||
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
'ad':17 'dw':19 'jf':39 '234':61 '345':1 '4.2':54,55,56 '455':31 'jqw':64 'qwe':2,18,27,28,35 'wer':36 'wow':63 '-4.2':58,60 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':65 '234.435':30 'qwe-wer':34 'readlin':53,57,59 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23
|
'+4.0e-10':26 '-4.2':58,60 '/?ad=qwe&dw':7,10,14,22 '/?ad=qwe&dw=%20%32':25 '/awdf/dwqe/4325':46 '/usr/local/fff':45 '/wqe-324/ewr':49 '1aew.werc.ewr':9 '1aew.werc.ewr/?ad=qwe&dw':8 '234':61 '234.435':30 '2aew.werc.ewr':11 '345':1 '3aew.werc.ewr':13 '3aew.werc.ewr/?ad=qwe&dw':12 '4.2':54,55,56 '455':31 '4aew.werc.ewr':15 '5.005':32 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100':24 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23 'ad':17 'aew.werc.ewr':6 'aew.werc.ewr/?ad=qwe&dw':5 'asdf':37 'dw':19 'efd.r':3 'ewr1':43 'ewri2':44 'gist.c':52 'gist.h':50 'gist.h.c':51 'hjwer':42 'jf':39 'jqw':64 'qwe':2,18,27,28,35 'qwe-wer':34 'qwer':38 'qwerti':65 'qwqwe':29 'readlin':53,57,59 'rewt/ewr':47 'sdjk':40 'teodor@stack.net':33 'wefjn':48 'wer':36 'wow':63 'www.com':4
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
|
||||||
@ -852,7 +870,7 @@ SET default_text_search_config=simple;
|
|||||||
SELECT to_tsvector('SKIES My booKs');
|
SELECT to_tsvector('SKIES My booKs');
|
||||||
to_tsvector
|
to_tsvector
|
||||||
----------------------------
|
----------------------------
|
||||||
'my':2 'books':3 'skies':1
|
'books':3 'my':2 'skies':1
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT plainto_tsquery('SKIES My booKs');
|
SELECT plainto_tsquery('SKIES My booKs');
|
||||||
@ -871,7 +889,7 @@ SET default_text_search_config=english;
|
|||||||
SELECT to_tsvector('SKIES My booKs');
|
SELECT to_tsvector('SKIES My booKs');
|
||||||
to_tsvector
|
to_tsvector
|
||||||
------------------
|
------------------
|
||||||
'sky':1 'book':3
|
'book':3 'sky':1
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT plainto_tsquery('SKIES My booKs');
|
SELECT plainto_tsquery('SKIES My booKs');
|
||||||
|
@ -44,31 +44,31 @@ SELECT E'''1 \\''2'''::tsvector;
|
|||||||
SELECT E'''1 \\''2''3'::tsvector;
|
SELECT E'''1 \\''2''3'::tsvector;
|
||||||
tsvector
|
tsvector
|
||||||
-------------
|
-------------
|
||||||
'3' '1 ''2'
|
'1 ''2' '3'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT E'''1 \\''2'' 3'::tsvector;
|
SELECT E'''1 \\''2'' 3'::tsvector;
|
||||||
tsvector
|
tsvector
|
||||||
-------------
|
-------------
|
||||||
'3' '1 ''2'
|
'1 ''2' '3'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT E'''1 \\''2'' '' 3'' 4 '::tsvector;
|
SELECT E'''1 \\''2'' '' 3'' 4 '::tsvector;
|
||||||
tsvector
|
tsvector
|
||||||
------------------
|
------------------
|
||||||
'4' ' 3' '1 ''2'
|
' 3' '1 ''2' '4'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
|
SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
|
||||||
tsvector
|
tsvector
|
||||||
----------------------------------------
|
----------------------------------------
|
||||||
'\\as' 'abc' 'AB\\c' 'ab\\c' 'ab\\\\c'
|
'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
|
SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
|
||||||
tsvectorin
|
tsvectorin
|
||||||
----------------------------------------
|
----------------------------------------
|
||||||
'\\as' 'abc' 'AB\\c' 'ab\\c' 'ab\\\\c'
|
'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT '''w'':4A,3B,2C,1D,5 a:8';
|
SELECT '''w'':4A,3B,2C,1D,5 a:8';
|
||||||
@ -86,13 +86,13 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
|
|||||||
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
||||||
setweight
|
setweight
|
||||||
----------------------------------------------------------
|
----------------------------------------------------------
|
||||||
'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
|
'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
|
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
|
||||||
strip
|
strip
|
||||||
---------------
|
---------------
|
||||||
'a' 'w' 'asd'
|
'a' 'asd' 'w'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
--Base tsquery test
|
--Base tsquery test
|
||||||
@ -336,6 +336,12 @@ SELECT $$'\\as'$$::tsquery;
|
|||||||
'\\as'
|
'\\as'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
|
||||||
|
tsquery
|
||||||
|
------------------------------------------
|
||||||
|
( 'a':* & 'nbb':*AC | 'doo':*A ) | 'goo'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
SELECT 'a' < 'b & c'::tsquery as "true";
|
SELECT 'a' < 'b & c'::tsquery as "true";
|
||||||
true
|
true
|
||||||
------
|
------
|
||||||
@ -439,12 +445,96 @@ SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true";
|
|||||||
t
|
t
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*C' as "false";
|
||||||
|
false
|
||||||
|
-------
|
||||||
|
f
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*CB' as "true";
|
||||||
|
true
|
||||||
|
------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'a b:89 ca:23A,64b cb:80c d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
|
||||||
|
true
|
||||||
|
------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
|
||||||
|
true
|
||||||
|
------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true";
|
||||||
|
true
|
||||||
|
------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false";
|
||||||
|
false
|
||||||
|
-------
|
||||||
|
f
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false";
|
||||||
|
false
|
||||||
|
-------
|
||||||
|
f
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'supeznova supernova'::tsvector @@ 'super'::tsquery AS "false";
|
||||||
|
false
|
||||||
|
-------
|
||||||
|
f
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
|
||||||
|
true
|
||||||
|
------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
|
||||||
|
true
|
||||||
|
------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
|
||||||
|
true
|
||||||
|
------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
|
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
|
||||||
ts_rank
|
ts_rank
|
||||||
-----------
|
-----------
|
||||||
0.0911891
|
0.0911891
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
|
||||||
|
ts_rank
|
||||||
|
-----------
|
||||||
|
0.0303964
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
|
||||||
|
ts_rank
|
||||||
|
-----------
|
||||||
|
0.0911891
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
|
||||||
|
ts_rank
|
||||||
|
-----------
|
||||||
|
0.0911891
|
||||||
|
(1 row)
|
||||||
|
|
||||||
SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s');
|
SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s');
|
||||||
ts_rank
|
ts_rank
|
||||||
----------
|
----------
|
||||||
@ -481,6 +571,30 @@ SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s');
|
|||||||
0.3
|
0.3
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s');
|
||||||
|
ts_rank_cd
|
||||||
|
------------
|
||||||
|
0.1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s:*');
|
||||||
|
ts_rank_cd
|
||||||
|
------------
|
||||||
|
0.3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
|
||||||
|
ts_rank_cd
|
||||||
|
------------
|
||||||
|
0.3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*');
|
||||||
|
ts_rank_cd
|
||||||
|
------------
|
||||||
|
0.5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s');
|
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s');
|
||||||
ts_rank_cd
|
ts_rank_cd
|
||||||
------------
|
------------
|
||||||
|
@ -746,25 +746,47 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND
|
|||||||
|
|
||||||
-- Detect missing pg_amproc entries: should have as many support functions
|
-- Detect missing pg_amproc entries: should have as many support functions
|
||||||
-- as AM expects for each datatype combination supported by the opfamily.
|
-- as AM expects for each datatype combination supported by the opfamily.
|
||||||
|
-- GIN is a special case because it has an optional support function.
|
||||||
|
|
||||||
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
|
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
|
||||||
FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
|
FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
|
||||||
WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
|
WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
|
||||||
|
p1.amname <> 'gin' AND
|
||||||
p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4
|
p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4
|
||||||
WHERE p4.amprocfamily = p2.oid AND
|
WHERE p4.amprocfamily = p2.oid AND
|
||||||
p4.amproclefttype = p3.amproclefttype AND
|
p4.amproclefttype = p3.amproclefttype AND
|
||||||
p4.amprocrighttype = p3.amprocrighttype);
|
p4.amprocrighttype = p3.amprocrighttype);
|
||||||
|
|
||||||
|
-- Similar check for GIN, allowing one optional proc
|
||||||
|
|
||||||
|
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
|
||||||
|
FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
|
||||||
|
WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
|
||||||
|
p1.amname = 'gin' AND
|
||||||
|
p1.amsupport - 1 > (SELECT count(*) FROM pg_amproc AS p4
|
||||||
|
WHERE p4.amprocfamily = p2.oid AND
|
||||||
|
p4.amproclefttype = p3.amproclefttype AND
|
||||||
|
p4.amprocrighttype = p3.amprocrighttype);
|
||||||
|
|
||||||
-- Also, check if there are any pg_opclass entries that don't seem to have
|
-- Also, check if there are any pg_opclass entries that don't seem to have
|
||||||
-- pg_amproc support.
|
-- pg_amproc support. Again, GIN has to be checked separately.
|
||||||
|
|
||||||
SELECT amname, opcname, count(*)
|
SELECT amname, opcname, count(*)
|
||||||
FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
|
FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
|
||||||
LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
|
LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
|
||||||
amproclefttype = amprocrighttype AND amproclefttype = opcintype
|
amproclefttype = amprocrighttype AND amproclefttype = opcintype
|
||||||
|
WHERE am.amname <> 'gin'
|
||||||
GROUP BY amname, amsupport, opcname, amprocfamily
|
GROUP BY amname, amsupport, opcname, amprocfamily
|
||||||
HAVING count(*) != amsupport OR amprocfamily IS NULL;
|
HAVING count(*) != amsupport OR amprocfamily IS NULL;
|
||||||
|
|
||||||
|
SELECT amname, opcname, count(*)
|
||||||
|
FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
|
||||||
|
LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
|
||||||
|
amproclefttype = amprocrighttype AND amproclefttype = opcintype
|
||||||
|
WHERE am.amname = 'gin'
|
||||||
|
GROUP BY amname, amsupport, opcname, amprocfamily
|
||||||
|
HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL;
|
||||||
|
|
||||||
-- Unfortunately, we can't check the amproc link very well because the
|
-- Unfortunately, we can't check the amproc link very well because the
|
||||||
-- signature of the function may be different for different support routines
|
-- signature of the function may be different for different support routines
|
||||||
-- or different base data types.
|
-- or different base data types.
|
||||||
|
@ -47,6 +47,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|||||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
||||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
||||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
||||||
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
||||||
|
|
||||||
create index wowidx on test_tsvector using gist (a);
|
create index wowidx on test_tsvector using gist (a);
|
||||||
|
|
||||||
@ -58,6 +59,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|||||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
||||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
||||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
||||||
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
||||||
|
|
||||||
RESET enable_seqscan;
|
RESET enable_seqscan;
|
||||||
|
|
||||||
@ -73,6 +75,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
|||||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
||||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
||||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
||||||
|
SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
|
||||||
|
|
||||||
RESET enable_seqscan;
|
RESET enable_seqscan;
|
||||||
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
|
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
|
||||||
|
@ -58,6 +58,7 @@ SELECT '1&(2&(4&(5|6)))'::tsquery;
|
|||||||
SELECT '1&(2&(4&(5|!6)))'::tsquery;
|
SELECT '1&(2&(4&(5|!6)))'::tsquery;
|
||||||
SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery;
|
SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery;
|
||||||
SELECT $$'\\as'$$::tsquery;
|
SELECT $$'\\as'$$::tsquery;
|
||||||
|
SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
|
||||||
|
|
||||||
SELECT 'a' < 'b & c'::tsquery as "true";
|
SELECT 'a' < 'b & c'::tsquery as "true";
|
||||||
SELECT 'a' > 'b & c'::tsquery as "false";
|
SELECT 'a' > 'b & c'::tsquery as "false";
|
||||||
@ -81,8 +82,23 @@ SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B' as "true";
|
|||||||
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A' as "true";
|
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A' as "true";
|
||||||
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C' as "false";
|
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C' as "false";
|
||||||
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true";
|
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true";
|
||||||
|
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*C' as "false";
|
||||||
|
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*CB' as "true";
|
||||||
|
SELECT 'a b:89 ca:23A,64b cb:80c d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
|
||||||
|
SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
|
||||||
|
SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true";
|
||||||
|
|
||||||
|
SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false";
|
||||||
|
SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false";
|
||||||
|
SELECT 'supeznova supernova'::tsvector @@ 'super'::tsquery AS "false";
|
||||||
|
SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
|
||||||
|
SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
|
||||||
|
SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
|
||||||
|
|
||||||
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
|
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
|
||||||
|
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
|
||||||
|
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
|
||||||
|
SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
|
||||||
SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s');
|
SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s');
|
||||||
SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a | s');
|
SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a | s');
|
||||||
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a & s');
|
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a & s');
|
||||||
@ -90,6 +106,10 @@ SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a & s');
|
|||||||
SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a & s');
|
SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a & s');
|
||||||
|
|
||||||
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s');
|
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s');
|
||||||
|
SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s');
|
||||||
|
SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s:*');
|
||||||
|
SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
|
||||||
|
SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*');
|
||||||
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s');
|
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s');
|
||||||
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a | s');
|
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a | s');
|
||||||
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
|
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
|
||||||
|
Loading…
x
Reference in New Issue
Block a user