Improve headline generation. Now a headline can contain
several fragments, a la Google. Sushant Sinha <sushant354@gmail.com>

This commit is contained in:
parent 906b7e5f6c
commit 2a0083ede8

doc/src/sgml/textsearch.sgml

@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.45 2008/09/23 09:20:34 heikki Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.46 2008/10/17 18:05:19 teodor Exp $ -->

 <chapter id="textsearch">
 <title id="textsearch-title">Full Text Search</title>
@@ -1098,6 +1098,29 @@ ORDER BY rank DESC LIMIT 10;
 value of three eliminates the English articles.
 </para>
 </listitem>
+<listitem>
+<para>
+<literal>MaxFragments</literal>: maximum number of text excerpts
+or fragments that match the query words. It also triggers a
+different headline generation function than the default one. This
+function finds text fragments with as many query words as possible and
+stretches those fragments around the query words. As a result,
+query words are close to the middle of each fragment and have words on
+each side. Each fragment will contain at most MaxWords words and will
+not have words of length less than or equal to ShortWord at the start
+or end of a fragment. If not all query words are found in the document,
+then a single fragment of MinWords words will be displayed.
+</para>
+</listitem>
+<listitem>
+<para>
+<literal>FragmentDelimiter</literal>: When more than one fragment is
+displayed, the fragments will be separated by this delimiter. This
+option takes effect only if MaxFragments is greater than 1 and more
+than one fragment is actually displayed. It has no effect on the
+default headline generation function.
+</para>
+</listitem>
 <listitem>
 <para>
 <literal>HighlightAll</literal>: Boolean flag; if
@@ -1109,7 +1132,7 @@ ORDER BY rank DESC LIMIT 10;
 Any unspecified options receive these defaults:

 <programlisting>
-StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3, HighlightAll=FALSE
+StartSel=<b>, StopSel=</b>, MaxFragments=0, FragmentDelimiter=" ... ", MaxWords=35, MinWords=15, ShortWord=3, HighlightAll=FALSE
 </programlisting>
 </para>
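
For context, a minimal usage sketch of the options documented above; the documents table and its body column are hypothetical names, not part of this commit:

-- Hypothetical example: return up to three excerpts built around the matched
-- query words, joined by the FragmentDelimiter string.
SELECT ts_headline('english', body,
                   to_tsquery('english', 'fragment & headline'),
                   'MaxFragments=3, MaxWords=20, MinWords=5, FragmentDelimiter=" ... "')
  FROM documents
 WHERE to_tsvector('english', body) @@ to_tsquery('english', 'fragment & headline');
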

src/backend/tsearch/ts_parse.c

@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.8 2008/05/16 16:31:01 tgl Exp $
+* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.9 2008/10/17 18:05:19 teodor Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -583,8 +583,11 @@ text *
 generateHeadline(HeadlineParsedText *prs)
 {
     text       *out;
-    int         len = 128;
     char       *ptr;
+    int         len = 128;
+    int         numfragments = 0;
+    int2        infrag = 0;
+
     HeadlineWordEntry *wrd = prs->words;

     out = (text *) palloc(len);
@@ -592,7 +595,7 @@ generateHeadline(HeadlineParsedText *prs)

     while (wrd - prs->words < prs->curwords)
     {
-        while (wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char *) out)) >= len)
+        while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (ptr - ((char *) out)) >= len)
         {
             int         dist = ptr - ((char *) out);

@@ -603,6 +606,20 @@ generateHeadline(HeadlineParsedText *prs)

         if (wrd->in && !wrd->repeated)
         {
+            if (!infrag)
+            {
+                /* start of a new fragment */
+                infrag = 1;
+                numfragments++;
+                /* add a fragment delimiter if this is not the first fragment */
+                if (numfragments > 1)
+                {
+                    memcpy(ptr, prs->fragdelim, prs->fragdelimlen);
+                    ptr += prs->fragdelimlen;
+                }
+            }
             if (wrd->replace)
             {
                 *ptr = ' ';
@@ -625,7 +642,11 @@ generateHeadline(HeadlineParsedText *prs)
             }
         }
         else if (!wrd->repeated)
+        {
+            if (infrag)
+                infrag = 0;
             pfree(wrd->word);
+        }

         wrd++;
     }

src/backend/tsearch/wparser_def.c

@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
-* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.16 2008/10/17 17:27:46 teodor Exp $
+* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.17 2008/10/17 18:05:19 teodor Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1684,18 +1684,247 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
     return false;
 }

-Datum
-prsd_headline(PG_FUNCTION_ARGS)
+static void
+mark_fragment(HeadlineParsedText *prs, int highlight, int startpos, int endpos)
 {
-    HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
-    List       *prsoptions = (List *) PG_GETARG_POINTER(1);
-    TSQuery     query = PG_GETARG_TSQUERY(2);
+    int         i;

-    /* from opt + start and end tag */
-    int         min_words = 15;
-    int         max_words = 35;
-    int         shortword = 3;
+    for (i = startpos; i <= endpos; i++)
+    {
+        if (prs->words[i].item)
+            prs->words[i].selected = 1;
+        if (highlight == 0)
+        {
+            if (HLIDIGNORE(prs->words[i].type))
+                prs->words[i].replace = 1;
+        }
+        else
+        {
+            if (XMLHLIDIGNORE(prs->words[i].type))
+                prs->words[i].replace = 1;
+        }
+
+        prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
+    }
+}
+
+typedef struct
+{
+    int4        startpos;
+    int4        endpos;
+    int4        poslen;
+    int4        curlen;
+    int2        in;
+    int2        excluded;
+} CoverPos;
+
+static void
+get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
+                  int *curlen, int *poslen, int max_words)
+{
+    int         i;
+
+    /*
+     * Objective: generate a fragment of words between startpos and endpos
+     * such that it has at most max_words and both ends have query words.
+     * If startpos and endpos are the endpoints of the cover and the cover
+     * has fewer words than max_words, then this function should just
+     * return the cover.
+     */
+    /* first move startpos to an item */
+    for (i = *startpos; i <= *endpos; i++)
+    {
+        *startpos = i;
+        if (prs->words[i].item && !prs->words[i].repeated)
+            break;
+    }
+    /* cut endpos to have only max_words */
+    *curlen = 0;
+    *poslen = 0;
+    for (i = *startpos; i <= *endpos && *curlen < max_words; i++)
+    {
+        if (!NONWORDTOKEN(prs->words[i].type))
+            *curlen += 1;
+        if (prs->words[i].item && !prs->words[i].repeated)
+            *poslen += 1;
+    }
+    /* if the cover was cut then move endpos back to a query item */
+    if (*endpos > i)
+    {
+        *endpos = i;
+        for (i = *endpos; i >= *startpos; i--)
+        {
+            *endpos = i;
+            if (prs->words[i].item && !prs->words[i].repeated)
+                break;
+            if (!NONWORDTOKEN(prs->words[i].type))
+                *curlen -= 1;
+        }
+    }
+}
+
+static void
+mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight,
+                  int shortword, int min_words,
+                  int max_words, int max_fragments)
+{
+    int4        poslen, curlen, i, f, num_f = 0;
+    int4        stretch, maxstretch, posmarker;
+
+    int4        startpos = 0,
+                endpos = 0,
+                p = 0,
+                q = 0;
+
+    int4        numcovers = 0,
+                maxcovers = 32;
+
+    int4        minI, minwords, maxitems;
+    CoverPos   *covers;
+
+    covers = palloc(maxcovers * sizeof(CoverPos));
+
+    /* get all covers */
+    while (hlCover(prs, query, &p, &q))
+    {
+        startpos = p;
+        endpos = q;
+
+        /*
+         * Break the cover into smaller fragments such that each fragment
+         * has at most max_words. Also ensure that each end of the fragment
+         * is a query word. This will allow us to stretch the fragment in
+         * either direction.
+         */
+        while (startpos <= endpos)
+        {
+            get_next_fragment(prs, &startpos, &endpos, &curlen, &poslen, max_words);
+            if (numcovers >= maxcovers)
+            {
+                maxcovers *= 2;
+                covers = repalloc(covers, sizeof(CoverPos) * maxcovers);
+            }
+            covers[numcovers].startpos = startpos;
+            covers[numcovers].endpos = endpos;
+            covers[numcovers].curlen = curlen;
+            covers[numcovers].poslen = poslen;
+            covers[numcovers].in = 0;
+            covers[numcovers].excluded = 0;
+            numcovers++;
+            startpos = endpos + 1;
+            endpos = q;
+        }
+        /* move p to generate the next cover */
+        p++;
+    }
+
+    /* choose the best covers */
+    for (f = 0; f < max_fragments; f++)
+    {
+        maxitems = 0;
+        minwords = 0x7fffffff;
+        minI = -1;
+        /*
+         * Choose the cover that contains the most query items. In case of
+         * a tie, choose the one with the smaller number of words.
+         */
+        for (i = 0; i < numcovers; i++)
+        {
+            if (!covers[i].in && !covers[i].excluded &&
+                (maxitems < covers[i].poslen ||
+                 (maxitems == covers[i].poslen && minwords > covers[i].curlen)))
+            {
+                maxitems = covers[i].poslen;
+                minwords = covers[i].curlen;
+                minI = i;
+            }
+        }
+        /* if a cover was found, mark it */
+        if (minI >= 0)
+        {
+            covers[minI].in = 1;
+            /* adjust the size of the cover */
+            startpos = covers[minI].startpos;
+            endpos = covers[minI].endpos;
+            curlen = covers[minI].curlen;
+            /* stretch the cover if its size is lower than max_words */
+            if (curlen < max_words)
+            {
+                /* divide the stretch between both sides of the cover */
+                maxstretch = (max_words - curlen) / 2;
+                /*
+                 * first stretch the startpos; stop stretching if
+                 * 1. we hit the beginning of the document
+                 * 2. we exceed maxstretch
+                 * 3. we hit an already marked fragment
+                 */
+                stretch = 0;
+                posmarker = startpos;
+                for (i = startpos - 1; i >= 0 && stretch < maxstretch && !prs->words[i].in; i--)
+                {
+                    if (!NONWORDTOKEN(prs->words[i].type))
+                    {
+                        curlen++;
+                        stretch++;
+                    }
+                    posmarker = i;
+                }
+                /* cut back startpos until we find a non-short token */
+                for (i = posmarker; i < startpos && (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword); i++)
+                {
+                    if (!NONWORDTOKEN(prs->words[i].type))
+                        curlen--;
+                }
+                startpos = i;
+                /* now stretch the endpos as much as possible */
+                posmarker = endpos;
+                for (i = endpos + 1; i < prs->curwords && curlen < max_words && !prs->words[i].in; i++)
+                {
+                    if (!NONWORDTOKEN(prs->words[i].type))
+                        curlen++;
+                    posmarker = i;
+                }
+                /* cut back endpos until we find a non-short token */
+                for (i = posmarker; i > endpos && (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword); i--)
+                {
+                    if (!NONWORDTOKEN(prs->words[i].type))
+                        curlen--;
+                }
+                endpos = i;
+            }
+            covers[minI].startpos = startpos;
+            covers[minI].endpos = endpos;
+            covers[minI].curlen = curlen;
+            /* mark the chosen fragment (cover) */
+            mark_fragment(prs, highlight, startpos, endpos);
+            num_f++;
+            /* exclude overlapping covers */
+            for (i = 0; i < numcovers; i++)
+            {
+                if (i != minI &&
+                    ((covers[i].startpos >= covers[minI].startpos && covers[i].startpos <= covers[minI].endpos) ||
+                     (covers[i].endpos >= covers[minI].startpos && covers[i].endpos <= covers[minI].endpos)))
+                    covers[i].excluded = 1;
+            }
+        }
+        else
+            break;
+    }
+
+    /* show at least min_words if we have not marked anything */
+    if (num_f <= 0)
+    {
+        startpos = endpos = curlen = 0;
+        for (i = 0; i < prs->curwords && curlen < min_words; i++)
+        {
+            if (!NONWORDTOKEN(prs->words[i].type))
+                curlen++;
+            endpos = i;
+        }
+        mark_fragment(prs, highlight, startpos, endpos);
+    }
+    pfree(covers);
+}
+
+static void
+mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
+              int shortword, int min_words, int max_words)
+{
     int         p = 0,
                 q = 0;
     int         bestb = -1,
@@ -1707,56 +1936,9 @@ prsd_headline(PG_FUNCTION_ARGS)
                 curlen;

     int         i;
-    int         highlight = 0;
-    ListCell   *l;
-
-    /* config */
-    prs->startsel = NULL;
-    prs->stopsel = NULL;
-    foreach(l, prsoptions)
-    {
-        DefElem    *defel = (DefElem *) lfirst(l);
-        char       *val = defGetString(defel);
-
-        if (pg_strcasecmp(defel->defname, "MaxWords") == 0)
-            max_words = pg_atoi(val, sizeof(int32), 0);
-        else if (pg_strcasecmp(defel->defname, "MinWords") == 0)
-            min_words = pg_atoi(val, sizeof(int32), 0);
-        else if (pg_strcasecmp(defel->defname, "ShortWord") == 0)
-            shortword = pg_atoi(val, sizeof(int32), 0);
-        else if (pg_strcasecmp(defel->defname, "StartSel") == 0)
-            prs->startsel = pstrdup(val);
-        else if (pg_strcasecmp(defel->defname, "StopSel") == 0)
-            prs->stopsel = pstrdup(val);
-        else if (pg_strcasecmp(defel->defname, "HighlightAll") == 0)
-            highlight = (pg_strcasecmp(val, "1") == 0 ||
-                         pg_strcasecmp(val, "on") == 0 ||
-                         pg_strcasecmp(val, "true") == 0 ||
-                         pg_strcasecmp(val, "t") == 0 ||
-                         pg_strcasecmp(val, "y") == 0 ||
-                         pg_strcasecmp(val, "yes") == 0);
-        else
-            ereport(ERROR,
-                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                     errmsg("unrecognized headline parameter: \"%s\"",
-                            defel->defname)));
-    }

     if (highlight == 0)
     {
-        if (min_words >= max_words)
-            ereport(ERROR,
-                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                     errmsg("MinWords should be less than MaxWords")));
-        if (min_words <= 0)
-            ereport(ERROR,
-                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                     errmsg("MinWords should be positive")));
-        if (shortword < 0)
-            ereport(ERROR,
-                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                     errmsg("ShortWord should be >= 0")));
-
         while (hlCover(prs, query, &p, &q))
         {
             /* find cover len in words */
@@ -1877,12 +2059,95 @@ prsd_headline(PG_FUNCTION_ARGS)
                 prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
         }

+}
+
+Datum
+prsd_headline(PG_FUNCTION_ARGS)
+{
+    HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
+    List       *prsoptions = (List *) PG_GETARG_POINTER(1);
+    TSQuery     query = PG_GETARG_TSQUERY(2);
+
+    /* from opt + start and end tag */
+    int         min_words = 15;
+    int         max_words = 35;
+    int         shortword = 3;
+    int         max_fragments = 0;
+    int         highlight = 0;
+    ListCell   *l;
+
+    /* config */
+    prs->startsel = NULL;
+    prs->stopsel = NULL;
+    foreach(l, prsoptions)
+    {
+        DefElem    *defel = (DefElem *) lfirst(l);
+        char       *val = defGetString(defel);
+
+        if (pg_strcasecmp(defel->defname, "MaxWords") == 0)
+            max_words = pg_atoi(val, sizeof(int32), 0);
+        else if (pg_strcasecmp(defel->defname, "MinWords") == 0)
+            min_words = pg_atoi(val, sizeof(int32), 0);
+        else if (pg_strcasecmp(defel->defname, "ShortWord") == 0)
+            shortword = pg_atoi(val, sizeof(int32), 0);
+        else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0)
+            max_fragments = pg_atoi(val, sizeof(int32), 0);
+        else if (pg_strcasecmp(defel->defname, "StartSel") == 0)
+            prs->startsel = pstrdup(val);
+        else if (pg_strcasecmp(defel->defname, "StopSel") == 0)
+            prs->stopsel = pstrdup(val);
+        else if (pg_strcasecmp(defel->defname, "FragmentDelimiter") == 0)
+            prs->fragdelim = pstrdup(val);
+        else if (pg_strcasecmp(defel->defname, "HighlightAll") == 0)
+            highlight = (pg_strcasecmp(val, "1") == 0 ||
+                         pg_strcasecmp(val, "on") == 0 ||
+                         pg_strcasecmp(val, "true") == 0 ||
+                         pg_strcasecmp(val, "t") == 0 ||
+                         pg_strcasecmp(val, "y") == 0 ||
+                         pg_strcasecmp(val, "yes") == 0);
+        else
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                     errmsg("unrecognized headline parameter: \"%s\"",
+                            defel->defname)));
+    }
+
+    if (highlight == 0)
+    {
+        if (min_words >= max_words)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                     errmsg("MinWords should be less than MaxWords")));
+        if (min_words <= 0)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                     errmsg("MinWords should be positive")));
+        if (shortword < 0)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                     errmsg("ShortWord should be >= 0")));
+        if (max_fragments < 0)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                     errmsg("MaxFragments should be >= 0")));
+    }
+
+    if (max_fragments == 0)
+        /* call the default headline generator */
+        mark_hl_words(prs, query, highlight, shortword, min_words, max_words);
+    else
+        mark_hl_fragments(prs, query, highlight, shortword, min_words, max_words, max_fragments);
+
     if (!prs->startsel)
         prs->startsel = pstrdup("<b>");
     if (!prs->stopsel)
         prs->stopsel = pstrdup("</b>");
+    if (!prs->fragdelim)
+        prs->fragdelim = pstrdup(" ... ");
     prs->startsellen = strlen(prs->startsel);
     prs->stopsellen = strlen(prs->stopsel);
+    prs->fragdelimlen = strlen(prs->fragdelim);

     PG_RETURN_POINTER(prs);
 }
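
As the dispatch in prsd_headline above shows, MaxFragments=0 (the default) keeps the pre-existing whole-cover headline via mark_hl_words, while any positive value switches to the new fragment generator. A hedged illustration of the two paths (again, documents and body are hypothetical names):

-- Default path: a single contiguous headline (mark_hl_words).
SELECT ts_headline('english', body, to_tsquery('english', 'water & drink'))
  FROM documents;

-- Fragment path: up to two excerpts (mark_hl_fragments), separated by the
-- default " ... " delimiter unless FragmentDelimiter is given.
SELECT ts_headline('english', body, to_tsquery('english', 'water & drink'),
                   'MaxFragments=2')
  FROM documents;
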

src/include/tsearch/ts_public.h

@@ -6,7 +6,7 @@
 *
 * Copyright (c) 1998-2008, PostgreSQL Global Development Group
 *
-* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $
+* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.11 2008/10/17 18:05:19 teodor Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -52,8 +52,10 @@ typedef struct
     int4        curwords;
     char       *startsel;
     char       *stopsel;
+    char       *fragdelim;
     int2        startsellen;
     int2        stopsellen;
+    int2        fragdelimlen;
 } HeadlineParsedText;

 /*

src/test/regress/expected/tsearch.out

@@ -632,6 +632,98 @@ to_tsquery('english', 'sea&foo'), 'HighlightAll=true');
 </html>
 (1 row)

+--Check if headline fragments work
+SELECT ts_headline('english', '
+Day after day, day after day,
+We stuck, nor breath nor motion,
+As idle as a painted Ship
+Upon a painted Ocean.
+Water, water, every where
+And all the boards did shrink;
+Water, water, every where,
+Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', to_tsquery('english', 'ocean'), 'MaxFragments=1');
+            ts_headline
+------------------------------------
+ after day,
+ We stuck, nor breath nor motion,
+ As idle as a painted Ship
+ Upon a painted <b>Ocean</b>.
+ Water, water, every where
+ And all the boards did shrink;
+ Water, water, every where,
+ Nor any drop
+(1 row)
+
+--Check if more than one fragment is displayed
+SELECT ts_headline('english', '
+Day after day, day after day,
+We stuck, nor breath nor motion,
+As idle as a painted Ship
+Upon a painted Ocean.
+Water, water, every where
+And all the boards did shrink;
+Water, water, every where,
+Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2');
+                 ts_headline
+----------------------------------------------
+ after day, day after day,
+ We <b>stuck</b>, nor breath nor motion,
+ As idle as a painted Ship
+ Upon a painted Ocean.
+ Water, water, every where
+ And all the boards did shrink;
+ Water, water, every where ... drop to drink.
+ S. T. <b>Coleridge</b>
+(1 row)
+
+--Fragments when not all query words are in the document
+SELECT ts_headline('english', '
+Day after day, day after day,
+We stuck, nor breath nor motion,
+As idle as a painted Ship
+Upon a painted Ocean.
+Water, water, every where
+And all the boards did shrink;
+Water, water, every where,
+Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', to_tsquery('english', 'ocean & seahorse'), 'MaxFragments=1');
+            ts_headline
+------------------------------------
+
+ Day after day, day after day,
+ We stuck, nor breath nor motion,
+ As idle as
+(1 row)
+
+--FragmentDelimiter option
+SELECT ts_headline('english', '
+Day after day, day after day,
+We stuck, nor breath nor motion,
+As idle as a painted Ship
+Upon a painted Ocean.
+Water, water, every where
+And all the boards did shrink;
+Water, water, every where,
+Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2,FragmentDelimiter=***');
+                ts_headline
+--------------------------------------------
+ after day, day after day,
+ We <b>stuck</b>, nor breath nor motion,
+ As idle as a painted Ship
+ Upon a painted Ocean.
+ Water, water, every where
+ And all the boards did shrink;
+ Water, water, every where***drop to drink.
+ S. T. <b>Coleridge</b>
+(1 row)
+
 --Rewrite sub system
 CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
 \set ECHO none

src/test/regress/sql/tsearch.sql

@@ -208,6 +208,58 @@ ff-bg
 </html>',
 to_tsquery('english', 'sea&foo'), 'HighlightAll=true');

+--Check if headline fragments work
+SELECT ts_headline('english', '
+Day after day, day after day,
+We stuck, nor breath nor motion,
+As idle as a painted Ship
+Upon a painted Ocean.
+Water, water, every where
+And all the boards did shrink;
+Water, water, every where,
+Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', to_tsquery('english', 'ocean'), 'MaxFragments=1');
+
+--Check if more than one fragment is displayed
+SELECT ts_headline('english', '
+Day after day, day after day,
+We stuck, nor breath nor motion,
+As idle as a painted Ship
+Upon a painted Ocean.
+Water, water, every where
+And all the boards did shrink;
+Water, water, every where,
+Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2');
+
+--Fragments when not all query words are in the document
+SELECT ts_headline('english', '
+Day after day, day after day,
+We stuck, nor breath nor motion,
+As idle as a painted Ship
+Upon a painted Ocean.
+Water, water, every where
+And all the boards did shrink;
+Water, water, every where,
+Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', to_tsquery('english', 'ocean & seahorse'), 'MaxFragments=1');
+
+--FragmentDelimiter option
+SELECT ts_headline('english', '
+Day after day, day after day,
+We stuck, nor breath nor motion,
+As idle as a painted Ship
+Upon a painted Ocean.
+Water, water, every where
+And all the boards did shrink;
+Water, water, every where,
+Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', to_tsquery('english', 'Coleridge & stuck'), 'MaxFragments=2,FragmentDelimiter=***');
+
 --Rewrite sub system

 CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);