Fix and improve compound word support. These changes cannot be applied to

a previous version without recreating tsvector fields... Thanks to Alexander Presber <aljoscha@weisshuhn.de> for discovering the problem.
2006-02-20 17:51:05 +00:00 · 2006-02-20 17:51:05 +00:00 · dde9457294
commit dde9457294
parent 21e2544aa7
1 changed files with 77 additions and 58 deletions
--- a/contrib/tsearch2/ispell/spell.c
+++ b/contrib/tsearch2/ispell/spell.c
@ -737,9 +737,9 @@ NISortAffixes(IspellDict * Conf)
 		{
 			if (firstsuffix < 0)
 				firstsuffix = i;
-			if (Affix->flagflags & FF_COMPOUNDONLYAFX)
+			if ((Affix->flagflags & FF_COMPOUNDONLYAFX) && Affix->replen>0 )
 			{
-				if (!ptr->affix ||
+				if (ptr == Conf->CompoundAffix ||
 					strbncmp((const unsigned char *) (ptr - 1)->affix,
 							 (const unsigned char *) Affix->repl,
 							 (ptr - 1)->len))
@ -1024,8 +1024,9 @@ typedef struct SplitVar
 }	SplitVar;

 static int
-CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len)
+CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len, bool CheckInPlace)
 {
+	if ( CheckInPlace ) {
 		while ((*ptr)->affix)
 		{
 			if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
@ -1036,6 +1037,19 @@ CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len)
 			}
 			(*ptr)++;
 		}
+	} else {
+		char *affbegin;
+		while ((*ptr)->affix)
+		{
+			if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
+			{
+				len = (*ptr)->len + (affbegin-word);
+				(*ptr)++;
+				return len;
+			}
+			(*ptr)++;
+		}
+	}
 	return 0;
 }

@ -1078,26 +1092,11 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
 	memset(notprobed, 1, wordlen);
 	var = CopyVar(orig, 1);

-	while (node && level < wordlen)
+	while (level < wordlen)
 	{
-		StopLow = node->data;
-		StopHigh = node->data + node->length;
-		while (StopLow < StopHigh)
-		{
-			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
-			if (StopMiddle->val == ((uint8 *) (word))[level])
-				break;
-			else if (StopMiddle->val < ((uint8 *) (word))[level])
-				StopLow = StopMiddle + 1;
-			else
-				StopHigh = StopMiddle;
-		}
-		if (StopLow >= StopHigh)
-			break;
-
-		/* find word with epenthetic */
+		/* find word with epenthetic and/or compound suffix */
 		caff = Conf->CompoundAffix;
-		while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level)) > 0)
+		while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) > 0)
 		{
 			/*
 			 * there is one of compound suffixes, so check word for existings
@ -1143,6 +1142,24 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
 			}
 		}

+		if ( !node )
+			break; 
+
+		StopLow = node->data;
+		StopHigh = node->data + node->length;
+		while (StopLow < StopHigh)
+		{
+			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+			if (StopMiddle->val == ((uint8 *) (word))[level])
+				break;
+			else if (StopMiddle->val < ((uint8 *) (word))[level])
+				StopLow = StopMiddle + 1;
+			else
+				StopHigh = StopMiddle;
+		}
+
+		if (StopLow < StopHigh) {
+
 			/* find infinitive */
 			if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level])
 			{
@ -1176,8 +1193,10 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
 					}
 				}
 			}
-		level++;
 			node = StopMiddle->node;
+		} else
+			node = NULL;  
+		level++;
 	}

 	var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos);