Just a few minutes after checking in, I mentioned it to the Japanese expert

Koki, and he suggested also including the Japanese comma. So I am adding it now, before I forget to do it... git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@15195 a95241bf-73f2-0310-859d-f6bbb57e9c96
2005-11-28 02:07:55 +00:00 · 2005-11-28 02:07:55 +00:00 · 968748e669
commit 968748e669
parent 45bfb95c2a
1 changed files with 8 additions and 3 deletions
--- a/src/bin/bemail_utils/spamdbm.cpp
+++ b/src/bin/bemail_utils/spamdbm.cpp
@ -2351,7 +2351,7 @@ static size_t TokenizerPassRemoveHTMLStyle (

 /* Convert Japanese periods (a round hollow dot symbol) to spaces so that the
 start of the next sentence is recognised at least as the start of a very long
-word. */
+word.  The Japanese comma also does the same job. */

 static size_t TokenizerPassJapanesePeriodsToSpaces (
  char *BufferPntr,
@ -2359,8 +2359,13 @@ static size_t TokenizerPassJapanesePeriodsToSpaces (
  char PrefixCharacter,
  set<string> &WordSet)
 {
-  return TokenizerUtilRemoveStartEndThing (BufferPntr,
-    NumberOfBytes, PrefixCharacter, WordSet, "。", "", true);
+  size_t BytesRemaining = NumberOfBytes;
+
+  BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
+    BytesRemaining, PrefixCharacter, WordSet, "。" /* period */, "", true);
+  BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
+    BytesRemaining, PrefixCharacter, WordSet, "、" /* comma */, "", true);
+  return BytesRemaining;
 }