The ispell dictionary can now read dictionaries in MySpell format,
used by OpenOffice. Dictionaries are available at http://lingucomponent.openoffice.org/spell_dic.html. The dictionary automatically recognizes the format of the files. Warning: the MySpell format has a limitation in its compound-word support: it is impossible to mark an affix as a compound-only affix. So for Norwegian, German, and similar languages it is recommended to use the original ispell format. For that reason I don't want to remove the my2ispell scripts; they contain a workaround, at least for the Norwegian language.
This commit is contained in:
parent
1a1326d64d
commit
04e9704b9e
@ -391,6 +391,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
char flagflags = 0;
|
||||
FILE *affix;
|
||||
int line=0;
|
||||
int oldformat = 0;
|
||||
|
||||
if (!(affix = fopen(filename, "r")))
|
||||
return (1);
|
||||
@ -412,6 +413,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
while (*s && t_isspace(s)) s++;
|
||||
if ( *s && pg_mblen(s) == 1 )
|
||||
Conf->compoundcontrol = *s;
|
||||
oldformat++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -419,12 +421,14 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
{
|
||||
suffixes = 1;
|
||||
prefixes = 0;
|
||||
oldformat++;
|
||||
continue;
|
||||
}
|
||||
if (STRNCMP(tmpstr, "prefixes") == 0)
|
||||
{
|
||||
suffixes = 0;
|
||||
prefixes = 1;
|
||||
oldformat++;
|
||||
continue;
|
||||
}
|
||||
if (STRNCMP(tmpstr, "flag") == 0)
|
||||
@ -433,10 +437,11 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
flagflags = 0;
|
||||
|
||||
while (*s && t_isspace(s)) s++;
|
||||
oldformat++;
|
||||
|
||||
/* allow only single-encoded flags */
|
||||
if ( pg_mblen(s) != 1 )
|
||||
continue;
|
||||
if ( pg_mblen(s) != 1 )
|
||||
elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
|
||||
|
||||
if (*s == '*')
|
||||
{
|
||||
@ -455,12 +460,22 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
/* allow only single-encoded flags */
|
||||
if ( pg_mblen(s) != 1 ) {
|
||||
flagflags = 0;
|
||||
continue;
|
||||
elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
|
||||
}
|
||||
|
||||
flag = (unsigned char) *s;
|
||||
continue;
|
||||
}
|
||||
if ( STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 ||
|
||||
STRNCMP(str, "PFX")==0 || STRNCMP(str, "SFX")==0 ) {
|
||||
|
||||
if ( oldformat )
|
||||
elog(ERROR,"Wrong affix file format");
|
||||
|
||||
fclose(affix);
|
||||
return NIImportOOAffixes(Conf, filename);
|
||||
|
||||
}
|
||||
if ((!suffixes) && (!prefixes))
|
||||
continue;
|
||||
|
||||
@ -475,6 +490,79 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
NIImportOOAffixes(IspellDict * Conf, const char *filename) {
|
||||
char str[BUFSIZ];
|
||||
char type[BUFSIZ];
|
||||
char sflag[BUFSIZ];
|
||||
char mask[BUFSIZ];
|
||||
char find[BUFSIZ];
|
||||
char repl[BUFSIZ];
|
||||
bool isSuffix = false;
|
||||
int flag = 0;
|
||||
char flagflags = 0;
|
||||
FILE *affix;
|
||||
int line=0;
|
||||
int scanread = 0;
|
||||
char scanbuf[BUFSIZ];
|
||||
|
||||
sprintf(scanbuf,"%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ/5, BUFSIZ/5, BUFSIZ/5, BUFSIZ/5);
|
||||
|
||||
if (!(affix = fopen(filename, "r")))
|
||||
return (1);
|
||||
Conf->compoundcontrol = '\t';
|
||||
|
||||
while (fgets(str, sizeof(str), affix))
|
||||
{
|
||||
line++;
|
||||
if ( *str == '\0' || t_isspace(str) || t_iseq(str,'#') )
|
||||
continue;
|
||||
pg_verifymbstr( str, strlen(str), false);
|
||||
|
||||
if ( STRNCMP(str, "COMPOUNDFLAG")==0 ) {
|
||||
char *s = str+strlen("COMPOUNDFLAG");
|
||||
while (*s && t_isspace(s)) s++;
|
||||
if ( *s && pg_mblen(s) == 1 )
|
||||
Conf->compoundcontrol = *s;
|
||||
continue;
|
||||
}
|
||||
|
||||
scanread = sscanf(str, scanbuf, type, sflag, find, repl, mask);
|
||||
|
||||
lowerstr(type);
|
||||
if ( scanread<4 || (STRNCMP(type,"sfx") && STRNCMP(type,"pfx")) )
|
||||
continue;
|
||||
|
||||
if ( scanread == 4 ) {
|
||||
if ( strlen(sflag) != 1 )
|
||||
continue;
|
||||
flag = *sflag;
|
||||
isSuffix = (STRNCMP(type,"sfx")==0) ? true : false;
|
||||
lowerstr(find);
|
||||
if ( t_iseq(find,'y') )
|
||||
flagflags |= FF_CROSSPRODUCT;
|
||||
else
|
||||
flagflags = 0;
|
||||
} else {
|
||||
if ( strlen(sflag) != 1 || flag != *sflag || flag==0 )
|
||||
continue;
|
||||
lowerstr(repl);
|
||||
lowerstr(find);
|
||||
lowerstr(mask);
|
||||
if ( t_iseq(find,'0') )
|
||||
*find = '\0';
|
||||
if ( t_iseq(repl,'0') )
|
||||
*repl = '\0';
|
||||
|
||||
NIAddAffix(Conf, flag, flagflags, mask, find, repl, isSuffix ? FF_SUFFIX : FF_PREFIX);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(affix);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
MergeAffix(IspellDict * Conf, int a1, int a2)
|
||||
{
|
||||
|
@ -121,6 +121,7 @@ typedef struct
|
||||
|
||||
TSLexeme *NINormalizeWord(IspellDict * Conf, char *word);
|
||||
int NIImportAffixes(IspellDict * Conf, const char *filename);
|
||||
int NIImportOOAffixes(IspellDict * Conf, const char *filename);
|
||||
int NIImportDictionary(IspellDict * Conf, const char *filename);
|
||||
|
||||
int NIAddSpell(IspellDict * Conf, const char *word, const char *flag);
|
||||
|
Loading…
x
Reference in New Issue
Block a user