Sync our Snowball stemmer dictionaries with current upstream
The main change is a new stemmer for Greek. There are minor changes in the Danish and French stemmers. Author: Panagiotis Mavrogiorgos <pmav99@gmail.com>
This commit is contained in:
parent
dedb6e0143
commit
7b925e1270
@ -3810,6 +3810,7 @@ Parser: "pg_catalog.default"
|
||||
pg_catalog | finnish_stem | snowball stemmer for finnish language
|
||||
pg_catalog | french_stem | snowball stemmer for french language
|
||||
pg_catalog | german_stem | snowball stemmer for german language
|
||||
pg_catalog | greek_stem | snowball stemmer for greek language
|
||||
pg_catalog | hungarian_stem | snowball stemmer for hungarian language
|
||||
pg_catalog | indonesian_stem | snowball stemmer for indonesian language
|
||||
pg_catalog | irish_stem | snowball stemmer for irish language
|
||||
|
@ -41,6 +41,7 @@ OBJS= $(WIN32RES) dict_snowball.o api.o utilities.o \
|
||||
stem_UTF_8_finnish.o \
|
||||
stem_UTF_8_french.o \
|
||||
stem_UTF_8_german.o \
|
||||
stem_UTF_8_greek.o \
|
||||
stem_UTF_8_hungarian.o \
|
||||
stem_UTF_8_indonesian.o \
|
||||
stem_UTF_8_irish.o \
|
||||
@ -69,6 +70,7 @@ LANGUAGES= \
|
||||
finnish finnish \
|
||||
french french \
|
||||
german german \
|
||||
greek greek \
|
||||
hungarian hungarian \
|
||||
indonesian indonesian \
|
||||
irish irish \
|
||||
|
@ -29,8 +29,8 @@ We choose to include the derived files in the PostgreSQL distribution
|
||||
because most installations will not have the Snowball compiler available.
|
||||
|
||||
We are currently synced with the Snowball git commit
|
||||
1964ce688cbeca505263c8f77e16ed923296ce7a
|
||||
of 2018-06-29.
|
||||
4456b82c26c02493e8807a66f30593a98c5d2888
|
||||
of 2019-06-24.
|
||||
|
||||
To update the PostgreSQL sources from a new Snowball version:
|
||||
|
||||
@ -57,7 +57,7 @@ do not require any changes.
|
||||
4. Check whether any stemmer modules have been added or removed. If so, edit
|
||||
the OBJS list in Makefile, the list of #include's in dict_snowball.c, and the
|
||||
stemmer_modules[] table in dict_snowball.c. You might also need to change
|
||||
the LANGUAGES list in Makefile.
|
||||
the LANGUAGES list in Makefile and tsearch_config_languages in initdb.c.
|
||||
|
||||
5. The various stopword files in stopwords/ must be downloaded
|
||||
individually from pages on the snowballstem.org website.
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include "snowball/libstemmer/stem_UTF_8_finnish.h"
|
||||
#include "snowball/libstemmer/stem_UTF_8_french.h"
|
||||
#include "snowball/libstemmer/stem_UTF_8_german.h"
|
||||
#include "snowball/libstemmer/stem_UTF_8_greek.h"
|
||||
#include "snowball/libstemmer/stem_UTF_8_hungarian.h"
|
||||
#include "snowball/libstemmer/stem_UTF_8_indonesian.h"
|
||||
#include "snowball/libstemmer/stem_UTF_8_irish.h"
|
||||
@ -115,6 +116,7 @@ static const stemmer_module stemmer_modules[] =
|
||||
STEMMER_MODULE(finnish, PG_UTF8, UTF_8),
|
||||
STEMMER_MODULE(french, PG_UTF8, UTF_8),
|
||||
STEMMER_MODULE(german, PG_UTF8, UTF_8),
|
||||
STEMMER_MODULE(greek, PG_UTF8, UTF_8),
|
||||
STEMMER_MODULE(hungarian, PG_UTF8, UTF_8),
|
||||
STEMMER_MODULE(indonesian, PG_UTF8, UTF_8),
|
||||
STEMMER_MODULE(irish, PG_UTF8, UTF_8),
|
||||
|
@ -124,6 +124,8 @@ static const struct among a_2[5] =
|
||||
/* 4 */ { 4, s_2_4, -1, 2, 0}
|
||||
};
|
||||
|
||||
static const unsigned char g_c[] = { 119, 223, 119, 1 };
|
||||
|
||||
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
|
||||
static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
|
||||
@ -133,25 +135,25 @@ static const symbol s_1[] = { 'i', 'g' };
|
||||
static const symbol s_2[] = { 'l', 0xF8, 's' };
|
||||
|
||||
static int r_mark_regions(struct SN_env * z) { /* forwardmode */
|
||||
z->I[0] = z->l; /* $p1 = <integer expression>, line 31 */
|
||||
{ int c_test1 = z->c; /* test, line 33 */
|
||||
{ int ret = z->c + 3; /* hop, line 33 */
|
||||
z->I[0] = z->l; /* $p1 = <integer expression>, line 33 */
|
||||
{ int c_test1 = z->c; /* test, line 35 */
|
||||
{ int ret = z->c + 3; /* hop, line 35 */
|
||||
if (0 > ret || ret > z->l) return 0;
|
||||
z->c = ret;
|
||||
}
|
||||
z->I[1] = z->c; /* setmark x, line 33 */
|
||||
z->I[1] = z->c; /* setmark x, line 35 */
|
||||
z->c = c_test1;
|
||||
}
|
||||
if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */
|
||||
{ /* gopast */ /* non v, line 34 */
|
||||
if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 36 */
|
||||
{ /* gopast */ /* non v, line 36 */
|
||||
int ret = in_grouping(z, g_v, 97, 248, 1);
|
||||
if (ret < 0) return 0;
|
||||
z->c += ret;
|
||||
}
|
||||
z->I[0] = z->c; /* setmark p1, line 34 */
|
||||
/* try, line 35 */
|
||||
if (!(z->I[0] < z->I[1])) goto lab0; /* $(<integer expression> < <integer expression>), line 35 */
|
||||
z->I[0] = z->I[1]; /* $p1 = <integer expression>, line 35 */
|
||||
z->I[0] = z->c; /* setmark p1, line 36 */
|
||||
/* try, line 37 */
|
||||
if (!(z->I[0] < z->I[1])) goto lab0; /* $(<integer expression> < <integer expression>), line 37 */
|
||||
z->I[0] = z->I[1]; /* $p1 = <integer expression>, line 37 */
|
||||
lab0:
|
||||
return 1;
|
||||
}
|
||||
@ -159,25 +161,25 @@ lab0:
|
||||
static int r_main_suffix(struct SN_env * z) { /* backwardmode */
|
||||
int among_var;
|
||||
|
||||
{ int mlimit1; /* setlimit, line 41 */
|
||||
{ int mlimit1; /* setlimit, line 43 */
|
||||
if (z->c < z->I[0]) return 0;
|
||||
mlimit1 = z->lb; z->lb = z->I[0];
|
||||
z->ket = z->c; /* [, line 41 */
|
||||
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } /* substring, line 41 */
|
||||
z->ket = z->c; /* [, line 43 */
|
||||
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } /* substring, line 43 */
|
||||
among_var = find_among_b(z, a_0, 32);
|
||||
if (!(among_var)) { z->lb = mlimit1; return 0; }
|
||||
z->bra = z->c; /* ], line 41 */
|
||||
z->bra = z->c; /* ], line 43 */
|
||||
z->lb = mlimit1;
|
||||
}
|
||||
switch (among_var) { /* among, line 42 */
|
||||
switch (among_var) { /* among, line 44 */
|
||||
case 1:
|
||||
{ int ret = slice_del(z); /* delete, line 48 */
|
||||
{ int ret = slice_del(z); /* delete, line 50 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; /* grouping s_ending, line 50 */
|
||||
{ int ret = slice_del(z); /* delete, line 50 */
|
||||
if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; /* grouping s_ending, line 52 */
|
||||
{ int ret = slice_del(z); /* delete, line 52 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
break;
|
||||
@ -186,23 +188,23 @@ static int r_main_suffix(struct SN_env * z) { /* backwardmode */
|
||||
}
|
||||
|
||||
static int r_consonant_pair(struct SN_env * z) { /* backwardmode */
|
||||
{ int m_test1 = z->l - z->c; /* test, line 55 */
|
||||
{ int m_test1 = z->l - z->c; /* test, line 57 */
|
||||
|
||||
{ int mlimit2; /* setlimit, line 56 */
|
||||
{ int mlimit2; /* setlimit, line 58 */
|
||||
if (z->c < z->I[0]) return 0;
|
||||
mlimit2 = z->lb; z->lb = z->I[0];
|
||||
z->ket = z->c; /* [, line 56 */
|
||||
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } /* substring, line 56 */
|
||||
z->ket = z->c; /* [, line 58 */
|
||||
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } /* substring, line 58 */
|
||||
if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit2; return 0; }
|
||||
z->bra = z->c; /* ], line 56 */
|
||||
z->bra = z->c; /* ], line 58 */
|
||||
z->lb = mlimit2;
|
||||
}
|
||||
z->c = z->l - m_test1;
|
||||
}
|
||||
if (z->c <= z->lb) return 0;
|
||||
z->c--; /* next, line 62 */
|
||||
z->bra = z->c; /* ], line 62 */
|
||||
{ int ret = slice_del(z); /* delete, line 62 */
|
||||
z->c--; /* next, line 64 */
|
||||
z->bra = z->c; /* ], line 64 */
|
||||
{ int ret = slice_del(z); /* delete, line 64 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
return 1;
|
||||
@ -210,35 +212,35 @@ static int r_consonant_pair(struct SN_env * z) { /* backwardmode */
|
||||
|
||||
static int r_other_suffix(struct SN_env * z) { /* backwardmode */
|
||||
int among_var;
|
||||
{ int m1 = z->l - z->c; (void)m1; /* do, line 66 */
|
||||
z->ket = z->c; /* [, line 66 */
|
||||
if (!(eq_s_b(z, 2, s_0))) goto lab0; /* literal, line 66 */
|
||||
z->bra = z->c; /* ], line 66 */
|
||||
if (!(eq_s_b(z, 2, s_1))) goto lab0; /* literal, line 66 */
|
||||
{ int ret = slice_del(z); /* delete, line 66 */
|
||||
{ int m1 = z->l - z->c; (void)m1; /* do, line 68 */
|
||||
z->ket = z->c; /* [, line 68 */
|
||||
if (!(eq_s_b(z, 2, s_0))) goto lab0; /* literal, line 68 */
|
||||
z->bra = z->c; /* ], line 68 */
|
||||
if (!(eq_s_b(z, 2, s_1))) goto lab0; /* literal, line 68 */
|
||||
{ int ret = slice_del(z); /* delete, line 68 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab0:
|
||||
z->c = z->l - m1;
|
||||
}
|
||||
|
||||
{ int mlimit2; /* setlimit, line 67 */
|
||||
{ int mlimit2; /* setlimit, line 69 */
|
||||
if (z->c < z->I[0]) return 0;
|
||||
mlimit2 = z->lb; z->lb = z->I[0];
|
||||
z->ket = z->c; /* [, line 67 */
|
||||
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } /* substring, line 67 */
|
||||
z->ket = z->c; /* [, line 69 */
|
||||
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } /* substring, line 69 */
|
||||
among_var = find_among_b(z, a_2, 5);
|
||||
if (!(among_var)) { z->lb = mlimit2; return 0; }
|
||||
z->bra = z->c; /* ], line 67 */
|
||||
z->bra = z->c; /* ], line 69 */
|
||||
z->lb = mlimit2;
|
||||
}
|
||||
switch (among_var) { /* among, line 68 */
|
||||
switch (among_var) { /* among, line 70 */
|
||||
case 1:
|
||||
{ int ret = slice_del(z); /* delete, line 70 */
|
||||
{ int ret = slice_del(z); /* delete, line 72 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
{ int m3 = z->l - z->c; (void)m3; /* do, line 70 */
|
||||
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 70 */
|
||||
{ int m3 = z->l - z->c; (void)m3; /* do, line 72 */
|
||||
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 72 */
|
||||
if (ret == 0) goto lab1;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
@ -247,7 +249,7 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{ int ret = slice_from_s(z, 3, s_2); /* <-, line 72 */
|
||||
{ int ret = slice_from_s(z, 3, s_2); /* <-, line 74 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
break;
|
||||
@ -257,60 +259,60 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
|
||||
|
||||
static int r_undouble(struct SN_env * z) { /* backwardmode */
|
||||
|
||||
{ int mlimit1; /* setlimit, line 76 */
|
||||
{ int mlimit1; /* setlimit, line 78 */
|
||||
if (z->c < z->I[0]) return 0;
|
||||
mlimit1 = z->lb; z->lb = z->I[0];
|
||||
z->ket = z->c; /* [, line 76 */
|
||||
if (out_grouping_b(z, g_v, 97, 248, 0)) { z->lb = mlimit1; return 0; } /* non v, line 76 */
|
||||
z->bra = z->c; /* ], line 76 */
|
||||
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
|
||||
if (z->S[0] == 0) return -1; /* -> ch, line 76 */
|
||||
z->ket = z->c; /* [, line 78 */
|
||||
if (in_grouping_b(z, g_c, 98, 122, 0)) { z->lb = mlimit1; return 0; } /* grouping c, line 78 */
|
||||
z->bra = z->c; /* ], line 78 */
|
||||
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 78 */
|
||||
if (z->S[0] == 0) return -1; /* -> ch, line 78 */
|
||||
z->lb = mlimit1;
|
||||
}
|
||||
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
|
||||
{ int ret = slice_del(z); /* delete, line 78 */
|
||||
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 79 */
|
||||
{ int ret = slice_del(z); /* delete, line 80 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int danish_ISO_8859_1_stem(struct SN_env * z) { /* forwardmode */
|
||||
{ int c1 = z->c; /* do, line 84 */
|
||||
{ int ret = r_mark_regions(z); /* call mark_regions, line 84 */
|
||||
{ int c1 = z->c; /* do, line 86 */
|
||||
{ int ret = r_mark_regions(z); /* call mark_regions, line 86 */
|
||||
if (ret == 0) goto lab0;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab0:
|
||||
z->c = c1;
|
||||
}
|
||||
z->lb = z->c; z->c = z->l; /* backwards, line 85 */
|
||||
z->lb = z->c; z->c = z->l; /* backwards, line 87 */
|
||||
|
||||
{ int m2 = z->l - z->c; (void)m2; /* do, line 86 */
|
||||
{ int ret = r_main_suffix(z); /* call main_suffix, line 86 */
|
||||
{ int m2 = z->l - z->c; (void)m2; /* do, line 88 */
|
||||
{ int ret = r_main_suffix(z); /* call main_suffix, line 88 */
|
||||
if (ret == 0) goto lab1;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab1:
|
||||
z->c = z->l - m2;
|
||||
}
|
||||
{ int m3 = z->l - z->c; (void)m3; /* do, line 87 */
|
||||
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 87 */
|
||||
{ int m3 = z->l - z->c; (void)m3; /* do, line 89 */
|
||||
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 89 */
|
||||
if (ret == 0) goto lab2;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab2:
|
||||
z->c = z->l - m3;
|
||||
}
|
||||
{ int m4 = z->l - z->c; (void)m4; /* do, line 88 */
|
||||
{ int ret = r_other_suffix(z); /* call other_suffix, line 88 */
|
||||
{ int m4 = z->l - z->c; (void)m4; /* do, line 90 */
|
||||
{ int ret = r_other_suffix(z); /* call other_suffix, line 90 */
|
||||
if (ret == 0) goto lab3;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab3:
|
||||
z->c = z->l - m4;
|
||||
}
|
||||
{ int m5 = z->l - z->c; (void)m5; /* do, line 89 */
|
||||
{ int ret = r_undouble(z); /* call undouble, line 89 */
|
||||
{ int m5 = z->l - z->c; (void)m5; /* do, line 91 */
|
||||
{ int ret = r_undouble(z); /* call undouble, line 91 */
|
||||
if (ret == 0) goto lab4;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -124,6 +124,8 @@ static const struct among a_2[5] =
|
||||
/* 4 */ { 5, s_2_4, -1, 2, 0}
|
||||
};
|
||||
|
||||
static const unsigned char g_c[] = { 119, 223, 119, 1 };
|
||||
|
||||
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
|
||||
|
||||
static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
|
||||
@ -133,25 +135,25 @@ static const symbol s_1[] = { 'i', 'g' };
|
||||
static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' };
|
||||
|
||||
static int r_mark_regions(struct SN_env * z) { /* forwardmode */
|
||||
z->I[0] = z->l; /* $p1 = <integer expression>, line 31 */
|
||||
{ int c_test1 = z->c; /* test, line 33 */
|
||||
{ int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); /* hop, line 33 */
|
||||
z->I[0] = z->l; /* $p1 = <integer expression>, line 33 */
|
||||
{ int c_test1 = z->c; /* test, line 35 */
|
||||
{ int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); /* hop, line 35 */
|
||||
if (ret < 0) return 0;
|
||||
z->c = ret;
|
||||
}
|
||||
z->I[1] = z->c; /* setmark x, line 33 */
|
||||
z->I[1] = z->c; /* setmark x, line 35 */
|
||||
z->c = c_test1;
|
||||
}
|
||||
if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */
|
||||
{ /* gopast */ /* non v, line 34 */
|
||||
if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 36 */
|
||||
{ /* gopast */ /* non v, line 36 */
|
||||
int ret = in_grouping_U(z, g_v, 97, 248, 1);
|
||||
if (ret < 0) return 0;
|
||||
z->c += ret;
|
||||
}
|
||||
z->I[0] = z->c; /* setmark p1, line 34 */
|
||||
/* try, line 35 */
|
||||
if (!(z->I[0] < z->I[1])) goto lab0; /* $(<integer expression> < <integer expression>), line 35 */
|
||||
z->I[0] = z->I[1]; /* $p1 = <integer expression>, line 35 */
|
||||
z->I[0] = z->c; /* setmark p1, line 36 */
|
||||
/* try, line 37 */
|
||||
if (!(z->I[0] < z->I[1])) goto lab0; /* $(<integer expression> < <integer expression>), line 37 */
|
||||
z->I[0] = z->I[1]; /* $p1 = <integer expression>, line 37 */
|
||||
lab0:
|
||||
return 1;
|
||||
}
|
||||
@ -159,25 +161,25 @@ lab0:
|
||||
static int r_main_suffix(struct SN_env * z) { /* backwardmode */
|
||||
int among_var;
|
||||
|
||||
{ int mlimit1; /* setlimit, line 41 */
|
||||
{ int mlimit1; /* setlimit, line 43 */
|
||||
if (z->c < z->I[0]) return 0;
|
||||
mlimit1 = z->lb; z->lb = z->I[0];
|
||||
z->ket = z->c; /* [, line 41 */
|
||||
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } /* substring, line 41 */
|
||||
z->ket = z->c; /* [, line 43 */
|
||||
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } /* substring, line 43 */
|
||||
among_var = find_among_b(z, a_0, 32);
|
||||
if (!(among_var)) { z->lb = mlimit1; return 0; }
|
||||
z->bra = z->c; /* ], line 41 */
|
||||
z->bra = z->c; /* ], line 43 */
|
||||
z->lb = mlimit1;
|
||||
}
|
||||
switch (among_var) { /* among, line 42 */
|
||||
switch (among_var) { /* among, line 44 */
|
||||
case 1:
|
||||
{ int ret = slice_del(z); /* delete, line 48 */
|
||||
{ int ret = slice_del(z); /* delete, line 50 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; /* grouping s_ending, line 50 */
|
||||
{ int ret = slice_del(z); /* delete, line 50 */
|
||||
if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; /* grouping s_ending, line 52 */
|
||||
{ int ret = slice_del(z); /* delete, line 52 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
break;
|
||||
@ -186,25 +188,25 @@ static int r_main_suffix(struct SN_env * z) { /* backwardmode */
|
||||
}
|
||||
|
||||
static int r_consonant_pair(struct SN_env * z) { /* backwardmode */
|
||||
{ int m_test1 = z->l - z->c; /* test, line 55 */
|
||||
{ int m_test1 = z->l - z->c; /* test, line 57 */
|
||||
|
||||
{ int mlimit2; /* setlimit, line 56 */
|
||||
{ int mlimit2; /* setlimit, line 58 */
|
||||
if (z->c < z->I[0]) return 0;
|
||||
mlimit2 = z->lb; z->lb = z->I[0];
|
||||
z->ket = z->c; /* [, line 56 */
|
||||
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } /* substring, line 56 */
|
||||
z->ket = z->c; /* [, line 58 */
|
||||
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } /* substring, line 58 */
|
||||
if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit2; return 0; }
|
||||
z->bra = z->c; /* ], line 56 */
|
||||
z->bra = z->c; /* ], line 58 */
|
||||
z->lb = mlimit2;
|
||||
}
|
||||
z->c = z->l - m_test1;
|
||||
}
|
||||
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
|
||||
if (ret < 0) return 0;
|
||||
z->c = ret; /* next, line 62 */
|
||||
z->c = ret; /* next, line 64 */
|
||||
}
|
||||
z->bra = z->c; /* ], line 62 */
|
||||
{ int ret = slice_del(z); /* delete, line 62 */
|
||||
z->bra = z->c; /* ], line 64 */
|
||||
{ int ret = slice_del(z); /* delete, line 64 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
return 1;
|
||||
@ -212,35 +214,35 @@ static int r_consonant_pair(struct SN_env * z) { /* backwardmode */
|
||||
|
||||
static int r_other_suffix(struct SN_env * z) { /* backwardmode */
|
||||
int among_var;
|
||||
{ int m1 = z->l - z->c; (void)m1; /* do, line 66 */
|
||||
z->ket = z->c; /* [, line 66 */
|
||||
if (!(eq_s_b(z, 2, s_0))) goto lab0; /* literal, line 66 */
|
||||
z->bra = z->c; /* ], line 66 */
|
||||
if (!(eq_s_b(z, 2, s_1))) goto lab0; /* literal, line 66 */
|
||||
{ int ret = slice_del(z); /* delete, line 66 */
|
||||
{ int m1 = z->l - z->c; (void)m1; /* do, line 68 */
|
||||
z->ket = z->c; /* [, line 68 */
|
||||
if (!(eq_s_b(z, 2, s_0))) goto lab0; /* literal, line 68 */
|
||||
z->bra = z->c; /* ], line 68 */
|
||||
if (!(eq_s_b(z, 2, s_1))) goto lab0; /* literal, line 68 */
|
||||
{ int ret = slice_del(z); /* delete, line 68 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab0:
|
||||
z->c = z->l - m1;
|
||||
}
|
||||
|
||||
{ int mlimit2; /* setlimit, line 67 */
|
||||
{ int mlimit2; /* setlimit, line 69 */
|
||||
if (z->c < z->I[0]) return 0;
|
||||
mlimit2 = z->lb; z->lb = z->I[0];
|
||||
z->ket = z->c; /* [, line 67 */
|
||||
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } /* substring, line 67 */
|
||||
z->ket = z->c; /* [, line 69 */
|
||||
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } /* substring, line 69 */
|
||||
among_var = find_among_b(z, a_2, 5);
|
||||
if (!(among_var)) { z->lb = mlimit2; return 0; }
|
||||
z->bra = z->c; /* ], line 67 */
|
||||
z->bra = z->c; /* ], line 69 */
|
||||
z->lb = mlimit2;
|
||||
}
|
||||
switch (among_var) { /* among, line 68 */
|
||||
switch (among_var) { /* among, line 70 */
|
||||
case 1:
|
||||
{ int ret = slice_del(z); /* delete, line 70 */
|
||||
{ int ret = slice_del(z); /* delete, line 72 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
{ int m3 = z->l - z->c; (void)m3; /* do, line 70 */
|
||||
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 70 */
|
||||
{ int m3 = z->l - z->c; (void)m3; /* do, line 72 */
|
||||
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 72 */
|
||||
if (ret == 0) goto lab1;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
@ -249,7 +251,7 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{ int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */
|
||||
{ int ret = slice_from_s(z, 4, s_2); /* <-, line 74 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
break;
|
||||
@ -259,60 +261,60 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
|
||||
|
||||
static int r_undouble(struct SN_env * z) { /* backwardmode */
|
||||
|
||||
{ int mlimit1; /* setlimit, line 76 */
|
||||
{ int mlimit1; /* setlimit, line 78 */
|
||||
if (z->c < z->I[0]) return 0;
|
||||
mlimit1 = z->lb; z->lb = z->I[0];
|
||||
z->ket = z->c; /* [, line 76 */
|
||||
if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit1; return 0; } /* non v, line 76 */
|
||||
z->bra = z->c; /* ], line 76 */
|
||||
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
|
||||
if (z->S[0] == 0) return -1; /* -> ch, line 76 */
|
||||
z->ket = z->c; /* [, line 78 */
|
||||
if (in_grouping_b_U(z, g_c, 98, 122, 0)) { z->lb = mlimit1; return 0; } /* grouping c, line 78 */
|
||||
z->bra = z->c; /* ], line 78 */
|
||||
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 78 */
|
||||
if (z->S[0] == 0) return -1; /* -> ch, line 78 */
|
||||
z->lb = mlimit1;
|
||||
}
|
||||
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
|
||||
{ int ret = slice_del(z); /* delete, line 78 */
|
||||
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 79 */
|
||||
{ int ret = slice_del(z); /* delete, line 80 */
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int danish_UTF_8_stem(struct SN_env * z) { /* forwardmode */
|
||||
{ int c1 = z->c; /* do, line 84 */
|
||||
{ int ret = r_mark_regions(z); /* call mark_regions, line 84 */
|
||||
{ int c1 = z->c; /* do, line 86 */
|
||||
{ int ret = r_mark_regions(z); /* call mark_regions, line 86 */
|
||||
if (ret == 0) goto lab0;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab0:
|
||||
z->c = c1;
|
||||
}
|
||||
z->lb = z->c; z->c = z->l; /* backwards, line 85 */
|
||||
z->lb = z->c; z->c = z->l; /* backwards, line 87 */
|
||||
|
||||
{ int m2 = z->l - z->c; (void)m2; /* do, line 86 */
|
||||
{ int ret = r_main_suffix(z); /* call main_suffix, line 86 */
|
||||
{ int m2 = z->l - z->c; (void)m2; /* do, line 88 */
|
||||
{ int ret = r_main_suffix(z); /* call main_suffix, line 88 */
|
||||
if (ret == 0) goto lab1;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab1:
|
||||
z->c = z->l - m2;
|
||||
}
|
||||
{ int m3 = z->l - z->c; (void)m3; /* do, line 87 */
|
||||
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 87 */
|
||||
{ int m3 = z->l - z->c; (void)m3; /* do, line 89 */
|
||||
{ int ret = r_consonant_pair(z); /* call consonant_pair, line 89 */
|
||||
if (ret == 0) goto lab2;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab2:
|
||||
z->c = z->l - m3;
|
||||
}
|
||||
{ int m4 = z->l - z->c; (void)m4; /* do, line 88 */
|
||||
{ int ret = r_other_suffix(z); /* call other_suffix, line 88 */
|
||||
{ int m4 = z->l - z->c; (void)m4; /* do, line 90 */
|
||||
{ int ret = r_other_suffix(z); /* call other_suffix, line 90 */
|
||||
if (ret == 0) goto lab3;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
lab3:
|
||||
z->c = z->l - m4;
|
||||
}
|
||||
{ int m5 = z->l - z->c; (void)m5; /* do, line 89 */
|
||||
{ int ret = r_undouble(z); /* call undouble, line 89 */
|
||||
{ int m5 = z->l - z->c; (void)m5; /* do, line 91 */
|
||||
{ int ret = r_undouble(z); /* call undouble, line 91 */
|
||||
if (ret == 0) goto lab4;
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
4199
src/backend/snowball/libstemmer/stem_UTF_8_greek.c
Normal file
4199
src/backend/snowball/libstemmer/stem_UTF_8_greek.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -59,31 +59,49 @@ extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
|
||||
/* Code for character groupings: utf8 cases */
|
||||
|
||||
/*
 * Decode one UTF-8 encoded character reading forwards from p[c].
 *
 *   p    - buffer of symbols (bytes)
 *   c    - index of the first byte to read
 *   l    - forward limit; bytes at index >= l are never read
 *   slot - receives the decoded value
 *
 * Returns the number of bytes consumed (1..4), or 0 if c is already at or
 * past the limit (in which case *slot is left untouched).
 *
 * A lead byte below 0xC0 is returned as-is with width 1.  If the limit is
 * reached before a multi-byte sequence is complete, the bytes read so far
 * are decoded as the next-shorter form rather than reading past l.
 */
static int get_utf8(const symbol * p, int c, int l, int * slot) {
    int b0, b1, b2;
    if (c >= l) return 0;
    b0 = p[c++];
    if (b0 < 0xC0 || c == l) {   /* 1100 0000 */
        *slot = b0;
        return 1;
    }
    b1 = p[c++] & 0x3F;
    if (b0 < 0xE0 || c == l) {   /* 1110 0000 */
        *slot = (b0 & 0x1F) << 6 | b1;
        return 2;
    }
    b2 = p[c++] & 0x3F;
    if (b0 < 0xF0 || c == l) {   /* 1111 0000 */
        *slot = (b0 & 0xF) << 12 | b1 << 6 | b2;
        return 3;
    }
    /*
     * A 4-byte lead byte is 11110xxx: its payload is the low three bits,
     * so the mask must be 0x7.  (0xE would drop bit 0 and mis-decode lead
     * bytes 0xF1 and 0xF3.)
     */
    *slot = (b0 & 0x7) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F);
    return 4;
}
|
||||
|
||||
/*
 * Decode one UTF-8 encoded character reading backwards, ending just
 * before p[c].
 *
 *   p    - buffer of symbols (bytes)
 *   c    - index one past the last byte of the character
 *   lb   - backward limit; bytes at index < lb are never read
 *   slot - receives the decoded value
 *
 * Returns the number of bytes consumed (1..4), or 0 if c is already at or
 * before the limit (in which case *slot is left untouched).
 *
 * Bytes are accumulated from the trailing end: `a` holds the low-order
 * payload bits gathered so far and `b` is the byte most recently read.
 * Hitting lb before a lead byte is found decodes what has been read as
 * the next-shorter form rather than reading before lb.
 */
static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
    int a, b;
    if (c <= lb) return 0;
    b = p[--c];
    if (b < 0x80 || c == lb) {   /* 1000 0000 */
        *slot = b;
        return 1;
    }
    a = b & 0x3F;
    b = p[--c];
    if (b >= 0xC0 || c == lb) {   /* 1100 0000 */
        *slot = (b & 0x1F) << 6 | a;
        return 2;
    }
    a |= (b & 0x3F) << 6;
    b = p[--c];
    if (b >= 0xE0 || c == lb) {   /* 1110 0000 */
        *slot = (b & 0xF) << 12 | a;
        return 3;
    }
    /*
     * The remaining byte is a 4-byte lead (11110xxx): its payload is the
     * low three bits, so the mask must be 0x7, not 0xE.
     */
    *slot = (p[--c] & 0x7) << 18 | (b & 0x3F) << 12 | a;
    return 4;
}
|
||||
|
||||
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
||||
@ -230,8 +248,13 @@ extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
|
||||
common++;
|
||||
}
|
||||
}
|
||||
if (diff < 0) { j = k; common_j = common; }
|
||||
else { i = k; common_i = common; }
|
||||
if (diff < 0) {
|
||||
j = k;
|
||||
common_j = common;
|
||||
} else {
|
||||
i = k;
|
||||
common_i = common;
|
||||
}
|
||||
if (j - i <= 1) {
|
||||
if (i > 0) break; /* v->s has been inspected */
|
||||
if (j == i) break; /* only one item in v */
|
||||
@ -360,9 +383,8 @@ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const
|
||||
z->l += adjustment;
|
||||
if (z->c >= c_ket)
|
||||
z->c += adjustment;
|
||||
else
|
||||
if (z->c > c_bra)
|
||||
z->c = c_bra;
|
||||
else if (z->c > c_bra)
|
||||
z->c = c_bra;
|
||||
}
|
||||
if (s_size) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
|
||||
if (adjptr != NULL)
|
||||
|
@ -716,6 +716,8 @@ static const struct tsearch_config_match tsearch_config_languages[] =
|
||||
{"french", "French"},
|
||||
{"german", "de"},
|
||||
{"german", "German"},
|
||||
{"greek", "el"},
|
||||
{"greek", "Greek"},
|
||||
{"hungarian", "hu"},
|
||||
{"hungarian", "Hungarian"},
|
||||
{"indonesian", "id"},
|
||||
|
@ -53,6 +53,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 201906161
|
||||
#define CATALOG_VERSION_NO 201907041
|
||||
|
||||
#endif
|
||||
|
@ -19,8 +19,15 @@ struct SN_env {
|
||||
unsigned char * B;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
|
||||
extern void SN_close_env(struct SN_env * z, int S_size);
|
||||
|
||||
extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
16
src/include/snowball/libstemmer/stem_UTF_8_greek.h
Normal file
16
src/include/snowball/libstemmer/stem_UTF_8_greek.h
Normal file
@ -0,0 +1,16 @@
|
||||
/* This file was generated automatically by the Snowball to ISO C compiler */
|
||||
/* http://snowballstem.org/ */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern struct SN_env * greek_UTF_8_create_env(void);
|
||||
extern void greek_UTF_8_close_env(struct SN_env * z);
|
||||
|
||||
extern int greek_UTF_8_stem(struct SN_env * z);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user