Tsearch2 functionality migrates to core.

The bulk of this work is by Oleg Bartunov and Teodor Sigaev, but I did a lot
of editorializing, so anything that's broken is probably my fault.

Documentation is nonexistent as yet, but let's land the patch so we can
get some portability testing done.
This commit is contained in:
Tom Lane 2007-08-21 01:11:32 +00:00
parent 4e94d1f952
commit 140d4ebcb4
200 changed files with 54388 additions and 147 deletions

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.206 2007/08/04 01:26:53 tgl Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.207 2007/08/21 01:11:11 tgl Exp $ -->
<chapter id="datatype">
<title id="datatype-title">Data Types</title>
@ -3484,6 +3484,14 @@ SET xmloption TO { DOCUMENT | CONTENT };
<primary>regtype</primary>
</indexterm>
<indexterm zone="datatype-oid">
<primary>regconfig</primary>
</indexterm>
<indexterm zone="datatype-oid">
<primary>regdictionary</primary>
</indexterm>
<indexterm zone="datatype-oid">
<primary>xid</primary>
</indexterm>
@ -3505,9 +3513,9 @@ SET xmloption TO { DOCUMENT | CONTENT };
configuration variable is enabled. Type <type>oid</> represents
an object identifier. There are also several alias types for
<type>oid</>: <type>regproc</>, <type>regprocedure</>,
<type>regoper</>, <type>regoperator</>, <type>regclass</>, and
<type>regtype</>. <xref linkend="datatype-oid-table"> shows an
overview.
<type>regoper</>, <type>regoperator</>, <type>regclass</>,
<type>regtype</>, <type>regconfig</>, and <type>regdictionary</>.
<xref linkend="datatype-oid-table"> shows an overview.
</para>
<para>
@ -3614,6 +3622,20 @@ SELECT * FROM pg_attribute
<entry>data type name</entry>
<entry><literal>integer</></entry>
</row>
<row>
<entry><type>regconfig</></entry>
<entry><structname>pg_ts_config</></entry>
<entry>text search configuration</entry>
<entry><literal>english</></entry>
</row>
<row>
<entry><type>regdictionary</></entry>
<entry><structname>pg_ts_dict</></entry>
<entry>text search dictionary</entry>
<entry><literal>simple</></entry>
</row>
</tbody>
</tgroup>
</table>

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.387 2007/08/19 03:23:30 adunstan Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.388 2007/08/21 01:11:11 tgl Exp $ -->
<chapter id="functions">
<title>Functions and Operators</title>
@ -10794,7 +10794,9 @@ SELECT relname FROM pg_class WHERE pg_table_is_visible(oid);
All these functions require object OIDs to identify the object to be
checked. If you want to test an object by name, it is convenient to use
the OID alias types (<type>regclass</>, <type>regtype</>,
<type>regprocedure</>, or <type>regoperator</>), for example:
<type>regprocedure</>, <type>regoperator</>, <type>regconfig</>,
or <type>regdictionary</>),
for example:
<programlisting>
SELECT pg_type_is_visible('myschema.widget'::regtype);
</programlisting>
@ -11255,8 +11257,8 @@ SELECT set_config('log_statement_stats', 'off', false);
<para>
<function>pg_rotate_logfile</> signals the log-file manager to switch
to a new output file immediately. This works only when the built-in
log collector is running, since otherwise there is no log-file manager
subprocess.
log collector is running, since otherwise there is no log-file manager
subprocess.
</para>
<indexterm zone="functions-admin">

View File

@ -4,7 +4,7 @@
#
# Copyright (c) 1994, Regents of the University of California
#
# $PostgreSQL: pgsql/src/Makefile,v 1.41 2007/01/20 17:16:09 petere Exp $
# $PostgreSQL: pgsql/src/Makefile,v 1.42 2007/08/21 01:11:12 tgl Exp $
#
#-------------------------------------------------------------------------
@ -18,6 +18,7 @@ all install installdirs uninstall distprep:
$(MAKE) -C timezone $@
$(MAKE) -C backend $@
$(MAKE) -C backend/utils/mb/conversion_procs $@
$(MAKE) -C backend/snowball $@
$(MAKE) -C include $@
$(MAKE) -C interfaces $@
$(MAKE) -C bin $@
@ -47,6 +48,7 @@ clean:
$(MAKE) -C port $@
$(MAKE) -C timezone $@
$(MAKE) -C backend $@
$(MAKE) -C backend/snowball $@
$(MAKE) -C include $@
$(MAKE) -C interfaces $@
$(MAKE) -C bin $@
@ -60,6 +62,7 @@ distclean maintainer-clean:
-$(MAKE) -C port $@
-$(MAKE) -C timezone $@
-$(MAKE) -C backend $@
-$(MAKE) -C backend/snowball $@
-$(MAKE) -C include $@
-$(MAKE) -C interfaces $@
-$(MAKE) -C bin $@

View File

@ -2,9 +2,10 @@
#
# Makefile for the postgres backend
#
# Copyright (c) 1994, Regents of the University of California
# Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# $PostgreSQL: pgsql/src/backend/Makefile,v 1.123 2007/07/24 09:00:27 mha Exp $
# $PostgreSQL: pgsql/src/backend/Makefile,v 1.124 2007/08/21 01:11:12 tgl Exp $
#
#-------------------------------------------------------------------------
@ -15,7 +16,7 @@ include $(top_builddir)/src/Makefile.global
DIRS = access bootstrap catalog parser commands executor lib libpq \
main nodes optimizer port postmaster regex rewrite \
storage tcop utils $(top_builddir)/src/timezone
storage tcop tsearch utils $(top_builddir)/src/timezone
SUBSYSOBJS = $(DIRS:%=%/SUBSYS.o)
@ -166,6 +167,7 @@ ifeq ($(MAKE_DLL), true)
endif
endif
$(MAKE) -C catalog install-data
$(MAKE) -C tsearch install-data
$(INSTALL_DATA) $(srcdir)/libpq/pg_hba.conf.sample '$(DESTDIR)$(datadir)/pg_hba.conf.sample'
$(INSTALL_DATA) $(srcdir)/libpq/pg_ident.conf.sample '$(DESTDIR)$(datadir)/pg_ident.conf.sample'
$(INSTALL_DATA) $(srcdir)/utils/misc/postgresql.conf.sample '$(DESTDIR)$(datadir)/postgresql.conf.sample'
@ -220,6 +222,7 @@ ifeq ($(MAKE_DLL), true)
endif
endif
$(MAKE) -C catalog uninstall-data
$(MAKE) -C tsearch uninstall-data
rm -f '$(DESTDIR)$(datadir)/pg_hba.conf.sample' \
'$(DESTDIR)$(datadir)/pg_ident.conf.sample' \
'$(DESTDIR)$(datadir)/postgresql.conf.sample' \

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.9 2007/01/31 15:09:45 teodor Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.10 2007/08/21 01:11:12 tgl Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
@ -62,7 +62,7 @@ ginarrayextract(PG_FUNCTION_ARGS)
if ( *nentries == 0 && PG_NARGS() == 3 )
{
switch( PG_GETARG_UINT16(2) )
switch( PG_GETARG_UINT16(2) ) /* StrategyNumber */
{
case GinOverlapStrategy:
*nentries = -1; /* nobody can be found */
@ -79,6 +79,15 @@ ginarrayextract(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(entries);
}
/*
 * ginqueryarrayextract
 *		Pass the three call arguments through, unchanged, to
 *		ginarrayextract and return its result.
 */
Datum
ginqueryarrayextract(PG_FUNCTION_ARGS)
{
	Datum		arg0 = PG_GETARG_DATUM(0);
	Datum		arg1 = PG_GETARG_DATUM(1);
	Datum		arg2 = PG_GETARG_DATUM(2);
	Datum		extracted;

	extracted = DirectFunctionCall3(ginarrayextract, arg0, arg1, arg2);

	PG_RETURN_DATUM(extracted);
}
Datum
ginarrayconsistent(PG_FUNCTION_ARGS)
{

View File

@ -2,7 +2,7 @@
#
# Makefile for backend/catalog
#
# $PostgreSQL: pgsql/src/backend/catalog/Makefile,v 1.64 2007/04/02 03:49:37 tgl Exp $
# $PostgreSQL: pgsql/src/backend/catalog/Makefile,v 1.65 2007/08/21 01:11:13 tgl Exp $
#
#-------------------------------------------------------------------------
@ -35,6 +35,8 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
pg_database.h pg_tablespace.h pg_pltemplate.h \
pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \
pg_ts_config.h pg_ts_config_map.h pg_ts_dict.h \
pg_ts_parser.h pg_ts_template.h \
toasting.h indexing.h \
)

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/aclchk.c,v 1.139 2007/04/20 02:37:37 tgl Exp $
* $PostgreSQL: pgsql/src/backend/catalog/aclchk.c,v 1.140 2007/08/21 01:11:13 tgl Exp $
*
* NOTES
* See acl.h.
@ -34,6 +34,8 @@
#include "catalog/pg_proc.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "commands/dbcommands.h"
#include "miscadmin.h"
#include "parser/parse_func.h"
@ -1416,7 +1418,11 @@ static const char *const no_priv_msg[MAX_ACL_KIND] =
/* ACL_KIND_CONVERSION */
gettext_noop("permission denied for conversion %s"),
/* ACL_KIND_TABLESPACE */
gettext_noop("permission denied for tablespace %s")
gettext_noop("permission denied for tablespace %s"),
/* ACL_KIND_TSDICTIONARY */
gettext_noop("permission denied for text search dictionary %s"),
/* ACL_KIND_TSCONFIGURATION */
gettext_noop("permission denied for text search configuration %s")
};
static const char *const not_owner_msg[MAX_ACL_KIND] =
@ -1444,7 +1450,11 @@ static const char *const not_owner_msg[MAX_ACL_KIND] =
/* ACL_KIND_CONVERSION */
gettext_noop("must be owner of conversion %s"),
/* ACL_KIND_TABLESPACE */
gettext_noop("must be owner of tablespace %s")
gettext_noop("must be owner of tablespace %s"),
/* ACL_KIND_TSDICTIONARY */
gettext_noop("must be owner of text search dictionary %s"),
/* ACL_KIND_TSCONFIGURATION */
gettext_noop("must be owner of text search configuration %s")
};
@ -2297,6 +2307,65 @@ pg_opfamily_ownercheck(Oid opf_oid, Oid roleid)
return has_privs_of_role(roleid, ownerId);
}
/*
 * pg_ts_dict_ownercheck
 *		Report whether role "roleid" may act as owner of the text search
 *		dictionary identified by "dict_oid".
 *
 * A superuser always passes.  Otherwise the dictionary's row is fetched
 * through the TSDICTOID syscache (raising an error if there is no such row)
 * and the result is whether roleid has the privileges of the recorded owner.
 */
bool
pg_ts_dict_ownercheck(Oid dict_oid, Oid roleid)
{
	HeapTuple	dictTup;
	Form_pg_ts_dict dictForm;
	Oid			dictOwner;

	/* Superusers bypass all permission checking. */
	if (superuser_arg(roleid))
		return true;

	dictTup = SearchSysCache(TSDICTOID, ObjectIdGetDatum(dict_oid), 0, 0, 0);
	if (!HeapTupleIsValid(dictTup))
	{
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("text search dictionary with OID %u does not exist",
						dict_oid)));
	}

	/* Copy the owner out before handing the cache entry back. */
	dictForm = (Form_pg_ts_dict) GETSTRUCT(dictTup);
	dictOwner = dictForm->dictowner;
	ReleaseSysCache(dictTup);

	return has_privs_of_role(roleid, dictOwner);
}
/*
 * pg_ts_config_ownercheck
 *		Report whether role "roleid" may act as owner of the text search
 *		configuration identified by "cfg_oid".
 *
 * A superuser always passes.  Otherwise the configuration's row is fetched
 * through the TSCONFIGOID syscache (raising an error if there is no such
 * row) and the result is whether roleid has the privileges of the recorded
 * owner.
 */
bool
pg_ts_config_ownercheck(Oid cfg_oid, Oid roleid)
{
	HeapTuple	cfgTup;
	Form_pg_ts_config cfgForm;
	Oid			cfgOwner;

	/* Superusers bypass all permission checking. */
	if (superuser_arg(roleid))
		return true;

	cfgTup = SearchSysCache(TSCONFIGOID, ObjectIdGetDatum(cfg_oid), 0, 0, 0);
	if (!HeapTupleIsValid(cfgTup))
	{
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("text search configuration with OID %u does not exist",
						cfg_oid)));
	}

	/* Copy the owner out before handing the cache entry back. */
	cfgForm = (Form_pg_ts_config) GETSTRUCT(cfgTup);
	cfgOwner = cfgForm->cfgowner;
	ReleaseSysCache(cfgTup);

	return has_privs_of_role(roleid, cfgOwner);
}
/*
* Ownership check for a database (specified by OID).
*/

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.66 2007/06/05 21:31:04 tgl Exp $
* $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.67 2007/08/21 01:11:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -40,6 +40,10 @@
#include "catalog/pg_rewrite.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_ts_parser.h"
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
@ -97,6 +101,10 @@ static const Oid object_classes[MAX_OCLASS] = {
RewriteRelationId, /* OCLASS_REWRITE */
TriggerRelationId, /* OCLASS_TRIGGER */
NamespaceRelationId, /* OCLASS_SCHEMA */
TSParserRelationId, /* OCLASS_TSPARSER */
TSDictionaryRelationId, /* OCLASS_TSDICT */
TSTemplateRelationId, /* OCLASS_TSTEMPLATE */
TSConfigRelationId, /* OCLASS_TSCONFIG */
AuthIdRelationId, /* OCLASS_ROLE */
DatabaseRelationId, /* OCLASS_DATABASE */
TableSpaceRelationId /* OCLASS_TBLSPACE */
@ -988,6 +996,22 @@ doDeletion(const ObjectAddress *object)
RemoveSchemaById(object->objectId);
break;
case OCLASS_TSPARSER:
RemoveTSParserById(object->objectId);
break;
case OCLASS_TSDICT:
RemoveTSDictionaryById(object->objectId);
break;
case OCLASS_TSTEMPLATE:
RemoveTSTemplateById(object->objectId);
break;
case OCLASS_TSCONFIG:
RemoveTSConfigurationById(object->objectId);
break;
/* OCLASS_ROLE, OCLASS_DATABASE, OCLASS_TBLSPACE not handled */
default:
@ -1201,8 +1225,8 @@ find_expr_references_walker(Node *node,
/*
* If it's a regclass or similar literal referring to an existing
* object, add a reference to that object. (Currently, only the
* regclass case has any likely use, but we may as well handle all the
* OID-alias datatypes consistently.)
* regclass and regconfig cases have any likely use, but we may as
* well handle all the OID-alias datatypes consistently.)
*/
if (!con->constisnull)
{
@ -1242,6 +1266,22 @@ find_expr_references_walker(Node *node,
add_object_address(OCLASS_TYPE, objoid, 0,
context->addrs);
break;
case REGCONFIGOID:
objoid = DatumGetObjectId(con->constvalue);
if (SearchSysCacheExists(TSCONFIGOID,
ObjectIdGetDatum(objoid),
0, 0, 0))
add_object_address(OCLASS_TSCONFIG, objoid, 0,
context->addrs);
break;
case REGDICTIONARYOID:
objoid = DatumGetObjectId(con->constvalue);
if (SearchSysCacheExists(TSDICTOID,
ObjectIdGetDatum(objoid),
0, 0, 0))
add_object_address(OCLASS_TSDICT, objoid, 0,
context->addrs);
break;
}
}
return false;
@ -1605,6 +1645,21 @@ object_address_present(const ObjectAddress *object,
return false;
}
/*
* Record multiple dependencies from an ObjectAddresses array, after first
* removing any duplicates.
*
* depender:   the object the dependencies are recorded for
* referenced: the collected addresses; it is passed to
*             eliminate_duplicate_dependencies() first, so the caller's
*             array is presumably modified in place (confirm against that
*             function)
* behavior:   the DependencyType passed through for every entry
*/
void
record_object_address_dependencies(const ObjectAddress *depender,
ObjectAddresses *referenced,
DependencyType behavior)
{
/* de-duplicate first; refs/numrefs are read only after this call */
eliminate_duplicate_dependencies(referenced);
recordMultipleDependencies(depender,
referenced->refs, referenced->numrefs,
behavior);
}
/*
* Clean up when done with an ObjectAddresses array.
*/
@ -1690,6 +1745,22 @@ getObjectClass(const ObjectAddress *object)
Assert(object->objectSubId == 0);
return OCLASS_SCHEMA;
case TSParserRelationId:
Assert(object->objectSubId == 0);
return OCLASS_TSPARSER;
case TSDictionaryRelationId:
Assert(object->objectSubId == 0);
return OCLASS_TSDICT;
case TSTemplateRelationId:
Assert(object->objectSubId == 0);
return OCLASS_TSTEMPLATE;
case TSConfigRelationId:
Assert(object->objectSubId == 0);
return OCLASS_TSCONFIG;
case AuthIdRelationId:
Assert(object->objectSubId == 0);
return OCLASS_ROLE;
@ -2080,6 +2151,70 @@ getObjectDescription(const ObjectAddress *object)
break;
}
case OCLASS_TSPARSER:
{
HeapTuple tup;
tup = SearchSysCache(TSPARSEROID,
ObjectIdGetDatum(object->objectId),
0, 0, 0);
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for text search parser %u",
object->objectId);
appendStringInfo(&buffer, _("text search parser %s"),
NameStr(((Form_pg_ts_parser) GETSTRUCT(tup))->prsname));
ReleaseSysCache(tup);
break;
}
case OCLASS_TSDICT:
{
HeapTuple tup;
tup = SearchSysCache(TSDICTOID,
ObjectIdGetDatum(object->objectId),
0, 0, 0);
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for text search dictionary %u",
object->objectId);
appendStringInfo(&buffer, _("text search dictionary %s"),
NameStr(((Form_pg_ts_dict) GETSTRUCT(tup))->dictname));
ReleaseSysCache(tup);
break;
}
case OCLASS_TSTEMPLATE:
{
HeapTuple tup;
tup = SearchSysCache(TSTEMPLATEOID,
ObjectIdGetDatum(object->objectId),
0, 0, 0);
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for text search template %u",
object->objectId);
appendStringInfo(&buffer, _("text search template %s"),
NameStr(((Form_pg_ts_template) GETSTRUCT(tup))->tmplname));
ReleaseSysCache(tup);
break;
}
case OCLASS_TSCONFIG:
{
HeapTuple tup;
tup = SearchSysCache(TSCONFIGOID,
ObjectIdGetDatum(object->objectId),
0, 0, 0);
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for text search configuration %u",
object->objectId);
appendStringInfo(&buffer, _("text search configuration %s"),
NameStr(((Form_pg_ts_config) GETSTRUCT(tup))->cfgname));
ReleaseSysCache(tup);
break;
}
case OCLASS_ROLE:
{
appendStringInfo(&buffer, _("role %s"),

View File

@ -13,7 +13,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.97 2007/07/25 22:16:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.98 2007/08/21 01:11:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -29,6 +29,10 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_ts_parser.h"
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "miscadmin.h"
@ -189,6 +193,10 @@ Datum pg_function_is_visible(PG_FUNCTION_ARGS);
Datum pg_operator_is_visible(PG_FUNCTION_ARGS);
Datum pg_opclass_is_visible(PG_FUNCTION_ARGS);
Datum pg_conversion_is_visible(PG_FUNCTION_ARGS);
Datum pg_ts_parser_is_visible(PG_FUNCTION_ARGS);
Datum pg_ts_dict_is_visible(PG_FUNCTION_ARGS);
Datum pg_ts_template_is_visible(PG_FUNCTION_ARGS);
Datum pg_ts_config_is_visible(PG_FUNCTION_ARGS);
Datum pg_my_temp_schema(PG_FUNCTION_ARGS);
Datum pg_is_other_temp_schema(PG_FUNCTION_ARGS);
@ -1314,6 +1322,521 @@ ConversionIsVisible(Oid conid)
return visible;
}
/*
 * TSParserGetPrsid
 *		Resolve a possibly schema-qualified text search parser name to an OID.
 *
 * A qualified name is looked up only in the schema given.  An unqualified
 * name is looked up in each schema of the active search path in order,
 * skipping the temp namespace; the first hit wins.
 *
 * When nothing matches, InvalidOid is returned if failOK is true; otherwise
 * an error is raised.
 */
Oid
TSParserGetPrsid(List *names, bool failOK)
{
	char	   *nspName;
	char	   *prsName;
	Oid			result = InvalidOid;

	/* split the name list into its schema and object parts */
	DeconstructQualifiedName(names, &nspName, &prsName);

	if (nspName == NULL)
	{
		ListCell   *cell;

		/* unqualified name: walk the search path */
		recomputeNamespacePath();

		foreach(cell, activeSearchPath)
		{
			Oid			pathNsp = lfirst_oid(cell);

			/* do not look in temp namespace */
			if (pathNsp != myTempNamespace)
			{
				result = GetSysCacheOid(TSPARSERNAMENSP,
										PointerGetDatum(prsName),
										ObjectIdGetDatum(pathNsp),
										0, 0);
				if (OidIsValid(result))
					break;
			}
		}
	}
	else
	{
		/* qualified name: use exactly the schema given */
		Oid			explicitNsp = LookupExplicitNamespace(nspName);

		result = GetSysCacheOid(TSPARSERNAMENSP,
								PointerGetDatum(prsName),
								ObjectIdGetDatum(explicitNsp),
								0, 0);
	}

	if (!(OidIsValid(result) || failOK))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("text search parser \"%s\" does not exist",
						NameListToString(names))));

	return result;
}
/*
 * TSParserIsVisible
 *		Is the text search parser with OID "prsId" visible in the current
 *		search path?  "Visible" means an unqualified lookup of the parser's
 *		name would find this very parser.
 *
 * Raises an error if no parser with that OID is in the syscache.
 */
bool
TSParserIsVisible(Oid prsId)
{
	HeapTuple	prsTup;
	Form_pg_ts_parser prsForm;
	Oid			ownNsp;
	bool		result = false;

	prsTup = SearchSysCache(TSPARSEROID, ObjectIdGetDatum(prsId), 0, 0, 0);
	if (!HeapTupleIsValid(prsTup))
		elog(ERROR, "cache lookup failed for text search parser %u", prsId);
	prsForm = (Form_pg_ts_parser) GETSTRUCT(prsTup);
	ownNsp = prsForm->prsnamespace;

	recomputeNamespacePath();

	/*
	 * A parser whose schema is not in the path at all cannot be visible.
	 * The system namespace is taken to be in the path without consulting
	 * the list.
	 */
	if (ownNsp == PG_CATALOG_NAMESPACE ||
		list_member_oid(activeSearchPath, ownNsp))
	{
		/*
		 * Being in the path is not sufficient: a same-named parser in an
		 * earlier schema would hide this one.  Walk the path in order and
		 * see which comes first.
		 */
		char	   *prsName = NameStr(prsForm->prsname);
		ListCell   *cell;

		foreach(cell, activeSearchPath)
		{
			Oid			pathNsp = lfirst_oid(cell);

			if (pathNsp == myTempNamespace)
				continue;		/* do not look in temp namespace */

			if (pathNsp == ownNsp)
			{
				/* reached our own schema before any conflict */
				result = true;
				break;
			}

			/* a same-named parser earlier in the path hides ours */
			if (SearchSysCacheExists(TSPARSERNAMENSP,
									 PointerGetDatum(prsName),
									 ObjectIdGetDatum(pathNsp),
									 0, 0))
				break;
		}
	}

	ReleaseSysCache(prsTup);

	return result;
}
/*
 * TSDictionaryGetDictid
 *		Resolve a possibly schema-qualified text search dictionary name to
 *		an OID.
 *
 * A qualified name is looked up only in the schema given.  An unqualified
 * name is looked up in each schema of the active search path in order,
 * skipping the temp namespace; the first hit wins.
 *
 * When nothing matches, InvalidOid is returned if failOK is true; otherwise
 * an error is raised.
 */
Oid
TSDictionaryGetDictid(List *names, bool failOK)
{
	char	   *nspName;
	char	   *dictName;
	Oid			result = InvalidOid;

	/* split the name list into its schema and object parts */
	DeconstructQualifiedName(names, &nspName, &dictName);

	if (nspName == NULL)
	{
		ListCell   *cell;

		/* unqualified name: walk the search path */
		recomputeNamespacePath();

		foreach(cell, activeSearchPath)
		{
			Oid			pathNsp = lfirst_oid(cell);

			/* do not look in temp namespace */
			if (pathNsp != myTempNamespace)
			{
				result = GetSysCacheOid(TSDICTNAMENSP,
										PointerGetDatum(dictName),
										ObjectIdGetDatum(pathNsp),
										0, 0);
				if (OidIsValid(result))
					break;
			}
		}
	}
	else
	{
		/* qualified name: use exactly the schema given */
		Oid			explicitNsp = LookupExplicitNamespace(nspName);

		result = GetSysCacheOid(TSDICTNAMENSP,
								PointerGetDatum(dictName),
								ObjectIdGetDatum(explicitNsp),
								0, 0);
	}

	if (!(OidIsValid(result) || failOK))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("text search dictionary \"%s\" does not exist",
						NameListToString(names))));

	return result;
}
/*
 * TSDictionaryIsVisible
 *		Is the text search dictionary with OID "dictId" visible in the
 *		current search path?  "Visible" means an unqualified lookup of the
 *		dictionary's name would find this very dictionary.
 *
 * Raises an error if no dictionary with that OID is in the syscache.
 */
bool
TSDictionaryIsVisible(Oid dictId)
{
	HeapTuple	dictTup;
	Form_pg_ts_dict dictForm;
	Oid			ownNsp;
	bool		result = false;

	dictTup = SearchSysCache(TSDICTOID, ObjectIdGetDatum(dictId), 0, 0, 0);
	if (!HeapTupleIsValid(dictTup))
		elog(ERROR, "cache lookup failed for text search dictionary %u",
			 dictId);
	dictForm = (Form_pg_ts_dict) GETSTRUCT(dictTup);
	ownNsp = dictForm->dictnamespace;

	recomputeNamespacePath();

	/*
	 * A dictionary whose schema is not in the path at all cannot be
	 * visible.  The system namespace is taken to be in the path without
	 * consulting the list.
	 */
	if (ownNsp == PG_CATALOG_NAMESPACE ||
		list_member_oid(activeSearchPath, ownNsp))
	{
		/*
		 * Being in the path is not sufficient: a same-named dictionary in
		 * an earlier schema would hide this one.  Walk the path in order
		 * and see which comes first.
		 */
		char	   *dictName = NameStr(dictForm->dictname);
		ListCell   *cell;

		foreach(cell, activeSearchPath)
		{
			Oid			pathNsp = lfirst_oid(cell);

			if (pathNsp == myTempNamespace)
				continue;		/* do not look in temp namespace */

			if (pathNsp == ownNsp)
			{
				/* reached our own schema before any conflict */
				result = true;
				break;
			}

			/* a same-named dictionary earlier in the path hides ours */
			if (SearchSysCacheExists(TSDICTNAMENSP,
									 PointerGetDatum(dictName),
									 ObjectIdGetDatum(pathNsp),
									 0, 0))
				break;
		}
	}

	ReleaseSysCache(dictTup);

	return result;
}
/*
 * TSTemplateGetTmplid
 *		Resolve a possibly schema-qualified text search template name to an
 *		OID.
 *
 * A qualified name is looked up only in the schema given.  An unqualified
 * name is looked up in each schema of the active search path in order,
 * skipping the temp namespace; the first hit wins.
 *
 * When nothing matches, InvalidOid is returned if failOK is true; otherwise
 * an error is raised.
 */
Oid
TSTemplateGetTmplid(List *names, bool failOK)
{
	char	   *nspName;
	char	   *tmplName;
	Oid			result = InvalidOid;

	/* split the name list into its schema and object parts */
	DeconstructQualifiedName(names, &nspName, &tmplName);

	if (nspName == NULL)
	{
		ListCell   *cell;

		/* unqualified name: walk the search path */
		recomputeNamespacePath();

		foreach(cell, activeSearchPath)
		{
			Oid			pathNsp = lfirst_oid(cell);

			/* do not look in temp namespace */
			if (pathNsp != myTempNamespace)
			{
				result = GetSysCacheOid(TSTEMPLATENAMENSP,
										PointerGetDatum(tmplName),
										ObjectIdGetDatum(pathNsp),
										0, 0);
				if (OidIsValid(result))
					break;
			}
		}
	}
	else
	{
		/* qualified name: use exactly the schema given */
		Oid			explicitNsp = LookupExplicitNamespace(nspName);

		result = GetSysCacheOid(TSTEMPLATENAMENSP,
								PointerGetDatum(tmplName),
								ObjectIdGetDatum(explicitNsp),
								0, 0);
	}

	if (!(OidIsValid(result) || failOK))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("text search template \"%s\" does not exist",
						NameListToString(names))));

	return result;
}
/*
 * TSTemplateIsVisible
 *		Is the text search template with OID "tmplId" visible in the current
 *		search path?  "Visible" means an unqualified lookup of the
 *		template's name would find this very template.
 *
 * Raises an error if no template with that OID is in the syscache.
 */
bool
TSTemplateIsVisible(Oid tmplId)
{
	HeapTuple	tmplTup;
	Form_pg_ts_template tmplForm;
	Oid			ownNsp;
	bool		result = false;

	tmplTup = SearchSysCache(TSTEMPLATEOID, ObjectIdGetDatum(tmplId), 0, 0, 0);
	if (!HeapTupleIsValid(tmplTup))
		elog(ERROR, "cache lookup failed for text search template %u", tmplId);
	tmplForm = (Form_pg_ts_template) GETSTRUCT(tmplTup);
	ownNsp = tmplForm->tmplnamespace;

	recomputeNamespacePath();

	/*
	 * A template whose schema is not in the path at all cannot be visible.
	 * The system namespace is taken to be in the path without consulting
	 * the list.
	 */
	if (ownNsp == PG_CATALOG_NAMESPACE ||
		list_member_oid(activeSearchPath, ownNsp))
	{
		/*
		 * Being in the path is not sufficient: a same-named template in an
		 * earlier schema would hide this one.  Walk the path in order and
		 * see which comes first.
		 */
		char	   *tmplName = NameStr(tmplForm->tmplname);
		ListCell   *cell;

		foreach(cell, activeSearchPath)
		{
			Oid			pathNsp = lfirst_oid(cell);

			if (pathNsp == myTempNamespace)
				continue;		/* do not look in temp namespace */

			if (pathNsp == ownNsp)
			{
				/* reached our own schema before any conflict */
				result = true;
				break;
			}

			/* a same-named template earlier in the path hides ours */
			if (SearchSysCacheExists(TSTEMPLATENAMENSP,
									 PointerGetDatum(tmplName),
									 ObjectIdGetDatum(pathNsp),
									 0, 0))
				break;
		}
	}

	ReleaseSysCache(tmplTup);

	return result;
}
/*
 * TSConfigGetCfgid
 *		Resolve a possibly schema-qualified text search configuration name
 *		to an OID.
 *
 * A qualified name is looked up only in the schema given.  An unqualified
 * name is looked up in each schema of the active search path in order,
 * skipping the temp namespace; the first hit wins.
 *
 * When nothing matches, InvalidOid is returned if failOK is true; otherwise
 * an error is raised.
 */
Oid
TSConfigGetCfgid(List *names, bool failOK)
{
	char	   *nspName;
	char	   *cfgName;
	Oid			result = InvalidOid;

	/* split the name list into its schema and object parts */
	DeconstructQualifiedName(names, &nspName, &cfgName);

	if (nspName == NULL)
	{
		ListCell   *cell;

		/* unqualified name: walk the search path */
		recomputeNamespacePath();

		foreach(cell, activeSearchPath)
		{
			Oid			pathNsp = lfirst_oid(cell);

			/* do not look in temp namespace */
			if (pathNsp != myTempNamespace)
			{
				result = GetSysCacheOid(TSCONFIGNAMENSP,
										PointerGetDatum(cfgName),
										ObjectIdGetDatum(pathNsp),
										0, 0);
				if (OidIsValid(result))
					break;
			}
		}
	}
	else
	{
		/* qualified name: use exactly the schema given */
		Oid			explicitNsp = LookupExplicitNamespace(nspName);

		result = GetSysCacheOid(TSCONFIGNAMENSP,
								PointerGetDatum(cfgName),
								ObjectIdGetDatum(explicitNsp),
								0, 0);
	}

	if (!(OidIsValid(result) || failOK))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("text search configuration \"%s\" does not exist",
						NameListToString(names))));

	return result;
}
/*
 * TSConfigIsVisible
 *		Is the text search configuration with OID "cfgid" visible in the
 *		current search path?  "Visible" means an unqualified lookup of the
 *		configuration's name would find this very configuration.
 *
 * Raises an error if no configuration with that OID is in the syscache.
 */
bool
TSConfigIsVisible(Oid cfgid)
{
	HeapTuple	cfgTup;
	Form_pg_ts_config cfgForm;
	Oid			ownNsp;
	bool		result = false;

	cfgTup = SearchSysCache(TSCONFIGOID, ObjectIdGetDatum(cfgid), 0, 0, 0);
	if (!HeapTupleIsValid(cfgTup))
		elog(ERROR, "cache lookup failed for text search configuration %u",
			 cfgid);
	cfgForm = (Form_pg_ts_config) GETSTRUCT(cfgTup);
	ownNsp = cfgForm->cfgnamespace;

	recomputeNamespacePath();

	/*
	 * A configuration whose schema is not in the path at all cannot be
	 * visible.  The system namespace is taken to be in the path without
	 * consulting the list.
	 */
	if (ownNsp == PG_CATALOG_NAMESPACE ||
		list_member_oid(activeSearchPath, ownNsp))
	{
		/*
		 * Being in the path is not sufficient: a same-named configuration
		 * in an earlier schema would hide this one.  Walk the path in order
		 * and see which comes first.
		 */
		char	   *cfgName = NameStr(cfgForm->cfgname);
		ListCell   *cell;

		foreach(cell, activeSearchPath)
		{
			Oid			pathNsp = lfirst_oid(cell);

			if (pathNsp == myTempNamespace)
				continue;		/* do not look in temp namespace */

			if (pathNsp == ownNsp)
			{
				/* reached our own schema before any conflict */
				result = true;
				break;
			}

			/* a same-named configuration earlier in the path hides ours */
			if (SearchSysCacheExists(TSCONFIGNAMENSP,
									 PointerGetDatum(cfgName),
									 ObjectIdGetDatum(pathNsp),
									 0, 0))
				break;
		}
	}

	ReleaseSysCache(cfgTup);

	return result;
}
/*
* DeconstructQualifiedName
* Given a possibly-qualified name expressed as a list of String nodes,
@ -2515,6 +3038,38 @@ pg_conversion_is_visible(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(ConversionIsVisible(oid));
}
Datum
pg_ts_parser_is_visible(PG_FUNCTION_ARGS)
{
Oid oid = PG_GETARG_OID(0);
PG_RETURN_BOOL(TSParserIsVisible(oid));
}
Datum
pg_ts_dict_is_visible(PG_FUNCTION_ARGS)
{
Oid oid = PG_GETARG_OID(0);
PG_RETURN_BOOL(TSDictionaryIsVisible(oid));
}
Datum
pg_ts_template_is_visible(PG_FUNCTION_ARGS)
{
Oid oid = PG_GETARG_OID(0);
PG_RETURN_BOOL(TSTemplateIsVisible(oid));
}
Datum
pg_ts_config_is_visible(PG_FUNCTION_ARGS)
{
Oid oid = PG_GETARG_OID(0);
PG_RETURN_BOOL(TSConfigIsVisible(oid));
}
Datum
pg_my_temp_schema(PG_FUNCTION_ARGS)
{

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.39 2007/07/25 22:16:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.40 2007/08/21 01:11:13 tgl Exp $
*/
CREATE VIEW pg_roles AS
@ -382,3 +382,74 @@ CREATE VIEW pg_stat_bgwriter AS
pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint,
pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean,
pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean;
-- Tsearch debug function. Defined here because it'd be pretty unwieldy
-- to put it into pg_proc.h
/*
 * Composite row type returned by the ts_debug() functions: five columns
 * with quoted mixed-case names.  Four are text; "Dictionaries" is an array
 * of regdictionary.
 */
CREATE TYPE ts_debug AS (
"Alias" text,
"Description" text,
"Token" text,
"Dictionaries" regdictionary[],
"Lexized token" text
);
COMMENT ON TYPE ts_debug IS 'returned type from ts_debug() function';
/*
 * ts_debug(regconfig, text): one ts_debug row per row produced by
 * ts_parse() when $2 is parsed with the parser (cfgparser) of the
 * configuration $1.
 *
 *   "Alias", "Description": looked up in ts_token_type() for that parser
 *       by the parsed row's tokid.
 *   "Token": the parsed token itself.
 *   "Dictionaries": the mapdict entries of pg_ts_config_map for this
 *       configuration and token type, ordered by mapseqno.
 *   "Lexized token": dictionary name, ': ', and the ts_lexize() output for
 *       one dictionary whose output is not NULL (LIMIT 1 over the subquery
 *       ordered by mapseqno, so presumably the first such dictionary).
 *
 * All catalog and function references in the body are qualified with
 * pg_catalog.  Declared RETURNS NULL ON NULL INPUT.
 */
CREATE FUNCTION ts_debug(regconfig, text)
RETURNS SETOF ts_debug AS
$$
SELECT
(
SELECT
tt.alias
FROM
pg_catalog.ts_token_type(
(SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 )
) AS tt
WHERE
tt.tokid = parse.tokid
) AS "Alias",
(
SELECT
tt.description
FROM
pg_catalog.ts_token_type(
(SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 )
) AS tt
WHERE
tt.tokid = parse.tokid
) AS "Description",
parse.token AS "Token",
ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary
FROM pg_catalog.pg_ts_config_map AS m
WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
ORDER BY m.mapcfg, m.maptokentype, m.mapseqno )
AS "Dictionaries",
(
SELECT
dl.mapdict::pg_catalog.regdictionary || ': ' || dl.lex::pg_catalog.text
FROM
( SELECT mapdict, pg_catalog.ts_lexize(mapdict, parse.token) AS lex
FROM pg_catalog.pg_ts_config_map AS m
WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid
ORDER BY m.mapcfg, m.maptokentype, m.mapseqno ) dl
WHERE dl.lex IS NOT NULL
LIMIT 1
) AS "Lexized token"
FROM pg_catalog.ts_parse(
(SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ), $2
) AS parse;
$$
LANGUAGE SQL RETURNS NULL ON NULL INPUT;
COMMENT ON FUNCTION ts_debug(regconfig,text) IS 'debug function for text search configuration';
/*
 * ts_debug(text): single-argument form.  Calls the two-argument
 * pg_catalog.ts_debug() with the result of
 * pg_catalog.get_current_ts_config() as the configuration and $1 as the
 * text, and returns all of its rows.  Declared RETURNS NULL ON NULL INPUT.
 */
CREATE FUNCTION ts_debug(text)
RETURNS SETOF ts_debug AS
$$
SELECT * FROM pg_catalog.ts_debug( pg_catalog.get_current_ts_config(), $1 );
$$
LANGUAGE SQL RETURNS NULL ON NULL INPUT;
COMMENT ON FUNCTION ts_debug(text) IS 'debug function for current text search configuration';

View File

@ -4,7 +4,7 @@
# Makefile for backend/commands
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/commands/Makefile,v 1.36 2007/04/26 16:13:09 neilc Exp $
# $PostgreSQL: pgsql/src/backend/commands/Makefile,v 1.37 2007/08/21 01:11:14 tgl Exp $
#
#-------------------------------------------------------------------------
@ -18,7 +18,8 @@ OBJS = aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \
indexcmds.o lockcmds.o operatorcmds.o opclasscmds.o \
portalcmds.o prepare.o proclang.o \
schemacmds.o sequence.o tablecmds.o tablespace.o trigger.o \
typecmds.o user.o vacuum.o vacuumlazy.o variable.o view.o
tsearchcmds.o typecmds.o user.o vacuum.o vacuumlazy.o \
variable.o view.o
all: SUBSYS.o

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/alter.c,v 1.24 2007/07/03 01:30:36 neilc Exp $
* $PostgreSQL: pgsql/src/backend/commands/alter.c,v 1.25 2007/08/21 01:11:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -138,6 +138,22 @@ ExecRenameStmt(RenameStmt *stmt)
break;
}
case OBJECT_TSPARSER:
RenameTSParser(stmt->object, stmt->newname);
break;
case OBJECT_TSDICTIONARY:
RenameTSDictionary(stmt->object, stmt->newname);
break;
case OBJECT_TSTEMPLATE:
RenameTSTemplate(stmt->object, stmt->newname);
break;
case OBJECT_TSCONFIGURATION:
RenameTSConfiguration(stmt->object, stmt->newname);
break;
default:
elog(ERROR, "unrecognized rename stmt type: %d",
(int) stmt->renameType);
@ -240,6 +256,14 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt)
AlterTypeOwner(stmt->object, newowner);
break;
case OBJECT_TSDICTIONARY:
AlterTSDictionaryOwner(stmt->object, newowner);
break;
case OBJECT_TSCONFIGURATION:
AlterTSConfigurationOwner(stmt->object, newowner);
break;
default:
elog(ERROR, "unrecognized AlterOwnerStmt type: %d",
(int) stmt->objectType);

View File

@ -7,7 +7,7 @@
* Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/comment.c,v 1.96 2007/02/01 19:10:25 momjian Exp $
* $PostgreSQL: pgsql/src/backend/commands/comment.c,v 1.97 2007/08/21 01:11:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -34,6 +34,10 @@
#include "catalog/pg_shdescription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_ts_parser.h"
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
@ -78,6 +82,10 @@ static void CommentLargeObject(List *qualname, char *comment);
static void CommentCast(List *qualname, List *arguments, char *comment);
static void CommentTablespace(List *qualname, char *comment);
static void CommentRole(List *qualname, char *comment);
static void CommentTSParser(List *qualname, char *comment);
static void CommentTSDictionary(List *qualname, char *comment);
static void CommentTSTemplate(List *qualname, char *comment);
static void CommentTSConfiguration(List *qualname, char *comment);
/*
@ -151,6 +159,18 @@ CommentObject(CommentStmt *stmt)
case OBJECT_ROLE:
CommentRole(stmt->objname, stmt->comment);
break;
case OBJECT_TSPARSER:
CommentTSParser(stmt->objname, stmt->comment);
break;
case OBJECT_TSDICTIONARY:
CommentTSDictionary(stmt->objname, stmt->comment);
break;
case OBJECT_TSTEMPLATE:
CommentTSTemplate(stmt->objname, stmt->comment);
break;
case OBJECT_TSCONFIGURATION:
CommentTSConfiguration(stmt->objname, stmt->comment);
break;
default:
elog(ERROR, "unrecognized object type: %d",
(int) stmt->objtype);
@ -1462,3 +1482,61 @@ CommentCast(List *qualname, List *arguments, char *comment)
/* Call CreateComments() to create/drop the comments */
CreateComments(castOid, CastRelationId, 0, comment);
}
/*
 * CommentTSParser --
 *
 * Set or drop the comment on a text search parser.
 *
 * qualname: possibly-qualified parser name, resolved via TSParserGetPrsid().
 * comment:  comment text handed on to CreateComments().
 *
 * The name is resolved first; the privilege test follows.  No ownership
 * check is made here: commenting on a parser is restricted to superusers.
 */
static void
CommentTSParser(List *qualname, char *comment)
{
	/* NOTE(review): "false" presumably means "error if missing" -- confirm */
	Oid			parserOid = TSParserGetPrsid(qualname, false);

	if (!superuser())
	{
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to comment on text search parser")));
	}

	/* Call CreateComments() to create/drop the comment */
	CreateComments(parserOid, TSParserRelationId, 0, comment);
}
/*
 * CommentTSDictionary --
 *
 * Set or drop the comment on a text search dictionary.
 *
 * qualname: possibly-qualified dictionary name, resolved via
 *			 TSDictionaryGetDictid().
 * comment:  comment text handed on to CreateComments().
 *
 * The current user must pass pg_ts_dict_ownercheck() for the dictionary;
 * otherwise aclcheck_error() is raised with ACLCHECK_NOT_OWNER.
 */
static void
CommentTSDictionary(List *qualname, char *comment)
{
	/* NOTE(review): "false" presumably means "error if missing" -- confirm */
	Oid			dictOid = TSDictionaryGetDictid(qualname, false);

	if (!pg_ts_dict_ownercheck(dictOid, GetUserId()))
	{
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSDICTIONARY,
					   NameListToString(qualname));
	}

	/* Call CreateComments() to create/drop the comment */
	CreateComments(dictOid, TSDictionaryRelationId, 0, comment);
}
/*
 * CommentTSTemplate --
 *
 * Set or drop the comment on a text search template.
 *
 * qualname: possibly-qualified template name, resolved via
 *			 TSTemplateGetTmplid().
 * comment:  comment text handed on to CreateComments().
 *
 * The name is resolved first; the privilege test follows.  No ownership
 * check is made here: commenting on a template is restricted to superusers.
 */
static void
CommentTSTemplate(List *qualname, char *comment)
{
	/* NOTE(review): "false" presumably means "error if missing" -- confirm */
	Oid			templateOid = TSTemplateGetTmplid(qualname, false);

	if (!superuser())
	{
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to comment on text search template")));
	}

	/* Call CreateComments() to create/drop the comment */
	CreateComments(templateOid, TSTemplateRelationId, 0, comment);
}
/*
 * CommentTSConfiguration --
 *
 * Set or drop the comment on a text search configuration.
 *
 * qualname: possibly-qualified configuration name, resolved via
 *			 TSConfigGetCfgid().
 * comment:  comment text handed on to CreateComments().
 *
 * The current user must pass pg_ts_config_ownercheck() for the
 * configuration; otherwise aclcheck_error() is raised with
 * ACLCHECK_NOT_OWNER.
 */
static void
CommentTSConfiguration(List *qualname, char *comment)
{
	/* NOTE(review): "false" presumably means "error if missing" -- confirm */
	Oid			configOid = TSConfigGetCfgid(qualname, false);

	if (!pg_ts_config_ownercheck(configOid, GetUserId()))
	{
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSCONFIGURATION,
					   NameListToString(qualname));
	}

	/* Call CreateComments() to create/drop the comment */
	CreateComments(configOid, TSConfigRelationId, 0, comment);
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.230 2007/07/17 05:02:00 neilc Exp $
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.231 2007/08/21 01:11:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -5079,8 +5079,13 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
case OCLASS_LANGUAGE:
case OCLASS_OPERATOR:
case OCLASS_OPCLASS:
case OCLASS_OPFAMILY:
case OCLASS_TRIGGER:
case OCLASS_SCHEMA:
case OCLASS_TSPARSER:
case OCLASS_TSDICT:
case OCLASS_TSTEMPLATE:
case OCLASS_TSCONFIG:
/*
* We don't expect any of these sorts of objects to depend on

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.597 2007/07/03 01:30:36 neilc Exp $
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.598 2007/08/21 01:11:15 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@ -173,6 +173,7 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args)
ViewStmt CheckPointStmt CreateConversionStmt
DeallocateStmt PrepareStmt ExecuteStmt
DropOwnedStmt ReassignOwnedStmt
AlterTSConfigurationStmt AlterTSDictionaryStmt
%type <node> select_no_parens select_with_parens select_clause
simple_select values_clause
@ -375,14 +376,14 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args)
CACHE CALLED CASCADE CASCADED CASE CAST CHAIN CHAR_P
CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT
COMMITTED CONCURRENTLY CONNECTION CONSTRAINT CONSTRAINTS
COMMITTED CONCURRENTLY CONFIGURATION CONNECTION CONSTRAINT CONSTRAINTS
CONTENT_P CONVERSION_P CONVERT COPY COST CREATE CREATEDB
CREATEROLE CREATEUSER CROSS CSV CURRENT_P CURRENT_DATE CURRENT_ROLE
CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS
DESC DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP
DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DESC
DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP
EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT EXCLUDING
EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTERNAL EXTRACT
@ -407,7 +408,7 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args)
LIKE LIMIT LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
LOCK_P LOGIN_P
MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
MAPPING MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NOCREATEDB
NOCREATEROLE NOCREATEUSER NOINHERIT NOLOGIN_P NONE NOSUPERUSER
@ -416,7 +417,7 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args)
OBJECT_P OF OFF OFFSET OIDS OLD ON ONLY OPERATOR OPTION OR
ORDER OUT_P OUTER_P OVERLAPS OVERLAY OWNED OWNER
PARTIAL PASSWORD PLACING PLANS POSITION
PARSER PARTIAL PASSWORD PLACING PLANS POSITION
PRECISION PRESERVE PREPARE PREPARED PRIMARY
PRIOR PRIVILEGES PROCEDURAL PROCEDURE
@ -426,13 +427,13 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args)
REPEATABLE REPLACE REPLICA RESET RESTART RESTRICT RETURNING RETURNS REVOKE
RIGHT ROLE ROLLBACK ROW ROWS RULE
SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE
SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE
SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE
SHOW SIMILAR SIMPLE SMALLINT SOME STABLE STANDALONE_P START STATEMENT
STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING SUPERUSER_P
SYMMETRIC SYSID SYSTEM_P
TABLE TABLESPACE TEMP TEMPLATE TEMPORARY THEN TIME TIMESTAMP
TABLE TABLESPACE TEMP TEMPLATE TEMPORARY TEXT THEN TIME TIMESTAMP
TO TRAILING TRANSACTION TREAT TRIGGER TRIM TRUE_P
TRUNCATE TRUSTED TYPE_P
@ -537,6 +538,8 @@ stmt :
| AlterTableStmt
| AlterRoleSetStmt
| AlterRoleStmt
| AlterTSConfigurationStmt
| AlterTSDictionaryStmt
| AlterUserSetStmt
| AlterUserStmt
| AnalyzeStmt
@ -2972,6 +2975,42 @@ DefineStmt:
n->vals = $7;
$$ = (Node *)n;
}
| CREATE TEXT SEARCH PARSER any_name definition
{
DefineStmt *n = makeNode(DefineStmt);
n->kind = OBJECT_TSPARSER;
n->args = NIL;
n->defnames = $5;
n->definition = $6;
$$ = (Node *)n;
}
| CREATE TEXT SEARCH DICTIONARY any_name definition
{
DefineStmt *n = makeNode(DefineStmt);
n->kind = OBJECT_TSDICTIONARY;
n->args = NIL;
n->defnames = $5;
n->definition = $6;
$$ = (Node *)n;
}
| CREATE TEXT SEARCH TEMPLATE any_name definition
{
DefineStmt *n = makeNode(DefineStmt);
n->kind = OBJECT_TSTEMPLATE;
n->args = NIL;
n->defnames = $5;
n->definition = $6;
$$ = (Node *)n;
}
| CREATE TEXT SEARCH CONFIGURATION any_name definition
{
DefineStmt *n = makeNode(DefineStmt);
n->kind = OBJECT_TSCONFIGURATION;
n->args = NIL;
n->defnames = $5;
n->definition = $6;
$$ = (Node *)n;
}
;
definition: '(' def_list ')' { $$ = $2; }
@ -3281,6 +3320,10 @@ drop_type: TABLE { $$ = OBJECT_TABLE; }
| DOMAIN_P { $$ = OBJECT_DOMAIN; }
| CONVERSION_P { $$ = OBJECT_CONVERSION; }
| SCHEMA { $$ = OBJECT_SCHEMA; }
| TEXT SEARCH PARSER { $$ = OBJECT_TSPARSER; }
| TEXT SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; }
| TEXT SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; }
| TEXT SEARCH CONFIGURATION { $$ = OBJECT_TSCONFIGURATION; }
;
any_name_list:
@ -3323,7 +3366,10 @@ TruncateStmt:
*
* COMMENT ON [ [ DATABASE | DOMAIN | INDEX | SEQUENCE | TABLE | TYPE | VIEW |
* CONVERSION | LANGUAGE | OPERATOR CLASS | LARGE OBJECT |
* CAST | COLUMN | SCHEMA | TABLESPACE | ROLE ] <objname> |
* CAST | COLUMN | SCHEMA | TABLESPACE | ROLE |
* TEXT SEARCH PARSER | TEXT SEARCH DICTIONARY |
* TEXT SEARCH TEMPLATE |
* TEXT SEARCH CONFIGURATION ] <objname> |
* AGGREGATE <aggname> (arg1, ...) |
* FUNCTION <funcname> (arg1, arg2, ...) |
* OPERATOR <op> (leftoperand_typ, rightoperand_typ) |
@ -3454,6 +3500,38 @@ CommentStmt:
n->comment = $7;
$$ = (Node *) n;
}
| COMMENT ON TEXT SEARCH PARSER any_name IS comment_text
{
CommentStmt *n = makeNode(CommentStmt);
n->objtype = OBJECT_TSPARSER;
n->objname = $6;
n->comment = $8;
$$ = (Node *) n;
}
| COMMENT ON TEXT SEARCH DICTIONARY any_name IS comment_text
{
CommentStmt *n = makeNode(CommentStmt);
n->objtype = OBJECT_TSDICTIONARY;
n->objname = $6;
n->comment = $8;
$$ = (Node *) n;
}
| COMMENT ON TEXT SEARCH TEMPLATE any_name IS comment_text
{
CommentStmt *n = makeNode(CommentStmt);
n->objtype = OBJECT_TSTEMPLATE;
n->objname = $6;
n->comment = $8;
$$ = (Node *) n;
}
| COMMENT ON TEXT SEARCH CONFIGURATION any_name IS comment_text
{
CommentStmt *n = makeNode(CommentStmt);
n->objtype = OBJECT_TSCONFIGURATION;
n->objname = $6;
n->comment = $8;
$$ = (Node *) n;
}
;
comment_type:
@ -4615,6 +4693,38 @@ RenameStmt: ALTER AGGREGATE func_name aggr_args RENAME TO name
n->newname = $6;
$$ = (Node *)n;
}
| ALTER TEXT SEARCH PARSER any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
n->renameType = OBJECT_TSPARSER;
n->object = $5;
n->newname = $8;
$$ = (Node *)n;
}
| ALTER TEXT SEARCH DICTIONARY any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
n->renameType = OBJECT_TSDICTIONARY;
n->object = $5;
n->newname = $8;
$$ = (Node *)n;
}
| ALTER TEXT SEARCH TEMPLATE any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
n->renameType = OBJECT_TSTEMPLATE;
n->object = $5;
n->newname = $8;
$$ = (Node *)n;
}
| ALTER TEXT SEARCH CONFIGURATION any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
n->renameType = OBJECT_TSCONFIGURATION;
n->object = $5;
n->newname = $8;
$$ = (Node *)n;
}
;
opt_column: COLUMN { $$ = COLUMN; }
@ -4787,6 +4897,22 @@ AlterOwnerStmt: ALTER AGGREGATE func_name aggr_args OWNER TO RoleId
n->newowner = $6;
$$ = (Node *)n;
}
| ALTER TEXT SEARCH DICTIONARY any_name OWNER TO RoleId
{
AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
n->objectType = OBJECT_TSDICTIONARY;
n->object = $5;
n->newowner = $8;
$$ = (Node *)n;
}
| ALTER TEXT SEARCH CONFIGURATION any_name OWNER TO RoleId
{
AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
n->objectType = OBJECT_TSCONFIGURATION;
n->object = $5;
n->newowner = $8;
$$ = (Node *)n;
}
;
@ -5380,6 +5506,89 @@ opt_as: AS {}
;
/*****************************************************************************
*
* Manipulate a text search dictionary or configuration
*
*****************************************************************************/
/*
 * ALTER TEXT SEARCH DICTIONARY any_name definition
 *
 * Builds an AlterTSDictionaryStmt node: dictname is the (possibly
 * qualified) dictionary name ($5) and options is the result of the
 * "definition" production ($6).
 */
AlterTSDictionaryStmt:
ALTER TEXT SEARCH DICTIONARY any_name definition
{
AlterTSDictionaryStmt *n = makeNode(AlterTSDictionaryStmt);
n->dictname = $5;
n->options = $6;
$$ = (Node *)n;
}
;
/*
 * ALTER TEXT SEARCH CONFIGURATION, in all its forms.  Every alternative
 * builds an AlterTSConfigurationStmt with cfgname = $5; the remaining
 * fields encode which form was used:
 *
 *	 ... definition
 *		 options = $6
 *	 ... ADD MAPPING FOR name_list WITH any_name_list
 *		 tokentype = $9, dicts = $11, override = false, replace = false
 *	 ... ALTER MAPPING FOR name_list WITH any_name_list
 *		 tokentype = $9, dicts = $11, override = true, replace = false
 *	 ... ALTER MAPPING REPLACE any_name WITH any_name
 *		 tokentype = NIL, dicts = two-element list ($9, $11),
 *		 override = false, replace = true
 *	 ... ALTER MAPPING FOR name_list REPLACE any_name WITH any_name
 *		 tokentype = $9, dicts = two-element list ($11, $13),
 *		 override = false, replace = true
 *	 ... DROP MAPPING FOR name_list
 *		 tokentype = $9, missing_ok = false
 *	 ... DROP MAPPING IF EXISTS FOR name_list
 *		 tokentype = $11, missing_ok = true
 *
 * Fields an alternative does not assign keep whatever makeNode() gave them
 * (presumably zero/NIL -- confirm against makeNode).
 */
AlterTSConfigurationStmt:
ALTER TEXT SEARCH CONFIGURATION any_name definition
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
n->options = $6;
$$ = (Node *)n;
}
| ALTER TEXT SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list WITH any_name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
n->tokentype = $9;
n->dicts = $11;
n->override = false;
n->replace = false;
$$ = (Node*)n;
}
| ALTER TEXT SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list WITH any_name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
n->tokentype = $9;
n->dicts = $11;
n->override = true;
n->replace = false;
$$ = (Node*)n;
}
| ALTER TEXT SEARCH CONFIGURATION any_name ALTER MAPPING REPLACE any_name WITH any_name
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
n->tokentype = NIL;
n->dicts = list_make2($9,$11);
n->override = false;
n->replace = true;
$$ = (Node*)n;
}
| ALTER TEXT SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list REPLACE any_name WITH any_name
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
n->tokentype = $9;
n->dicts = list_make2($11,$13);
n->override = false;
n->replace = true;
$$ = (Node*)n;
}
| ALTER TEXT SEARCH CONFIGURATION any_name DROP MAPPING FOR name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
n->tokentype = $9;
n->missing_ok = false;
$$ = (Node*)n;
}
| ALTER TEXT SEARCH CONFIGURATION any_name DROP MAPPING IF_P EXISTS FOR name_list
{
AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
n->cfgname = $5;
n->tokentype = $11;
n->missing_ok = true;
$$ = (Node*)n;
}
;
/*****************************************************************************
*
* Manipulate a conversion
@ -8853,6 +9062,7 @@ unreserved_keyword:
| COMMIT
| COMMITTED
| CONCURRENTLY
| CONFIGURATION
| CONNECTION
| CONSTRAINTS
| CONTENT_P
@ -8876,6 +9086,7 @@ unreserved_keyword:
| DELETE_P
| DELIMITER
| DELIMITERS
| DICTIONARY
| DISABLE_P
| DISCARD
| DOCUMENT_P
@ -8933,6 +9144,7 @@ unreserved_keyword:
| LOCATION
| LOCK_P
| LOGIN_P
| MAPPING
| MATCH
| MAXVALUE
| MINUTE_P
@ -8961,6 +9173,7 @@ unreserved_keyword:
| OPTION
| OWNED
| OWNER
| PARSER
| PARTIAL
| PASSWORD
| PLANS
@ -8994,6 +9207,7 @@ unreserved_keyword:
| SAVEPOINT
| SCHEMA
| SCROLL
| SEARCH
| SECOND_P
| SECURITY
| SEQUENCE
@ -9020,6 +9234,7 @@ unreserved_keyword:
| TEMP
| TEMPLATE
| TEMPORARY
| TEXT
| TRANSACTION
| TRIGGER
| TRUNCATE

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.189 2007/06/18 21:40:58 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.190 2007/08/21 01:11:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -87,6 +87,7 @@ static const ScanKeyword ScanKeywords[] = {
{"commit", COMMIT, UNRESERVED_KEYWORD},
{"committed", COMMITTED, UNRESERVED_KEYWORD},
{"concurrently", CONCURRENTLY, UNRESERVED_KEYWORD},
{"configuration", CONFIGURATION, UNRESERVED_KEYWORD},
{"connection", CONNECTION, UNRESERVED_KEYWORD},
{"constraint", CONSTRAINT, RESERVED_KEYWORD},
{"constraints", CONSTRAINTS, UNRESERVED_KEYWORD},
@ -124,6 +125,7 @@ static const ScanKeyword ScanKeywords[] = {
{"delimiter", DELIMITER, UNRESERVED_KEYWORD},
{"delimiters", DELIMITERS, UNRESERVED_KEYWORD},
{"desc", DESC, RESERVED_KEYWORD},
{"dictionary", DICTIONARY, UNRESERVED_KEYWORD},
{"disable", DISABLE_P, UNRESERVED_KEYWORD},
{"discard", DISCARD, UNRESERVED_KEYWORD},
{"distinct", DISTINCT, RESERVED_KEYWORD},
@ -219,6 +221,7 @@ static const ScanKeyword ScanKeywords[] = {
{"location", LOCATION, UNRESERVED_KEYWORD},
{"lock", LOCK_P, UNRESERVED_KEYWORD},
{"login", LOGIN_P, UNRESERVED_KEYWORD},
{"mapping", MAPPING, UNRESERVED_KEYWORD},
{"match", MATCH, UNRESERVED_KEYWORD},
{"maxvalue", MAXVALUE, UNRESERVED_KEYWORD},
{"minute", MINUTE_P, UNRESERVED_KEYWORD},
@ -268,6 +271,7 @@ static const ScanKeyword ScanKeywords[] = {
{"overlay", OVERLAY, COL_NAME_KEYWORD},
{"owned", OWNED, UNRESERVED_KEYWORD},
{"owner", OWNER, UNRESERVED_KEYWORD},
{"parser", PARSER, UNRESERVED_KEYWORD},
{"partial", PARTIAL, UNRESERVED_KEYWORD},
{"password", PASSWORD, UNRESERVED_KEYWORD},
{"placing", PLACING, RESERVED_KEYWORD},
@ -310,6 +314,7 @@ static const ScanKeyword ScanKeywords[] = {
{"savepoint", SAVEPOINT, UNRESERVED_KEYWORD},
{"schema", SCHEMA, UNRESERVED_KEYWORD},
{"scroll", SCROLL, UNRESERVED_KEYWORD},
{"search", SEARCH, UNRESERVED_KEYWORD},
{"second", SECOND_P, UNRESERVED_KEYWORD},
{"security", SECURITY, UNRESERVED_KEYWORD},
{"select", SELECT, RESERVED_KEYWORD},
@ -345,6 +350,7 @@ static const ScanKeyword ScanKeywords[] = {
{"temp", TEMP, UNRESERVED_KEYWORD},
{"template", TEMPLATE, UNRESERVED_KEYWORD},
{"temporary", TEMPORARY, UNRESERVED_KEYWORD},
{"text", TEXT, UNRESERVED_KEYWORD},
{"then", THEN, RESERVED_KEYWORD},
{"time", TIME, COL_NAME_KEYWORD},
{"timestamp", TIMESTAMP, COL_NAME_KEYWORD},

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.155 2007/06/06 23:00:37 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.156 2007/08/21 01:11:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1554,6 +1554,8 @@ TypeCategory(Oid inType)
case (REGOPERATOROID):
case (REGCLASSOID):
case (REGTYPEOID):
case (REGCONFIGOID):
case (REGDICTIONARYOID):
case (INT2OID):
case (INT4OID):
case (INT8OID):
@ -1672,7 +1674,9 @@ IsPreferredType(CATEGORY category, Oid type)
type == REGOPEROID ||
type == REGOPERATOROID ||
type == REGCLASSOID ||
type == REGTYPEOID)
type == REGTYPEOID ||
type == REGCONFIGOID ||
type == REGDICTIONARYOID)
preftype = OIDOID;
else
preftype = FLOAT8OID;

View File

@ -0,0 +1,144 @@
#-------------------------------------------------------------------------
#
# Makefile for src/backend/snowball
#
# $PostgreSQL: pgsql/src/backend/snowball/Makefile,v 1.1 2007/08/21 01:11:15 tgl Exp $
#
#-------------------------------------------------------------------------
subdir = src/backend/snowball
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -I$(top_srcdir)/src/include/snowball \
-I$(top_srcdir)/src/include/snowball/libstemmer $(CPPFLAGS)
OBJS= dict_snowball.o api.o utilities.o \
stem_ISO_8859_1_danish.o \
stem_ISO_8859_1_dutch.o \
stem_ISO_8859_1_english.o \
stem_ISO_8859_1_finnish.o \
stem_ISO_8859_1_french.o \
stem_ISO_8859_1_german.o \
stem_ISO_8859_1_hungarian.o \
stem_ISO_8859_1_italian.o \
stem_ISO_8859_1_norwegian.o \
stem_ISO_8859_1_porter.o \
stem_ISO_8859_1_portuguese.o \
stem_ISO_8859_1_spanish.o \
stem_ISO_8859_1_swedish.o \
stem_ISO_8859_2_romanian.o \
stem_KOI8_R_russian.o \
stem_UTF_8_danish.o \
stem_UTF_8_dutch.o \
stem_UTF_8_english.o \
stem_UTF_8_finnish.o \
stem_UTF_8_french.o \
stem_UTF_8_german.o \
stem_UTF_8_hungarian.o \
stem_UTF_8_italian.o \
stem_UTF_8_norwegian.o \
stem_UTF_8_porter.o \
stem_UTF_8_portuguese.o \
stem_UTF_8_romanian.o \
stem_UTF_8_russian.o \
stem_UTF_8_spanish.o \
stem_UTF_8_swedish.o \
stem_UTF_8_turkish.o
# LANGUAGES is a flat list of pairs: <language> <latin dictionary>.
# The first column names the language (used for _DICTNAME_, _CFGNAME_,
# _NONLATDICTNAME_ and the stopwords/<language>.stop lookup); the second
# column is the dictionary name substituted for _LATDICTNAME_, which differs
# from the language only where noted (russian uses english).
#
# The final entry must NOT end in a backslash: a trailing line continuation
# would splice the next non-blank line (the SQLSCRIPT assignment) into this
# list, corrupting the pair iteration and leaving SQLSCRIPT undefined.
LANGUAGES= \
	danish danish \
	dutch dutch \
	english english \
	finnish finnish \
	french french \
	german german \
	hungarian hungarian \
	italian italian \
	norwegian norwegian \
	portuguese portuguese \
	romanian romanian \
	russian english \
	spanish spanish \
	swedish swedish \
	turkish turkish
SQLSCRIPT= snowball_create.sql
DICTDIR=tsearch_data
ifdef VPATH
override VPATH := $(srcdir)/libstemmer:$(VPATH)
else
VPATH = $(srcdir)/libstemmer
endif
SHLIB_LINK := $(BE_DLLLIBS)
NAME := dict_snowball
SO_MAJOR_VERSION := 0
SO_MINOR_VERSION := 0
rpath =
all: all-shared-lib $(SQLSCRIPT)
include $(top_srcdir)/src/Makefile.shlib
# Generate the SQL script named by $(SQLSCRIPT).
#
# With enable_shared=yes the script consists of a header comment, the
# contents of snowball_func.sql.in, and then one copy of snowball.sql.in per
# LANGUAGES pair with these placeholders substituted:
#   _DICTNAME_, _CFGNAME_, _NONLATDICTNAME_  -> first column (language)
#   _LATDICTNAME_                            -> second column
#   _STOPWORDS_  -> ", StopWords=<language>" if stopwords/<language>.stop
#                   exists and is non-empty, otherwise the empty string
# The loop consumes LANGUAGES two words at a time (one "shift" per column).
#
# Without shared-library support the script contains only a comment line.
$(SQLSCRIPT): Makefile snowball_func.sql.in snowball.sql.in
ifeq ($(enable_shared), yes)
echo '-- Language-specific snowball dictionaries' > $@
cat $(srcdir)/snowball_func.sql.in >> $@
@set $(LANGUAGES) ; \
while [ "$$#" -gt 0 ] ; \
do \
lang=$$1; shift; \
if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then \
stop=", StopWords=$${lang}" ; \
else \
stop=""; \
fi; \
nonlatdictname=$$lang; \
latdictname=$$1; shift; \
cat $(srcdir)/snowball.sql.in | \
sed -e "s#_DICTNAME_#$$lang#g" | \
sed -e "s#_CFGNAME_#$$lang#g" | \
sed -e "s#_LATDICTNAME_#$$latdictname#g" | \
sed -e "s#_NONLATDICTNAME_#$$nonlatdictname#g" | \
sed -e "s#_STOPWORDS_#$$stop#g" ; \
done >> $@
else
echo "-- No language-specific snowball dictionaries, for lack of shared library support" > $@
endif
# Install the shared library (only when enable_shared=yes), the generated
# SQL script, and every stopwords/<language>.stop file that exists and is
# non-empty.  The loop walks LANGUAGES in pairs and uses only the first
# column; the second "shift" discards the latin-dictionary column.
install: all installdirs
ifeq ($(enable_shared), yes)
$(INSTALL_SHLIB) $(shlib) '$(DESTDIR)$(pkglibdir)/$(NAME)$(DLSUFFIX)'
endif
$(INSTALL_DATA) $(SQLSCRIPT) '$(DESTDIR)$(datadir)'
@set $(LANGUAGES) ; \
while [ "$$#" -gt 0 ] ; \
do \
lang=$$1; shift; shift; \
if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then \
$(INSTALL_DATA) $(srcdir)/stopwords/$${lang}.stop '$(DESTDIR)$(datadir)/$(DICTDIR)' ; \
fi \
done
# Create the destination directories used by "install": the package library
# directory, the data directory, and its $(DICTDIR) subdirectory.
installdirs:
$(mkinstalldirs) '$(DESTDIR)$(pkglibdir)' '$(DESTDIR)$(datadir)' '$(DESTDIR)$(datadir)/$(DICTDIR)'
# Remove what "install" put in place: the shared library, the SQL script,
# and the stopword files.  A stopword file is removed only if the matching
# stopwords/<language>.stop exists and is non-empty in the source tree,
# mirroring the test used at install time.
uninstall:
rm -f '$(DESTDIR)$(pkglibdir)/$(NAME)$(DLSUFFIX)'
rm -f '$(DESTDIR)$(datadir)/$(SQLSCRIPT)'
@set $(LANGUAGES) ; \
while [ "$$#" -gt 0 ] ; \
do \
lang=$$1; shift; shift; \
if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then \
rm -f '$(DESTDIR)$(datadir)/$(DICTDIR)/'$${lang}.stop ; \
fi \
done
# All three clean levels do the same thing here: run clean-lib, then delete
# the object files and the generated SQL script.
clean distclean maintainer-clean: clean-lib
rm -f $(OBJS) $(SQLSCRIPT)

View File

@ -0,0 +1,47 @@
Snowball-based stemming
-----------------------
This module uses the word stemming code developed by the Snowball project,
http://snowball.tartarus.org/
which is released by them under a BSD-style license.
The files under src/backend/snowball/libstemmer/ and
src/include/snowball/libstemmer/ are taken directly from their libstemmer_c
distribution, with only some minor adjustments of file inclusions. Note
that most of these files are in fact derived files, not master source.
The master sources are in the Snowball language, and are available along
with the Snowball-to-C compiler from the Snowball project. We choose to
include the derived files in the PostgreSQL distribution because most
installations will not have the Snowball compiler available.
To update the PostgreSQL sources from a new Snowball libstemmer_c
distribution:
1. Copy the *.c files in libstemmer_c/src_c/ to src/backend/snowball/libstemmer
with replacement of "../runtime/header.h" by "header.h", for example
for f in libstemmer_c/src_c/*.c
do
sed 's|\.\./runtime/header\.h|header.h|' $f >libstemmer/`basename $f`
done
(Alternatively, if you rebuild the stemmer files from the master Snowball
sources, just omit "-r ../runtime" from the Snowball compiler switches.)
2. Copy the *.c files in libstemmer_c/runtime/ to
src/backend/snowball/libstemmer, and edit them to remove direct inclusions
of system headers such as <stdio.h> --- they should only include "header.h".
(This removal avoids portability problems on some platforms where <stdio.h>
is sensitive to largefile compilation options.)
3. Copy the *.h files in libstemmer_c/src_c/ and libstemmer_c/runtime/
to src/include/snowball/libstemmer. At this writing the header files
do not require any changes.
4. Check whether any stemmer modules have been added or removed. If so, edit
the OBJS list in Makefile, the list of #include's in dict_snowball.c, and the
stemmer_modules[] table in dict_snowball.c.
5. The various stopword files in stopwords/ must be downloaded
individually from pages on the snowball.tartarus.org website.
Note that these files must be stored in UTF-8 encoding.

View File

@ -0,0 +1,326 @@
/*-------------------------------------------------------------------------
*
* dict_snowball.c
* Snowball dictionary
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/snowball/dict_snowball.c,v 1.1 2007/08/21 01:11:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
/* Some platforms define MAXINT and/or MININT, causing conflicts */
#ifdef MAXINT
#undef MAXINT
#endif
#ifdef MININT
#undef MININT
#endif
/* Now we can include the original Snowball header.h */
#include "snowball/libstemmer/header.h"
#include "snowball/libstemmer/stem_ISO_8859_1_danish.h"
#include "snowball/libstemmer/stem_ISO_8859_1_dutch.h"
#include "snowball/libstemmer/stem_ISO_8859_1_english.h"
#include "snowball/libstemmer/stem_ISO_8859_1_finnish.h"
#include "snowball/libstemmer/stem_ISO_8859_1_french.h"
#include "snowball/libstemmer/stem_ISO_8859_1_german.h"
#include "snowball/libstemmer/stem_ISO_8859_1_hungarian.h"
#include "snowball/libstemmer/stem_ISO_8859_1_italian.h"
#include "snowball/libstemmer/stem_ISO_8859_1_norwegian.h"
#include "snowball/libstemmer/stem_ISO_8859_1_porter.h"
#include "snowball/libstemmer/stem_ISO_8859_1_portuguese.h"
#include "snowball/libstemmer/stem_ISO_8859_1_spanish.h"
#include "snowball/libstemmer/stem_ISO_8859_1_swedish.h"
#include "snowball/libstemmer/stem_ISO_8859_2_romanian.h"
#include "snowball/libstemmer/stem_KOI8_R_russian.h"
#include "snowball/libstemmer/stem_UTF_8_danish.h"
#include "snowball/libstemmer/stem_UTF_8_dutch.h"
#include "snowball/libstemmer/stem_UTF_8_english.h"
#include "snowball/libstemmer/stem_UTF_8_finnish.h"
#include "snowball/libstemmer/stem_UTF_8_french.h"
#include "snowball/libstemmer/stem_UTF_8_german.h"
#include "snowball/libstemmer/stem_UTF_8_hungarian.h"
#include "snowball/libstemmer/stem_UTF_8_italian.h"
#include "snowball/libstemmer/stem_UTF_8_norwegian.h"
#include "snowball/libstemmer/stem_UTF_8_porter.h"
#include "snowball/libstemmer/stem_UTF_8_portuguese.h"
#include "snowball/libstemmer/stem_UTF_8_romanian.h"
#include "snowball/libstemmer/stem_UTF_8_russian.h"
#include "snowball/libstemmer/stem_UTF_8_spanish.h"
#include "snowball/libstemmer/stem_UTF_8_swedish.h"
#include "snowball/libstemmer/stem_UTF_8_turkish.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(dsnowball_init);
Datum dsnowball_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(dsnowball_lexize);
Datum dsnowball_lexize(PG_FUNCTION_ARGS);
/*
 * Description of one supported Snowball stemmer module.
 *
 * The stemmer_modules[] table initializes these fields positionally, so
 * the field order here must not change without updating that table.
 */
typedef struct stemmer_module
{
const char *name;	/* language name; matched case-insensitively */
pg_enc enc;	/* encoding the stemmer works in; PG_SQL_ASCII is
				 * treated as valid for any server encoding */
struct SN_env *(*create) (void);	/* creates a stemmer environment */
void (*close) (struct SN_env *);	/* presumably releases an environment
									 * from create(); not called in the
									 * code visible here -- confirm */
int (*stem) (struct SN_env *);	/* stemming entry point */
} stemmer_module;
/*
 * Table of available stemmers, one row per (language, encoding) pair.
 *
 * locate_stem_module() scans this table from the top and takes the first
 * matching row, so row order matters.  The table is terminated by a row
 * whose name is NULL.
 */
static const stemmer_module stemmer_modules[] =
{
/*
* Stemmers list from Snowball distribution
*/
{"danish", PG_LATIN1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
{"dutch", PG_LATIN1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
{"english", PG_LATIN1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
{"finnish", PG_LATIN1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
{"french", PG_LATIN1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
{"german", PG_LATIN1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
{"hungarian", PG_LATIN1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
{"italian", PG_LATIN1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
{"norwegian", PG_LATIN1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
{"porter", PG_LATIN1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem},
{"portuguese", PG_LATIN1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
{"spanish", PG_LATIN1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
{"swedish", PG_LATIN1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
{"romanian", PG_LATIN2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
{"russian", PG_KOI8R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
{"danish", PG_UTF8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
{"dutch", PG_UTF8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
{"english", PG_UTF8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
{"finnish", PG_UTF8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
{"french", PG_UTF8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
{"german", PG_UTF8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
{"hungarian", PG_UTF8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
{"italian", PG_UTF8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
{"norwegian", PG_UTF8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
{"porter", PG_UTF8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem},
{"portuguese", PG_UTF8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
{"romanian", PG_UTF8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
{"russian", PG_UTF8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
{"spanish", PG_UTF8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
{"swedish", PG_UTF8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
{"turkish", PG_UTF8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
/*
* Stemmer with PG_SQL_ASCII encoding should be valid for any server
* encoding
*/
{"english", PG_SQL_ASCII, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
{NULL, 0, NULL, NULL, NULL} /* list end marker */
};
/*
 * Per-dictionary state built by dsnowball_init() and handed back to
 * dsnowball_lexize() as its first argument.
 */
typedef struct DictSnowball
{
struct SN_env *z;	/* stemmer environment from the module's create() */
StopList stoplist;	/* stop words, loaded from the StopWords parameter */
bool needrecode; /* needs recoding before/after call stem */
int (*stem) (struct SN_env * z);	/* selected module's stem function;
									 * also serves as the "Language was
									 * given" flag during init */
/*
* snowball saves alloced memory between calls, so we should run it in our
* private memory context. Note, init function is executed in long lived
* context, so we just remember CurrentMemoryContext
*/
MemoryContext dictCtx;
} DictSnowball;
/*
 * locate_stem_module
 *
 * Pick the stemmer for language "lang" and record it in *d (stem function,
 * freshly created stemmer environment, and the needrecode flag).
 *
 * Two passes are made over stemmer_modules[]:
 *	 1. a module for this language whose encoding is the database encoding,
 *		or PG_SQL_ASCII (treated as working with any server encoding);
 *		needrecode is set to false;
 *	 2. failing that, the UTF8 module for this language; needrecode is set
 *		to true.
 * Language names are compared case-insensitively.  If neither pass finds a
 * module, an ERRCODE_UNDEFINED_OBJECT error is raised.
 */
static void
locate_stem_module(DictSnowball * d, char *lang)
{
	const stemmer_module *mod;
	int			dbenc = GetDatabaseEncoding();

	/* Pass 1: stemmer usable directly in the database encoding */
	for (mod = stemmer_modules; mod->name != NULL; mod++)
	{
		if (mod->enc != PG_SQL_ASCII && mod->enc != dbenc)
			continue;
		if (pg_strcasecmp(mod->name, lang) != 0)
			continue;

		d->stem = mod->stem;
		d->z = mod->create();
		d->needrecode = false;
		return;
	}

	/* Pass 2: fall back to the UTF8 stemmer for the language */
	for (mod = stemmer_modules; mod->name != NULL; mod++)
	{
		if (mod->enc != PG_UTF8)
			continue;
		if (pg_strcasecmp(mod->name, lang) != 0)
			continue;

		d->stem = mod->stem;
		d->z = mod->create();
		d->needrecode = true;
		return;
	}

	ereport(ERROR,
			(errcode(ERRCODE_UNDEFINED_OBJECT),
			 errmsg("no Snowball stemmer available for language \"%s\" and encoding \"%s\"",
					lang, GetDatabaseEncodingName())));
}
/*
 * dsnowball_init
 *
 * Dictionary init function.  Argument 0 is a text value holding key/value
 * option pairs (split by parse_keyvalpairs).  Recognized keys, compared
 * case-insensitively:
 *	 StopWords - stop list to load and sort (at most once)
 *	 Language  - stemmer language, resolved by locate_stem_module
 *				 (required, at most once)
 * Any other key, a repeated key, a NULL argument, or a missing Language
 * raises ERRCODE_INVALID_PARAMETER_VALUE.
 *
 * Returns a pointer to a palloc'd DictSnowball, which also remembers
 * CurrentMemoryContext in dictCtx.
 */
Datum
dsnowball_init(PG_FUNCTION_ARGS)
{
	DictSnowball *dict;
	text	   *optstr;
	Map		   *pairs;
	Map		   *cur;
	bool		have_stopwords = false;

	/* init functions must defend against NULLs for themselves */
	if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("NULL config not allowed for Snowball")));
	}

	optstr = PG_GETARG_TEXT_P(0);

	dict = (DictSnowball *) palloc0(sizeof(DictSnowball));
	dict->stoplist.wordop = recode_and_lowerstr;

	parse_keyvalpairs(optstr, &pairs);
	PG_FREE_IF_COPY(optstr, 0);

	/* Walk the pairs up to the entry with a NULL key, freeing as we go */
	for (cur = pairs; cur && cur->key; cur++)
	{
		if (pg_strcasecmp("StopWords", cur->key) == 0)
		{
			if (have_stopwords)
			{
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("multiple StopWords parameters")));
			}
			readstoplist(cur->value, &dict->stoplist);
			sortstoplist(&dict->stoplist);
			have_stopwords = true;
		}
		else if (pg_strcasecmp("Language", cur->key) == 0)
		{
			/* a non-NULL stem function means Language was already seen */
			if (dict->stem)
			{
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("multiple Language parameters")));
			}
			locate_stem_module(dict, cur->value);
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("unrecognized Snowball parameter: \"%s\"",
							cur->key)));
		}

		pfree(cur->key);
		pfree(cur->value);
	}

	pfree(pairs);

	if (!dict->stem)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("missing Language parameter")));
	}

	/* remember the context we were called in; see DictSnowball.dictCtx */
	dict->dictCtx = CurrentMemoryContext;

	PG_RETURN_POINTER(dict);
}
Datum
dsnowball_lexize(PG_FUNCTION_ARGS)
{
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
char *txt = lowerstr_with_len(in, len);
TSLexeme *res = palloc0(sizeof(TSLexeme) * 2);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
pfree(txt);
}
else
{
MemoryContext saveCtx;
/*
* recode to utf8 if stemmer is utf8 and doesn't match server encoding
*/
if (d->needrecode)
{
char *recoded;
recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
strlen(txt),
GetDatabaseEncoding(),
PG_UTF8);
if (recoded == NULL)
elog(ERROR, "encoding conversion failed");
if (recoded != txt)
{
pfree(txt);
txt = recoded;
}
}
/* see comment about d->dictCtx */
saveCtx = MemoryContextSwitchTo(d->dictCtx);
SN_set_current(d->z, strlen(txt), (symbol *) txt);
d->stem(d->z);
MemoryContextSwitchTo(saveCtx);
if (d->z->p && d->z->l)
{
txt = repalloc(txt, d->z->l + 1);
memcpy(txt, d->z->p, d->z->l);
txt[d->z->l] = '\0';
}
/* back recode if needed */
if (d->needrecode)
{
char *recoded;
recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt,
strlen(txt),
PG_UTF8,
GetDatabaseEncoding());
if (recoded == NULL)
elog(ERROR, "encoding conversion failed");
if (recoded != txt)
{
pfree(txt);
txt = recoded;
}
}
res->lexeme = txt;
}
PG_RETURN_POINTER(res);
}

View File

@ -0,0 +1,64 @@
#include "header.h"
/*
 * SN_create_env: allocate a zero-filled stemmer environment.
 *
 *   S_size  number of symbol-string slots to create in env->S
 *   I_size  number of zero-initialized ints in env->I
 *   B_size  number of zero-initialized bytes in env->B
 *
 * A size of 0 leaves the corresponding pointer NULL.  The working string
 * env->p is always created.  Returns the new environment, or NULL if any
 * allocation fails; in that case whatever was built so far is handed to
 * SN_close_env before returning.
 */
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
{
    struct SN_env * env = (struct SN_env *) calloc(1, sizeof(struct SN_env));
    int slot;

    if (env == NULL) return NULL;

    env->p = create_s();
    if (env->p == NULL) goto fail;

    if (S_size != 0)
    {
        env->S = (symbol * *) calloc(S_size, sizeof(symbol *));
        if (env->S == NULL) goto fail;

        for (slot = 0; slot < S_size; slot++)
        {
            env->S[slot] = create_s();
            if (env->S[slot] == NULL) goto fail;
        }
    }

    if (I_size != 0)
    {
        env->I = (int *) calloc(I_size, sizeof(int));
        if (env->I == NULL) goto fail;
    }

    if (B_size != 0)
    {
        env->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
        if (env->B == NULL) goto fail;
    }

    return env;

fail:
    SN_close_env(env, S_size);
    return NULL;
}
/*
 * SN_close_env: release an environment obtained from SN_create_env.
 *
 *   z       environment to free; NULL is accepted and ignored
 *   S_size  number of slots in z->S (the value given to SN_create_env)
 *
 * This routine is also the cleanup path of a failed SN_create_env, so it must
 * cope with a partially built environment.  In particular z->S can still be
 * NULL although S_size is nonzero (the failure happened before or while
 * allocating the slot array); the slot loop is therefore skipped unless the
 * array actually exists.
 */
extern void SN_close_env(struct SN_env * z, int S_size)
{
    if (z == NULL) return;
    if (S_size && z->S != NULL)
    {
        int i;
        for (i = 0; i < S_size; i++)
        {
            /*
             * Slots past the point of failure are still NULL (the array is
             * calloc'ed).  NOTE(review): this relies on lose_s() accepting
             * NULL, exactly as the code did before - confirm.
             */
            lose_s(z->S[i]);
        }
        free(z->S);
    }
    free(z->I);
    free(z->B);
    if (z->p) lose_s(z->p);
    free(z);
}
/*
 * SN_set_current: load a new word into the environment.
 *
 * Replaces the current contents (positions 0 .. z->l) with the "size" symbols
 * at "s" and rewinds the cursor to 0.  Returns whatever replace_s() returned.
 */
extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
{
    const int status = replace_s(z, 0, z->l, size, s, NULL);

    z->c = 0;
    return status;
}

View File

@ -0,0 +1,337 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int danish_ISO_8859_1_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_undouble(struct SN_env * z);
static int r_other_suffix(struct SN_env * z);
static int r_consonant_pair(struct SN_env * z);
static int r_main_suffix(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * danish_ISO_8859_1_create_env(void);
extern void danish_ISO_8859_1_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Suffix strings (s_0_*) and the table a_0 searched by r_main_suffix through
 * find_among_b(z, a_0, 32).  The strings carry no terminator; the first field
 * of each a_0 entry repeats the string length.  The third field is -1 or the
 * index of an earlier entry whose string is a shorter suffix of this one
 * (e.g. entry 1 "ethed" -> entry 0 "hed").  The fourth field is 1 for every
 * entry except the bare "s" (entry 16), which has 2; r_main_suffix switches
 * on these values.
 * NOTE(review): field meanings are read off the data and its use in this
 * file - confirm against the declaration of struct among.
 */
static const symbol s_0_0[3] = { 'h', 'e', 'd' };
static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' };
static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' };
static const symbol s_0_3[1] = { 'e' };
static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' };
static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' };
static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' };
static const symbol s_0_7[3] = { 'e', 'n', 'e' };
static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' };
static const symbol s_0_9[3] = { 'e', 'r', 'e' };
static const symbol s_0_10[2] = { 'e', 'n' };
static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' };
static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' };
static const symbol s_0_13[2] = { 'e', 'r' };
static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' };
static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' };
static const symbol s_0_16[1] = { 's' };
static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' };
static const symbol s_0_18[2] = { 'e', 's' };
static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' };
static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' };
static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' };
static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' };
static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' };
static const symbol s_0_24[3] = { 'e', 'n', 's' };
static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' };
static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' };
static const symbol s_0_27[3] = { 'e', 'r', 's' };
static const symbol s_0_28[3] = { 'e', 't', 's' };
static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' };
static const symbol s_0_30[2] = { 'e', 't' };
static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' };
static const struct among a_0[32] =
{
/* 0 */ { 3, s_0_0, -1, 1, 0},
/* 1 */ { 5, s_0_1, 0, 1, 0},
/* 2 */ { 4, s_0_2, -1, 1, 0},
/* 3 */ { 1, s_0_3, -1, 1, 0},
/* 4 */ { 5, s_0_4, 3, 1, 0},
/* 5 */ { 4, s_0_5, 3, 1, 0},
/* 6 */ { 6, s_0_6, 5, 1, 0},
/* 7 */ { 3, s_0_7, 3, 1, 0},
/* 8 */ { 4, s_0_8, 3, 1, 0},
/* 9 */ { 3, s_0_9, 3, 1, 0},
/* 10 */ { 2, s_0_10, -1, 1, 0},
/* 11 */ { 5, s_0_11, 10, 1, 0},
/* 12 */ { 4, s_0_12, 10, 1, 0},
/* 13 */ { 2, s_0_13, -1, 1, 0},
/* 14 */ { 5, s_0_14, 13, 1, 0},
/* 15 */ { 4, s_0_15, 13, 1, 0},
/* 16 */ { 1, s_0_16, -1, 2, 0},
/* 17 */ { 4, s_0_17, 16, 1, 0},
/* 18 */ { 2, s_0_18, 16, 1, 0},
/* 19 */ { 5, s_0_19, 18, 1, 0},
/* 20 */ { 7, s_0_20, 19, 1, 0},
/* 21 */ { 4, s_0_21, 18, 1, 0},
/* 22 */ { 5, s_0_22, 18, 1, 0},
/* 23 */ { 4, s_0_23, 18, 1, 0},
/* 24 */ { 3, s_0_24, 16, 1, 0},
/* 25 */ { 6, s_0_25, 24, 1, 0},
/* 26 */ { 5, s_0_26, 24, 1, 0},
/* 27 */ { 3, s_0_27, 16, 1, 0},
/* 28 */ { 3, s_0_28, 16, 1, 0},
/* 29 */ { 5, s_0_29, 28, 1, 0},
/* 30 */ { 2, s_0_30, -1, 1, 0},
/* 31 */ { 4, s_0_31, 30, 1, 0}
};
/*
 * Two-letter endings "gd", "dt", "gt", "kt" and the table a_1 tested by
 * r_consonant_pair through find_among_b(z, a_1, 4).  Every entry has -1 in
 * the fourth field; the caller only checks whether any entry matched.
 */
static const symbol s_1_0[2] = { 'g', 'd' };
static const symbol s_1_1[2] = { 'd', 't' };
static const symbol s_1_2[2] = { 'g', 't' };
static const symbol s_1_3[2] = { 'k', 't' };
static const struct among a_1[4] =
{
/* 0 */ { 2, s_1_0, -1, -1, 0},
/* 1 */ { 2, s_1_1, -1, -1, 0},
/* 2 */ { 2, s_1_2, -1, -1, 0},
/* 3 */ { 2, s_1_3, -1, -1, 0}
};
/*
 * Suffixes "ig", "lig", "elig", "els" (fourth field 1) and "l\xF8st" (fourth
 * field 2; 0xF8 is o-with-stroke in ISO-8859-1), searched by r_other_suffix
 * through find_among_b(z, a_2, 5).  The third field chains "elig" -> "lig"
 * -> "ig", each being a shorter suffix of the previous one.
 */
static const symbol s_2_0[2] = { 'i', 'g' };
static const symbol s_2_1[3] = { 'l', 'i', 'g' };
static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' };
static const symbol s_2_3[3] = { 'e', 'l', 's' };
static const symbol s_2_4[4] = { 'l', 0xF8, 's', 't' };
static const struct among a_2[5] =
{
/* 0 */ { 2, s_2_0, -1, 1, 0},
/* 1 */ { 3, s_2_1, 0, 1, 0},
/* 2 */ { 4, s_2_2, 1, 1, 0},
/* 3 */ { 3, s_2_3, -1, 1, 0},
/* 4 */ { 4, s_2_4, -1, 2, 0}
};
/*
 * Character-class tables handed to the in_grouping/out_grouping helpers
 * together with a code range: g_v ("v" in the generated annotations) is used
 * with 97..248, g_s_ending with 97..229.
 * NOTE(review): presumably one bit per character code counted from the lower
 * bound - confirm against the helper implementation.
 */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
/* Literal strings used by r_other_suffix: "st", "ig" and "l\xF8s". */
static const symbol s_0[] = { 's', 't' };
static const symbol s_1[] = { 'i', 'g' };
static const symbol s_2[] = { 'l', 0xF8, 's' };
/*
 * r_mark_regions: compute the region marks used by the suffix routines.
 *
 * I[0] ("p1") starts out as z->l.  I[1] ("x") is set to the position three
 * characters past the cursor; if that position is outside 0..z->l the routine
 * returns 0 at once.  The cursor is then put back, advanced by the two
 * grouping scans over g_v ("goto v", then "gopast non v"), and the position
 * reached becomes I[0].  Finally I[0] is raised to I[1] if it is smaller.
 *
 * Returns 1 on success, 0 if the hop or either scan fails.
 */
static int r_mark_regions(struct SN_env * z) {
z->I[0] = z->l;
{ int c_test = z->c; /* test, line 33 */
{ int ret = z->c + 3;
if (0 > ret || ret > z->l) return 0;
z->c = ret; /* hop, line 33 */
}
z->I[1] = z->c; /* setmark x, line 33 */
z->c = c_test;
}
if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */
{ /* gopast */ /* non v, line 34 */
int ret = in_grouping(z, g_v, 97, 248, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[0] = z->c; /* setmark p1, line 34 */
/* try, line 35 */
/* enforce I[0] >= I[1] */
if (!(z->I[0] < z->I[1])) goto lab0;
z->I[0] = z->I[1];
lab0:
return 1;
}
/*
 * r_main_suffix: remove one of the a_0 suffixes from the end of the word.
 *
 * The search is confined to the part of the word starting at I[0]: z->lb is
 * temporarily moved there and restored from mlimit on each exit from the
 * block.  Table result 1 deletes the matched suffix unconditionally; result 2
 * (the bare "s") deletes it only when in_grouping_b() against g_s_ending
 * returns 0.
 *
 * Returns 1 if a suffix was deleted, 0 if nothing applied, or the negative
 * value returned by slice_del.
 */
static int r_main_suffix(struct SN_env * z) {
int among_var;
{ int mlimit; /* setlimit, line 41 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 41 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 41 */
/* quick reject: the last character must be one of d, e, n, r, s, t (the
 * final letters of the a_0 entries) before the table search is attempted */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
among_var = find_among_b(z, a_0, 32); /* substring, line 41 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 41 */
z->lb = mlimit;
}
switch(among_var) {
case 0: return 0;
case 1:
{ int ret = slice_del(z); /* delete, line 48 */
if (ret < 0) return ret;
}
break;
case 2:
if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0;
{ int ret = slice_del(z); /* delete, line 50 */
if (ret < 0) return ret;
}
break;
}
return 1;
}
/*
 * r_consonant_pair: if the word ends in one of the a_1 pairs ("gd", "dt",
 * "gt", "kt") within the region starting at I[0], delete its last character.
 *
 * The pair is only tested for (the cursor is restored from m_test
 * afterwards); ket stays at the end of the word, the cursor then steps back
 * one character to set bra, and slice_del removes that single character.
 *
 * Returns 1 if a character was deleted, 0 if the test failed, or the negative
 * value returned by slice_del.
 */
static int r_consonant_pair(struct SN_env * z) {
{ int m_test = z->l - z->c; /* test, line 55 */
{ int mlimit; /* setlimit, line 56 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 56 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 56 */
/* quick reject: need at least two characters and a final 'd' (100) or 't' (116) */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; }
if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */
z->bra = z->c; /* ], line 56 */
z->lb = mlimit;
}
z->c = z->l - m_test;
}
if (z->c <= z->lb) return 0;
z->c--; /* next, line 62 */
z->bra = z->c; /* ], line 62 */
{ int ret = slice_del(z); /* delete, line 62 */
if (ret < 0) return ret;
}
return 1;
}
/*
 * r_other_suffix: handle the a_2 suffixes.
 *
 * Step 1 (optional, cursor restored afterwards): if the word ends in "st"
 * (s_0) and that is preceded by "ig" (s_1), the "st" slice is deleted.
 * Step 2: within the region starting at I[0], search a_2.  Result 1 deletes
 * the suffix and then gives r_consonant_pair a chance to run (its failure is
 * ignored); result 2 replaces the suffix with the three characters of s_2.
 *
 * Returns 1 if step 2 applied, 0 if not, or a negative value passed up from
 * slice_del / slice_from_s / r_consonant_pair.
 */
static int r_other_suffix(struct SN_env * z) {
int among_var;
{ int m1 = z->l - z->c; (void)m1; /* do, line 66 */
z->ket = z->c; /* [, line 66 */
if (!(eq_s_b(z, 2, s_0))) goto lab0;
z->bra = z->c; /* ], line 66 */
if (!(eq_s_b(z, 2, s_1))) goto lab0;
{ int ret = slice_del(z); /* delete, line 66 */
if (ret < 0) return ret;
}
lab0:
z->c = z->l - m1;
}
{ int mlimit; /* setlimit, line 67 */
int m2 = z->l - z->c; (void)m2;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 67 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m2;
z->ket = z->c; /* [, line 67 */
/* quick reject: need at least two characters and a final g, s or t (the
 * final letters of the a_2 entries) */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
among_var = find_among_b(z, a_2, 5); /* substring, line 67 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 67 */
z->lb = mlimit;
}
switch(among_var) {
case 0: return 0;
case 1:
{ int ret = slice_del(z); /* delete, line 70 */
if (ret < 0) return ret;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 70 */
{ int ret = r_consonant_pair(z);
if (ret == 0) goto lab1; /* call consonant_pair, line 70 */
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m3;
}
break;
case 2:
{ int ret = slice_from_s(z, 3, s_2); /* <-, line 72 */
if (ret < 0) return ret;
}
break;
}
return 1;
}
/*
 * r_undouble: remove a doubled final character.
 *
 * Within the region starting at I[0], out_grouping_b() against g_v must
 * return 0 for the last character; the slice it covers is copied into S[0]
 * ("ch").  If the text before it then equals S[0] (eq_v_b), the slice is
 * deleted.
 *
 * Returns 1 if a character was deleted, 0 if the conditions did not hold,
 * -1 if slice_to failed, or the negative value returned by slice_del.
 * NOTE(review): on the -1 path z->lb is not restored from mlimit.
 */
static int r_undouble(struct SN_env * z) {
{ int mlimit; /* setlimit, line 76 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 76 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 76 */
if (out_grouping_b(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 76 */
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
if (z->S[0] == 0) return -1; /* -> ch, line 76 */
z->lb = mlimit;
}
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
{ int ret = slice_del(z); /* delete, line 78 */
if (ret < 0) return ret;
}
return 1;
}
/*
 * danish_ISO_8859_1_stem: entry point of the Danish (ISO-8859-1) stemmer.
 *
 * Runs r_mark_regions forwards, then switches to backward mode (z->lb takes
 * the cursor, the cursor moves to z->l) and runs r_main_suffix,
 * r_consonant_pair, r_other_suffix and r_undouble in that order.  Each step
 * is optional: a 0 result is ignored and the cursor is reset before the next
 * step.  At the end the cursor is put back at z->lb.
 *
 * Returns 1, or the negative value of the first step that reported an error.
 */
extern int danish_ISO_8859_1_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 84 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab0; /* call mark_regions, line 84 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
z->lb = z->c; z->c = z->l; /* backwards, line 85 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 86 */
{ int ret = r_main_suffix(z);
if (ret == 0) goto lab1; /* call main_suffix, line 86 */
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 87 */
{ int ret = r_consonant_pair(z);
if (ret == 0) goto lab2; /* call consonant_pair, line 87 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
{ int m4 = z->l - z->c; (void)m4; /* do, line 88 */
{ int ret = r_other_suffix(z);
if (ret == 0) goto lab3; /* call other_suffix, line 88 */
if (ret < 0) return ret;
}
lab3:
z->c = z->l - m4;
}
{ int m5 = z->l - z->c; (void)m5; /* do, line 89 */
{ int ret = r_undouble(z);
if (ret == 0) goto lab4; /* call undouble, line 89 */
if (ret < 0) return ret;
}
lab4:
z->c = z->l - m5;
}
z->c = z->lb;
return 1;
}
/*
 * Create the environment for the Danish stemmer: sizes 1, 2 and 0 are passed
 * to SN_create_env (this file uses S[0], I[0] and I[1]).
 */
extern struct SN_env * danish_ISO_8859_1_create_env(void)
{
    return SN_create_env(1, 2, 0);
}
/*
 * Release an environment made by danish_ISO_8859_1_create_env; the 1 is the
 * same S size that was used to create it.
 */
extern void danish_ISO_8859_1_close_env(struct SN_env * z)
{
    SN_close_env(z, 1);
}

View File

@ -0,0 +1,624 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int dutch_ISO_8859_1_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_standard_suffix(struct SN_env * z);
static int r_undouble(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
static int r_en_ending(struct SN_env * z);
static int r_e_ending(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_prelude(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * dutch_ISO_8859_1_create_env(void);
extern void dutch_ISO_8859_1_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Single accented vowels (ISO-8859-1 codes) and the table a_0 searched by
 * r_prelude through find_among(z, a_0, 11).  The fourth field selects the
 * replacement made in r_prelude: 1 -> 'a' (0xE1, 0xE4), 2 -> 'e' (0xE9, 0xEB),
 * 3 -> 'i' (0xED, 0xEF), 4 -> 'o' (0xF3, 0xF6), 5 -> 'u' (0xFA, 0xFC).
 * Entry 0 is the empty string with value 6, for which r_prelude just steps
 * over one character.
 */
static const symbol s_0_1[1] = { 0xE1 };
static const symbol s_0_2[1] = { 0xE4 };
static const symbol s_0_3[1] = { 0xE9 };
static const symbol s_0_4[1] = { 0xEB };
static const symbol s_0_5[1] = { 0xED };
static const symbol s_0_6[1] = { 0xEF };
static const symbol s_0_7[1] = { 0xF3 };
static const symbol s_0_8[1] = { 0xF6 };
static const symbol s_0_9[1] = { 0xFA };
static const symbol s_0_10[1] = { 0xFC };
static const struct among a_0[11] =
{
/* 0 */ { 0, 0, -1, 6, 0},
/* 1 */ { 1, s_0_1, 0, 1, 0},
/* 2 */ { 1, s_0_2, 0, 1, 0},
/* 3 */ { 1, s_0_3, 0, 2, 0},
/* 4 */ { 1, s_0_4, 0, 2, 0},
/* 5 */ { 1, s_0_5, 0, 3, 0},
/* 6 */ { 1, s_0_6, 0, 3, 0},
/* 7 */ { 1, s_0_7, 0, 4, 0},
/* 8 */ { 1, s_0_8, 0, 4, 0},
/* 9 */ { 1, s_0_9, 0, 5, 0},
/* 10 */ { 1, s_0_10, 0, 5, 0}
};
/*
 * Table a_1 searched by r_postlude through find_among(z, a_1, 3): 'Y' has
 * value 1 (replaced with 'y'), 'I' has value 2 (replaced with 'i'), and the
 * empty string has value 3 (step over one character).
 */
static const symbol s_1_1[1] = { 'I' };
static const symbol s_1_2[1] = { 'Y' };
static const struct among a_1[3] =
{
/* 0 */ { 0, 0, -1, 3, 0},
/* 1 */ { 1, s_1_1, 0, 2, 0},
/* 2 */ { 1, s_1_2, 0, 1, 0}
};
/*
 * Doubled consonants "dd", "kk", "tt" tested by r_undouble through
 * find_among_b(z, a_2, 3); only whether any entry matched is used.
 */
static const symbol s_2_0[2] = { 'd', 'd' };
static const symbol s_2_1[2] = { 'k', 'k' };
static const symbol s_2_2[2] = { 't', 't' };
static const struct among a_2[3] =
{
/* 0 */ { 2, s_2_0, -1, -1, 0},
/* 1 */ { 2, s_2_1, -1, -1, 0},
/* 2 */ { 2, s_2_2, -1, -1, 0}
};
/*
 * Suffixes searched first by r_standard_suffix through
 * find_among_b(z, a_3, 5): "heden" has value 1, "ene" and "en" have value 2,
 * "se" and "s" have value 3.  "heden" points at "en" (entry 2), a shorter
 * suffix of it.
 */
static const symbol s_3_0[3] = { 'e', 'n', 'e' };
static const symbol s_3_1[2] = { 's', 'e' };
static const symbol s_3_2[2] = { 'e', 'n' };
static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' };
static const symbol s_3_4[1] = { 's' };
static const struct among a_3[5] =
{
/* 0 */ { 3, s_3_0, -1, 2, 0},
/* 1 */ { 2, s_3_1, -1, 3, 0},
/* 2 */ { 2, s_3_2, -1, 2, 0},
/* 3 */ { 5, s_3_3, 2, 1, 0},
/* 4 */ { 1, s_3_4, -1, 3, 0}
};
/*
 * Suffixes searched by r_standard_suffix through find_among_b(z, a_4, 6):
 * "end" and "ing" have value 1, "ig" 2, "lijk" 3, "baar" 4, "bar" 5.
 */
static const symbol s_4_0[3] = { 'e', 'n', 'd' };
static const symbol s_4_1[2] = { 'i', 'g' };
static const symbol s_4_2[3] = { 'i', 'n', 'g' };
static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' };
static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' };
static const symbol s_4_5[3] = { 'b', 'a', 'r' };
static const struct among a_4[6] =
{
/* 0 */ { 3, s_4_0, -1, 1, 0},
/* 1 */ { 2, s_4_1, -1, 2, 0},
/* 2 */ { 3, s_4_2, -1, 1, 0},
/* 3 */ { 4, s_4_3, -1, 3, 0},
/* 4 */ { 4, s_4_4, -1, 4, 0},
/* 5 */ { 3, s_4_5, -1, 5, 0}
};
/*
 * Doubled vowels "aa", "ee", "oo", "uu" tested in the last step of
 * r_standard_suffix through find_among_b(z, a_5, 4); only whether any entry
 * matched is used.
 */
static const symbol s_5_0[2] = { 'a', 'a' };
static const symbol s_5_1[2] = { 'e', 'e' };
static const symbol s_5_2[2] = { 'o', 'o' };
static const symbol s_5_3[2] = { 'u', 'u' };
static const struct among a_5[4] =
{
/* 0 */ { 2, s_5_0, -1, -1, 0},
/* 1 */ { 2, s_5_1, -1, -1, 0},
/* 2 */ { 2, s_5_2, -1, -1, 0},
/* 3 */ { 2, s_5_3, -1, -1, 0}
};
/*
 * Character-class tables handed to the in_grouping/out_grouping helpers
 * together with a code range: g_v and g_v_j are used with 97..232, g_v_I
 * with 73..232.
 * NOTE(review): presumably one bit per character code counted from the lower
 * bound - confirm against the helper implementation.
 */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
/*
 * Literal strings, one per use site in the generated routines (so several
 * have identical contents): s_0..s_10 are used by r_prelude, s_11..s_12 by
 * r_postlude, s_13 by r_e_ending, s_14 by r_en_ending and s_15..s_21 by
 * r_standard_suffix.
 */
static const symbol s_0[] = { 'a' };
static const symbol s_1[] = { 'e' };
static const symbol s_2[] = { 'i' };
static const symbol s_3[] = { 'o' };
static const symbol s_4[] = { 'u' };
static const symbol s_5[] = { 'y' };
static const symbol s_6[] = { 'Y' };
static const symbol s_7[] = { 'i' };
static const symbol s_8[] = { 'I' };
static const symbol s_9[] = { 'y' };
static const symbol s_10[] = { 'Y' };
static const symbol s_11[] = { 'y' };
static const symbol s_12[] = { 'i' };
static const symbol s_13[] = { 'e' };
static const symbol s_14[] = { 'g', 'e', 'm' };
static const symbol s_15[] = { 'h', 'e', 'i', 'd' };
static const symbol s_16[] = { 'h', 'e', 'i', 'd' };
static const symbol s_17[] = { 'c' };
static const symbol s_18[] = { 'e', 'n' };
static const symbol s_19[] = { 'i', 'g' };
static const symbol s_20[] = { 'e' };
static const symbol s_21[] = { 'e' };
/*
 * r_prelude: normalize the word before the regions are marked.
 *
 * Phase 1 (cursor restored afterwards): scan the word and replace each
 * accented vowel listed in a_0 with its plain vowel; other characters are
 * stepped over.
 * Phase 2: if the word starts with 'y', replace it with 'Y'.
 * Phase 3: repeatedly look for a position where in_grouping() against g_v
 * returns 0 and which is followed either by an 'i' that is itself followed by
 * another such g_v character (the 'i' becomes 'I'), or by a 'y' (which
 * becomes 'Y').
 *
 * Returns 1, or the negative value returned by a failing slice_from_s.
 */
static int r_prelude(struct SN_env * z) {
int among_var;
{ int c_test = z->c; /* test, line 42 */
while(1) { /* repeat, line 42 */
int c1 = z->c;
z->bra = z->c; /* [, line 43 */
/* characters outside 0xE0..0xFF, or not among the ten a_0 codes, take the
 * "anything else" result 6 without calling find_among */
if (z->c >= z->l || z->p[z->c + 0] >> 5 != 7 || !((340306450 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 6; else
among_var = find_among(z, a_0, 11); /* substring, line 43 */
if (!(among_var)) goto lab0;
z->ket = z->c; /* ], line 43 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */
if (ret < 0) return ret;
}
break;
case 6:
if (z->c >= z->l) goto lab0;
z->c++; /* next, line 54 */
break;
}
continue;
lab0:
z->c = c1;
break;
}
z->c = c_test;
}
{ int c_keep = z->c; /* try, line 57 */
z->bra = z->c; /* [, line 57 */
if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; }
z->ket = z->c; /* ], line 57 */
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */
if (ret < 0) return ret;
}
lab1:
;
}
while(1) { /* repeat, line 58 */
int c2 = z->c;
while(1) { /* goto, line 58 */
int c3 = z->c;
if (in_grouping(z, g_v, 97, 232, 0)) goto lab3;
z->bra = z->c; /* [, line 59 */
{ int c4 = z->c; /* or, line 59 */
if (!(eq_s(z, 1, s_7))) goto lab5;
z->ket = z->c; /* ], line 59 */
if (in_grouping(z, g_v, 97, 232, 0)) goto lab5;
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */
if (ret < 0) return ret;
}
goto lab4;
lab5:
z->c = c4;
if (!(eq_s(z, 1, s_9))) goto lab3;
z->ket = z->c; /* ], line 60 */
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */
if (ret < 0) return ret;
}
}
lab4:
/* a replacement was made: resume the outer repeat from where this attempt began */
z->c = c3;
break;
lab3:
/* no match here: advance one character, or stop the repeat at end of word */
z->c = c3;
if (z->c >= z->l) goto lab2;
z->c++; /* goto, line 58 */
}
continue;
lab2:
z->c = c2;
break;
}
return 1;
}
/*
 * r_mark_regions: compute the marks I[0] ("p1") and I[1] ("p2").
 *
 * Both start out as z->l.  The cursor is advanced by an out_grouping scan and
 * an in_grouping scan over g_v ("gopast v", "gopast non v") and the position
 * reached becomes I[0], raised to 3 if it is smaller.  The same pair of scans
 * is run once more from there and the position reached becomes I[1].
 *
 * Returns 1 on success, or 0 as soon as a scan fails (marks set so far are
 * kept).
 */
static int r_mark_regions(struct SN_env * z) {
z->I[0] = z->l;
z->I[1] = z->l;
{ /* gopast */ /* grouping v, line 69 */
int ret = out_grouping(z, g_v, 97, 232, 1);
if (ret < 0) return 0;
z->c += ret;
}
{ /* gopast */ /* non v, line 69 */
int ret = in_grouping(z, g_v, 97, 232, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[0] = z->c; /* setmark p1, line 69 */
/* try, line 70 */
/* enforce I[0] >= 3 */
if (!(z->I[0] < 3)) goto lab0;
z->I[0] = 3;
lab0:
{ /* gopast */ /* grouping v, line 71 */
int ret = out_grouping(z, g_v, 97, 232, 1);
if (ret < 0) return 0;
z->c += ret;
}
{ /* gopast */ /* non v, line 71 */
int ret = in_grouping(z, g_v, 97, 232, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[1] = z->c; /* setmark p2, line 71 */
return 1;
}
/*
 * r_postlude: undo the markers introduced by r_prelude.
 *
 * Scans the word from the cursor: each 'Y' is replaced with 'y' and each 'I'
 * with 'i'; any other character is stepped over.  The loop ends at the end of
 * the word.
 *
 * Returns 1, or the negative value returned by a failing slice_from_s.
 */
static int r_postlude(struct SN_env * z) {
int among_var;
while(1) { /* repeat, line 75 */
int c1 = z->c;
z->bra = z->c; /* [, line 77 */
/* anything other than 'I' (73) or 'Y' (89) takes result 3 directly */
if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else
among_var = find_among(z, a_1, 3); /* substring, line 77 */
if (!(among_var)) goto lab0;
z->ket = z->c; /* ], line 77 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */
if (ret < 0) return ret;
}
break;
case 3:
if (z->c >= z->l) goto lab0;
z->c++; /* next, line 80 */
break;
}
continue;
lab0:
z->c = c1;
break;
}
return 1;
}
/* r_R1: 1 if the cursor is at or past the mark I[0], otherwise 0. */
static int r_R1(struct SN_env * z) {
    return (z->c >= z->I[0]) ? 1 : 0;
}
/* r_R2: 1 if the cursor is at or past the mark I[1], otherwise 0. */
static int r_R2(struct SN_env * z) {
    return (z->c >= z->I[1]) ? 1 : 0;
}
/*
 * r_undouble: if the text before the cursor ends in "dd", "kk" or "tt" (a_2),
 * delete the last of the two characters.
 *
 * The pair is only tested for (the cursor is restored from m_test); ket is
 * then set at the cursor, the cursor steps back one character to set bra, and
 * slice_del removes that single character.
 *
 * Returns 1 if a character was deleted, 0 if the test failed, or the negative
 * value returned by slice_del.
 */
static int r_undouble(struct SN_env * z) {
{ int m_test = z->l - z->c; /* test, line 91 */
/* quick reject: need at least two characters and a final d, k or t */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
if (!(find_among_b(z, a_2, 3))) return 0; /* among, line 91 */
z->c = z->l - m_test;
}
z->ket = z->c; /* [, line 91 */
if (z->c <= z->lb) return 0;
z->c--; /* next, line 91 */
z->bra = z->c; /* ], line 91 */
{ int ret = slice_del(z); /* delete, line 91 */
if (ret < 0) return ret;
}
return 1;
}
/*
 * r_e_ending: remove a final 'e' and record that in B[0] ("e_found").
 *
 * B[0] is cleared first.  The 'e' (s_13) must match before the cursor, r_R1
 * must hold at the position in front of it, and out_grouping_b() against g_v
 * must return 0 for the character before it (tested only, cursor restored).
 * The 'e' is then deleted, B[0] is set to 1 and r_undouble is called.
 *
 * Returns 1 only if r_undouble also succeeded; 0 if any condition or
 * r_undouble failed (B[0] may already be 1 in the latter case); or a negative
 * value passed up from slice_del / r_undouble.
 */
static int r_e_ending(struct SN_env * z) {
z->B[0] = 0; /* unset e_found, line 95 */
z->ket = z->c; /* [, line 96 */
if (!(eq_s_b(z, 1, s_13))) return 0;
z->bra = z->c; /* ], line 96 */
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 96 */
if (ret < 0) return ret;
}
{ int m_test = z->l - z->c; /* test, line 96 */
if (out_grouping_b(z, g_v, 97, 232, 0)) return 0;
z->c = z->l - m_test;
}
{ int ret = slice_del(z); /* delete, line 96 */
if (ret < 0) return ret;
}
z->B[0] = 1; /* set e_found, line 97 */
{ int ret = r_undouble(z);
if (ret == 0) return 0; /* call undouble, line 98 */
if (ret < 0) return ret;
}
return 1;
}
/*
 * r_en_ending: delete the slice already delimited by the caller (bra/ket are
 * not set here), subject to three conditions at the cursor: r_R1 holds,
 * out_grouping_b() against g_v returns 0 for the preceding character, and the
 * preceding text is not "gem" (s_14).  After the deletion r_undouble is
 * called.
 *
 * Returns 1 only if r_undouble also succeeded, 0 if a condition or r_undouble
 * failed, or a negative value passed up from slice_del / r_undouble.
 */
static int r_en_ending(struct SN_env * z) {
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 102 */
if (ret < 0) return ret;
}
{ int m1 = z->l - z->c; (void)m1; /* and, line 102 */
if (out_grouping_b(z, g_v, 97, 232, 0)) return 0;
z->c = z->l - m1;
{ int m2 = z->l - z->c; (void)m2; /* not, line 102 */
if (!(eq_s_b(z, 3, s_14))) goto lab0;
return 0;
lab0:
z->c = z->l - m2;
}
}
{ int ret = slice_del(z); /* delete, line 102 */
if (ret < 0) return ret;
}
{ int ret = r_undouble(z);
if (ret == 0) return 0; /* call undouble, line 103 */
if (ret < 0) return ret;
}
return 1;
}
/*
 * r_standard_suffix: the main suffix-stripping sequence, run in backward
 * mode.  It consists of five independent "do" steps; after each one the
 * cursor is put back to where it was, and a step that does not apply is
 * simply skipped.
 *
 *   1. a_3 suffix: "heden" is replaced with "heid" (needs R1); "ene"/"en" go
 *      through r_en_ending; "se"/"s" are deleted if R1 holds and
 *      out_grouping_b() against g_v_j returns 0.
 *   2. r_e_ending.
 *   3. "heid" in R2 not preceded by 'c' is deleted; if "en" then precedes,
 *      r_en_ending is applied to it.
 *   4. a_4 suffix, each needing R2: "end"/"ing" (deleted, then either a
 *      following "ig" is removed or r_undouble runs), "ig" (deleted unless
 *      preceded by 'e'), "lijk" (deleted, then r_e_ending), "baar" (deleted),
 *      "bar" (deleted only if B[0], i.e. e_found, is set).
 *   5. If the ending passes the g_v_I / a_5 / g_v tests below, the character
 *      before the last one is deleted.
 *
 * Always returns 1 unless a helper reports an error (negative value), which
 * is passed up.
 */
static int r_standard_suffix(struct SN_env * z) {
int among_var;
/* step 1: a_3 suffixes */
{ int m1 = z->l - z->c; (void)m1; /* do, line 107 */
z->ket = z->c; /* [, line 108 */
/* quick reject: the last character must be e, n or s */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0;
among_var = find_among_b(z, a_3, 5); /* substring, line 108 */
if (!(among_var)) goto lab0;
z->bra = z->c; /* ], line 108 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = r_R1(z);
if (ret == 0) goto lab0; /* call R1, line 110 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = r_en_ending(z);
if (ret == 0) goto lab0; /* call en_ending, line 113 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = r_R1(z);
if (ret == 0) goto lab0; /* call R1, line 116 */
if (ret < 0) return ret;
}
if (out_grouping_b(z, g_v_j, 97, 232, 0)) goto lab0;
{ int ret = slice_del(z); /* delete, line 116 */
if (ret < 0) return ret;
}
break;
}
lab0:
z->c = z->l - m1;
}
/* step 2: final 'e' */
{ int m2 = z->l - z->c; (void)m2; /* do, line 120 */
{ int ret = r_e_ending(z);
if (ret == 0) goto lab1; /* call e_ending, line 120 */
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m2;
}
/* step 3: "heid", optionally followed by removal of a preceding "en" */
{ int m3 = z->l - z->c; (void)m3; /* do, line 122 */
z->ket = z->c; /* [, line 122 */
if (!(eq_s_b(z, 4, s_16))) goto lab2;
z->bra = z->c; /* ], line 122 */
{ int ret = r_R2(z);
if (ret == 0) goto lab2; /* call R2, line 122 */
if (ret < 0) return ret;
}
{ int m4 = z->l - z->c; (void)m4; /* not, line 122 */
if (!(eq_s_b(z, 1, s_17))) goto lab3;
goto lab2;
lab3:
z->c = z->l - m4;
}
{ int ret = slice_del(z); /* delete, line 122 */
if (ret < 0) return ret;
}
z->ket = z->c; /* [, line 123 */
if (!(eq_s_b(z, 2, s_18))) goto lab2;
z->bra = z->c; /* ], line 123 */
{ int ret = r_en_ending(z);
if (ret == 0) goto lab2; /* call en_ending, line 123 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
/* step 4: a_4 suffixes */
{ int m5 = z->l - z->c; (void)m5; /* do, line 126 */
z->ket = z->c; /* [, line 127 */
/* quick reject: need at least two characters and a final d, g, k or r */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4;
among_var = find_among_b(z, a_4, 6); /* substring, line 127 */
if (!(among_var)) goto lab4;
z->bra = z->c; /* ], line 127 */
switch(among_var) {
case 0: goto lab4;
case 1:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 129 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 129 */
if (ret < 0) return ret;
}
{ int m6 = z->l - z->c; (void)m6; /* or, line 130 */
z->ket = z->c; /* [, line 130 */
if (!(eq_s_b(z, 2, s_19))) goto lab6;
z->bra = z->c; /* ], line 130 */
{ int ret = r_R2(z);
if (ret == 0) goto lab6; /* call R2, line 130 */
if (ret < 0) return ret;
}
{ int m7 = z->l - z->c; (void)m7; /* not, line 130 */
if (!(eq_s_b(z, 1, s_20))) goto lab7;
goto lab6;
lab7:
z->c = z->l - m7;
}
{ int ret = slice_del(z); /* delete, line 130 */
if (ret < 0) return ret;
}
goto lab5;
lab6:
z->c = z->l - m6;
{ int ret = r_undouble(z);
if (ret == 0) goto lab4; /* call undouble, line 130 */
if (ret < 0) return ret;
}
}
lab5:
break;
case 2:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 133 */
if (ret < 0) return ret;
}
{ int m8 = z->l - z->c; (void)m8; /* not, line 133 */
if (!(eq_s_b(z, 1, s_21))) goto lab8;
goto lab4;
lab8:
z->c = z->l - m8;
}
{ int ret = slice_del(z); /* delete, line 133 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 136 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 136 */
if (ret < 0) return ret;
}
{ int ret = r_e_ending(z);
if (ret == 0) goto lab4; /* call e_ending, line 136 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 139 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 139 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 142 */
if (ret < 0) return ret;
}
if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */
{ int ret = slice_del(z); /* delete, line 142 */
if (ret < 0) return ret;
}
break;
}
lab4:
z->c = z->l - m5;
}
/* step 5: shorten a doubled vowel (a_5) near the end of the word */
{ int m9 = z->l - z->c; (void)m9; /* do, line 146 */
if (out_grouping_b(z, g_v_I, 73, 232, 0)) goto lab9;
{ int m_test = z->l - z->c; /* test, line 148 */
/* quick reject: need at least two characters and a final a, e, o or u */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9;
if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */
if (out_grouping_b(z, g_v, 97, 232, 0)) goto lab9;
z->c = z->l - m_test;
}
z->ket = z->c; /* [, line 152 */
if (z->c <= z->lb) goto lab9;
z->c--; /* next, line 152 */
z->bra = z->c; /* ], line 152 */
{ int ret = slice_del(z); /* delete, line 152 */
if (ret < 0) return ret;
}
lab9:
z->c = z->l - m9;
}
return 1;
}
/*
 * dutch_ISO_8859_1_stem: entry point of the Dutch (ISO-8859-1) stemmer.
 *
 * Runs r_prelude and r_mark_regions forwards, then switches to backward mode
 * (z->lb takes the cursor, the cursor moves to z->l) for r_standard_suffix,
 * puts the cursor back at z->lb and finally runs r_postlude forwards.  Each
 * step is optional: a 0 result is ignored and the cursor is reset before the
 * next step.
 *
 * Returns 1, or the negative value of the first step that reported an error.
 */
extern int dutch_ISO_8859_1_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 159 */
{ int ret = r_prelude(z);
if (ret == 0) goto lab0; /* call prelude, line 159 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
{ int c2 = z->c; /* do, line 160 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab1; /* call mark_regions, line 160 */
if (ret < 0) return ret;
}
lab1:
z->c = c2;
}
z->lb = z->c; z->c = z->l; /* backwards, line 161 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 162 */
{ int ret = r_standard_suffix(z);
if (ret == 0) goto lab2; /* call standard_suffix, line 162 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
z->c = z->lb;
{ int c4 = z->c; /* do, line 163 */
{ int ret = r_postlude(z);
if (ret == 0) goto lab3; /* call postlude, line 163 */
if (ret < 0) return ret;
}
lab3:
z->c = c4;
}
return 1;
}
/*
 * Create the environment for the Dutch stemmer: sizes 0, 2 and 1 are passed
 * to SN_create_env (this file uses I[0], I[1] and B[0]).
 */
extern struct SN_env * dutch_ISO_8859_1_create_env(void)
{
    return SN_create_env(0, 2, 1);
}
/*
 * Release an environment made by dutch_ISO_8859_1_create_env; the 0 is the
 * same S size that was used to create it.
 */
extern void dutch_ISO_8859_1_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,762 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int finnish_ISO_8859_1_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_tidy(struct SN_env * z);
static int r_other_endings(struct SN_env * z);
static int r_t_plural(struct SN_env * z);
static int r_i_plural(struct SN_env * z);
static int r_case_ending(struct SN_env * z);
static int r_VI(struct SN_env * z);
static int r_LONG(struct SN_env * z);
static int r_possessive(struct SN_env * z);
static int r_particle_etc(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * finnish_ISO_8859_1_create_env(void);
extern void finnish_ISO_8859_1_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Finnish table a_0: ten endings (0xE4 and 0xF6 are a-diaeresis and
 * o-diaeresis in ISO-8859-1).  The first field of each entry repeats the
 * string length; the fourth field is 2 for "sti" and 1 for all others.
 * NOTE(review): the routine that searches this table is outside this part of
 * the file; presumably the fourth field is the value it switches on.
 */
static const symbol s_0_0[2] = { 'p', 'a' };
static const symbol s_0_1[3] = { 's', 't', 'i' };
static const symbol s_0_2[4] = { 'k', 'a', 'a', 'n' };
static const symbol s_0_3[3] = { 'h', 'a', 'n' };
static const symbol s_0_4[3] = { 'k', 'i', 'n' };
static const symbol s_0_5[3] = { 'h', 0xE4, 'n' };
static const symbol s_0_6[4] = { 'k', 0xE4, 0xE4, 'n' };
static const symbol s_0_7[2] = { 'k', 'o' };
static const symbol s_0_8[2] = { 'p', 0xE4 };
static const symbol s_0_9[2] = { 'k', 0xF6 };
static const struct among a_0[10] =
{
/* 0 */ { 2, s_0_0, -1, 1, 0},
/* 1 */ { 3, s_0_1, -1, 2, 0},
/* 2 */ { 4, s_0_2, -1, 1, 0},
/* 3 */ { 3, s_0_3, -1, 1, 0},
/* 4 */ { 3, s_0_4, -1, 1, 0},
/* 5 */ { 3, s_0_5, -1, 1, 0},
/* 6 */ { 4, s_0_6, -1, 1, 0},
/* 7 */ { 2, s_0_7, -1, 1, 0},
/* 8 */ { 2, s_0_8, -1, 1, 0},
/* 9 */ { 2, s_0_9, -1, 1, 0}
};
/*
 * Finnish table a_1: endings "lla", "na", "ssa", "ta", "lta", "sta".  All
 * entries have -1 in the fourth field.  "lta" and "sta" point at "ta"
 * (entry 3), a shorter suffix of both.  The user of this table is outside
 * this part of the file.
 */
static const symbol s_1_0[3] = { 'l', 'l', 'a' };
static const symbol s_1_1[2] = { 'n', 'a' };
static const symbol s_1_2[3] = { 's', 's', 'a' };
static const symbol s_1_3[2] = { 't', 'a' };
static const symbol s_1_4[3] = { 'l', 't', 'a' };
static const symbol s_1_5[3] = { 's', 't', 'a' };
static const struct among a_1[6] =
{
/* 0 */ { 3, s_1_0, -1, -1, 0},
/* 1 */ { 2, s_1_1, -1, -1, 0},
/* 2 */ { 3, s_1_2, -1, -1, 0},
/* 3 */ { 2, s_1_3, -1, -1, 0},
/* 4 */ { 3, s_1_4, 3, -1, 0},
/* 5 */ { 3, s_1_5, 3, -1, 0}
};
/*
 * Finnish table a_2: the same six endings as a_1 with the final 'a' replaced
 * by 0xE4 (a-diaeresis in ISO-8859-1), laid out identically.  The user of
 * this table is outside this part of the file.
 */
static const symbol s_2_0[3] = { 'l', 'l', 0xE4 };
static const symbol s_2_1[2] = { 'n', 0xE4 };
static const symbol s_2_2[3] = { 's', 's', 0xE4 };
static const symbol s_2_3[2] = { 't', 0xE4 };
static const symbol s_2_4[3] = { 'l', 't', 0xE4 };
static const symbol s_2_5[3] = { 's', 't', 0xE4 };
static const struct among a_2[6] =
{
/* 0 */ { 3, s_2_0, -1, -1, 0},
/* 1 */ { 2, s_2_1, -1, -1, 0},
/* 2 */ { 3, s_2_2, -1, -1, 0},
/* 3 */ { 2, s_2_3, -1, -1, 0},
/* 4 */ { 3, s_2_4, 3, -1, 0},
/* 5 */ { 3, s_2_5, 3, -1, 0}
};
/* Endings "lle" and "ine", tested by r_possessive (among_var == 6) via
 * find_among_b(z, a_3, 2). */
static const symbol s_3_0[3] = { 'l', 'l', 'e' };
static const symbol s_3_1[3] = { 'i', 'n', 'e' };
static const struct among a_3[2] =
{
/* 0 */ { 3, s_3_0, -1, -1, 0},
/* 1 */ { 3, s_3_1, -1, -1, 0}
};
/* Endings searched by r_possessive via find_among_b(z, a_4, 9).  The fourth
 * field of each row (1..6) is the case label r_possessive dispatches on.
 * 0xE4 is the ISO-8859-1 code for a-umlaut. */
static const symbol s_4_0[3] = { 'n', 's', 'a' };
static const symbol s_4_1[3] = { 'm', 'm', 'e' };
static const symbol s_4_2[3] = { 'n', 'n', 'e' };
static const symbol s_4_3[2] = { 'n', 'i' };
static const symbol s_4_4[2] = { 's', 'i' };
static const symbol s_4_5[2] = { 'a', 'n' };
static const symbol s_4_6[2] = { 'e', 'n' };
static const symbol s_4_7[2] = { 0xE4, 'n' };
static const symbol s_4_8[3] = { 'n', 's', 0xE4 };
static const struct among a_4[9] =
{
/* 0 */ { 3, s_4_0, -1, 3, 0},
/* 1 */ { 3, s_4_1, -1, 3, 0},
/* 2 */ { 3, s_4_2, -1, 3, 0},
/* 3 */ { 2, s_4_3, -1, 2, 0},
/* 4 */ { 2, s_4_4, -1, 1, 0},
/* 5 */ { 2, s_4_5, -1, 4, 0},
/* 6 */ { 2, s_4_6, -1, 6, 0},
/* 7 */ { 2, s_4_7, -1, 5, 0},
/* 8 */ { 3, s_4_8, -1, 3, 0}
};
/* Doubled vowels (aa, ee, ii, oo, uu, and the doubled ISO-8859-1 a-umlaut and
 * o-umlaut).  Searched by r_LONG via find_among_b(z, a_5, 7). */
static const symbol s_5_0[2] = { 'a', 'a' };
static const symbol s_5_1[2] = { 'e', 'e' };
static const symbol s_5_2[2] = { 'i', 'i' };
static const symbol s_5_3[2] = { 'o', 'o' };
static const symbol s_5_4[2] = { 'u', 'u' };
static const symbol s_5_5[2] = { 0xE4, 0xE4 };
static const symbol s_5_6[2] = { 0xF6, 0xF6 };
static const struct among a_5[7] =
{
/* 0 */ { 2, s_5_0, -1, -1, 0},
/* 1 */ { 2, s_5_1, -1, -1, 0},
/* 2 */ { 2, s_5_2, -1, -1, 0},
/* 3 */ { 2, s_5_3, -1, -1, 0},
/* 4 */ { 2, s_5_4, -1, -1, 0},
/* 5 */ { 2, s_5_5, -1, -1, 0},
/* 6 */ { 2, s_5_6, -1, -1, 0}
};
/* Endings searched by r_case_ending via find_among_b(z, a_6, 30).
 * The fourth field (1..9, or -1) is the case label r_case_ending dispatches
 * on; rows whose fifth field is r_VI or r_LONG name a routine attached to the
 * entry -- presumably an extra condition evaluated by find_among_b; confirm in
 * the Snowball runtime.  The third field of a row is either -1 or the index
 * of a shorter entry in this table (e.g. rows 12..21 all point at row 11,
 * "n").  0xE4 / 0xF6 are ISO-8859-1 a-umlaut / o-umlaut. */
static const symbol s_6_0[1] = { 'a' };
static const symbol s_6_1[3] = { 'l', 'l', 'a' };
static const symbol s_6_2[2] = { 'n', 'a' };
static const symbol s_6_3[3] = { 's', 's', 'a' };
static const symbol s_6_4[2] = { 't', 'a' };
static const symbol s_6_5[3] = { 'l', 't', 'a' };
static const symbol s_6_6[3] = { 's', 't', 'a' };
static const symbol s_6_7[3] = { 't', 't', 'a' };
static const symbol s_6_8[3] = { 'l', 'l', 'e' };
static const symbol s_6_9[3] = { 'i', 'n', 'e' };
static const symbol s_6_10[3] = { 'k', 's', 'i' };
static const symbol s_6_11[1] = { 'n' };
static const symbol s_6_12[3] = { 'h', 'a', 'n' };
static const symbol s_6_13[3] = { 'd', 'e', 'n' };
static const symbol s_6_14[4] = { 's', 'e', 'e', 'n' };
static const symbol s_6_15[3] = { 'h', 'e', 'n' };
static const symbol s_6_16[4] = { 't', 't', 'e', 'n' };
static const symbol s_6_17[3] = { 'h', 'i', 'n' };
static const symbol s_6_18[4] = { 's', 'i', 'i', 'n' };
static const symbol s_6_19[3] = { 'h', 'o', 'n' };
static const symbol s_6_20[3] = { 'h', 0xE4, 'n' };
static const symbol s_6_21[3] = { 'h', 0xF6, 'n' };
static const symbol s_6_22[1] = { 0xE4 };
static const symbol s_6_23[3] = { 'l', 'l', 0xE4 };
static const symbol s_6_24[2] = { 'n', 0xE4 };
static const symbol s_6_25[3] = { 's', 's', 0xE4 };
static const symbol s_6_26[2] = { 't', 0xE4 };
static const symbol s_6_27[3] = { 'l', 't', 0xE4 };
static const symbol s_6_28[3] = { 's', 't', 0xE4 };
static const symbol s_6_29[3] = { 't', 't', 0xE4 };
static const struct among a_6[30] =
{
/* 0 */ { 1, s_6_0, -1, 8, 0},
/* 1 */ { 3, s_6_1, 0, -1, 0},
/* 2 */ { 2, s_6_2, 0, -1, 0},
/* 3 */ { 3, s_6_3, 0, -1, 0},
/* 4 */ { 2, s_6_4, 0, -1, 0},
/* 5 */ { 3, s_6_5, 4, -1, 0},
/* 6 */ { 3, s_6_6, 4, -1, 0},
/* 7 */ { 3, s_6_7, 4, 9, 0},
/* 8 */ { 3, s_6_8, -1, -1, 0},
/* 9 */ { 3, s_6_9, -1, -1, 0},
/* 10 */ { 3, s_6_10, -1, -1, 0},
/* 11 */ { 1, s_6_11, -1, 7, 0},
/* 12 */ { 3, s_6_12, 11, 1, 0},
/* 13 */ { 3, s_6_13, 11, -1, r_VI},
/* 14 */ { 4, s_6_14, 11, -1, r_LONG},
/* 15 */ { 3, s_6_15, 11, 2, 0},
/* 16 */ { 4, s_6_16, 11, -1, r_VI},
/* 17 */ { 3, s_6_17, 11, 3, 0},
/* 18 */ { 4, s_6_18, 11, -1, r_VI},
/* 19 */ { 3, s_6_19, 11, 4, 0},
/* 20 */ { 3, s_6_20, 11, 5, 0},
/* 21 */ { 3, s_6_21, 11, 6, 0},
/* 22 */ { 1, s_6_22, -1, 8, 0},
/* 23 */ { 3, s_6_23, 22, -1, 0},
/* 24 */ { 2, s_6_24, 22, -1, 0},
/* 25 */ { 3, s_6_25, 22, -1, 0},
/* 26 */ { 2, s_6_26, 22, -1, 0},
/* 27 */ { 3, s_6_27, 26, -1, 0},
/* 28 */ { 3, s_6_28, 26, -1, 0},
/* 29 */ { 3, s_6_29, 26, 9, 0}
};
/* Endings searched by r_other_endings via find_among_b(z, a_7, 14).
 * Rows with 1 in the fourth field (the bare "mma"/"mpa"/"mmi"/"mpi" forms and
 * their a-umlaut variants) trigger the extra "not preceded by po" check in
 * r_other_endings; the "i"-prefixed rows point back at them through the third
 * field.  0xE4 is ISO-8859-1 a-umlaut. */
static const symbol s_7_0[3] = { 'e', 'j', 'a' };
static const symbol s_7_1[3] = { 'm', 'm', 'a' };
static const symbol s_7_2[4] = { 'i', 'm', 'm', 'a' };
static const symbol s_7_3[3] = { 'm', 'p', 'a' };
static const symbol s_7_4[4] = { 'i', 'm', 'p', 'a' };
static const symbol s_7_5[3] = { 'm', 'm', 'i' };
static const symbol s_7_6[4] = { 'i', 'm', 'm', 'i' };
static const symbol s_7_7[3] = { 'm', 'p', 'i' };
static const symbol s_7_8[4] = { 'i', 'm', 'p', 'i' };
static const symbol s_7_9[3] = { 'e', 'j', 0xE4 };
static const symbol s_7_10[3] = { 'm', 'm', 0xE4 };
static const symbol s_7_11[4] = { 'i', 'm', 'm', 0xE4 };
static const symbol s_7_12[3] = { 'm', 'p', 0xE4 };
static const symbol s_7_13[4] = { 'i', 'm', 'p', 0xE4 };
static const struct among a_7[14] =
{
/* 0 */ { 3, s_7_0, -1, -1, 0},
/* 1 */ { 3, s_7_1, -1, 1, 0},
/* 2 */ { 4, s_7_2, 1, -1, 0},
/* 3 */ { 3, s_7_3, -1, 1, 0},
/* 4 */ { 4, s_7_4, 3, -1, 0},
/* 5 */ { 3, s_7_5, -1, 1, 0},
/* 6 */ { 4, s_7_6, 5, -1, 0},
/* 7 */ { 3, s_7_7, -1, 1, 0},
/* 8 */ { 4, s_7_8, 7, -1, 0},
/* 9 */ { 3, s_7_9, -1, -1, 0},
/* 10 */ { 3, s_7_10, -1, 1, 0},
/* 11 */ { 4, s_7_11, 10, -1, 0},
/* 12 */ { 3, s_7_12, -1, 1, 0},
/* 13 */ { 4, s_7_13, 12, -1, 0}
};
/* Single-letter endings 'i' and 'j', searched by r_i_plural via
 * find_among_b(z, a_8, 2). */
static const symbol s_8_0[1] = { 'i' };
static const symbol s_8_1[1] = { 'j' };
static const struct among a_8[2] =
{
/* 0 */ { 1, s_8_0, -1, -1, 0},
/* 1 */ { 1, s_8_1, -1, -1, 0}
};
/* Endings "mma" and "imma", searched by r_t_plural via
 * find_among_b(z, a_9, 2).  Only the bare "mma" row carries result 1, which
 * makes r_t_plural apply its "not preceded by po" check. */
static const symbol s_9_0[3] = { 'm', 'm', 'a' };
static const symbol s_9_1[4] = { 'i', 'm', 'm', 'a' };
static const struct among a_9[2] =
{
/* 0 */ { 3, s_9_0, -1, 1, 0},
/* 1 */ { 4, s_9_1, 0, -1, 0}
};
/* Character-class tables handed to in_grouping / out_grouping and their _b
 * variants.  Call sites pass them with a code range: 97..228 for g_AEI and
 * 97..246 for g_V1, g_V2 and g_particle_end.
 * NOTE(review): presumably packed bitmaps whose bit 0 corresponds to the
 * range's lower bound -- confirm against the grouping helpers in the
 * Snowball runtime. */
static const unsigned char g_AEI[] = { 17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 };
static const unsigned char g_V1[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
static const unsigned char g_V2[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
static const unsigned char g_particle_end[] = { 17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
/* Literal strings used by the Finnish routines below, passed to eq_s_b or
 * slice_from_s together with an explicit length:
 *   s_0..s_2   r_possessive   ('k', "kse", "ksi")
 *   s_3        r_VI           ('i')
 *   s_4..s_11  r_case_ending  (vowels, "ie", 'e')
 *   s_12       r_other_endings ("po")
 *   s_13, s_14 r_t_plural     ('t', "po")
 *   s_15..s_19 r_tidy         ('j', 'o', 'u', 'o', 'j')
 * 0xE4 / 0xF6 are ISO-8859-1 a-umlaut / o-umlaut. */
static const symbol s_0[] = { 'k' };
static const symbol s_1[] = { 'k', 's', 'e' };
static const symbol s_2[] = { 'k', 's', 'i' };
static const symbol s_3[] = { 'i' };
static const symbol s_4[] = { 'a' };
static const symbol s_5[] = { 'e' };
static const symbol s_6[] = { 'i' };
static const symbol s_7[] = { 'o' };
static const symbol s_8[] = { 0xE4 };
static const symbol s_9[] = { 0xF6 };
static const symbol s_10[] = { 'i', 'e' };
static const symbol s_11[] = { 'e' };
static const symbol s_12[] = { 'p', 'o' };
static const symbol s_13[] = { 't' };
static const symbol s_14[] = { 'p', 'o' };
static const symbol s_15[] = { 'j' };
static const symbol s_16[] = { 'o' };
static const symbol s_17[] = { 'u' };
static const symbol s_18[] = { 'o' };
static const symbol s_19[] = { 'j' };
/*
 * Compute the two region marks p1 (z->I[0]) and p2 (z->I[1]).
 *
 * Both marks start out at the limit z->l.  Each mark is then located by the
 * same two-step scan over the g_V1 class (code range 97..246): an
 * out_grouping call that must not fail, followed by an in_grouping call whose
 * non-negative result is added to the cursor.  The cursor position reached
 * becomes the mark.
 *
 * Returns 1 when both marks were set, 0 as soon as either scan reports a
 * negative result (marks not yet reached keep the value z->l).
 */
static int r_mark_regions(struct SN_env * z) {
    int region;

    z->I[0] = z->l;
    z->I[1] = z->l;

    for (region = 0; region < 2; region++) {
        int advance;

        /* goto: grouping V1 */
        if (out_grouping(z, g_V1, 97, 246, 1) < 0) return 0;

        /* gopast: non V1 */
        advance = in_grouping(z, g_V1, 97, 246, 1);
        if (advance < 0) return 0;
        z->c += advance;

        z->I[region] = z->c; /* setmark p1, then p2 */
    }
    return 1;
}
/* Region test: yields 1 when the cursor z->c is at or past the p2 mark stored
 * in z->I[1], otherwise 0. */
static int r_R2(struct SN_env * z) {
    return (z->c >= z->I[1]) ? 1 : 0;
}
/*
 * Particle step: look for one of the a_0 endings in front of the cursor,
 * searching no further back than the p1 mark (z->I[0]), and delete it.
 *
 * Result code 1 additionally requires in_grouping_b on g_particle_end to
 * report 0; result code 2 requires r_R2 to succeed.
 *
 * Returns 1 after a successful slice_del, 0 when the step does not apply,
 * or the negative value reported by r_R2 / slice_del.
 */
static int r_particle_etc(struct SN_env * z) {
    int among_var;
    int outer_lb;
    int status;

    /* setlimit: temporarily raise the backward limit to p1 */
    if (z->c < z->I[0]) return 0;
    outer_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;                             /* [ */
    among_var = find_among_b(z, a_0, 10);      /* substring */
    z->lb = outer_lb;                          /* limit restored on every path */
    if (!among_var) return 0;
    z->bra = z->c;                             /* ] */

    if (among_var == 1) {
        if (in_grouping_b(z, g_particle_end, 97, 246, 0)) return 0;
    } else if (among_var == 2) {
        status = r_R2(z);                      /* call R2 */
        if (status == 0) return 0;
        if (status < 0) return status;
    }

    status = slice_del(z);                     /* delete */
    if (status < 0) return status;
    return 1;
}
/*
 * Possessive-suffix step.  Searches backwards for one of the a_4 endings
 * between the p1 mark (z->I[0]) and the cursor, then performs the action
 * selected by the table's result code (1..6).
 *
 * Returns 1 when the selected action completed, 0 when the step does not
 * apply, or a negative value propagated from slice_del / slice_from_s.
 */
static int r_possessive(struct SN_env * z) {
int among_var;
/* Raise the backward limit z->lb to p1 for the duration of the table search;
 * mlimit keeps the previous limit and is restored on both exits. */
{ int mlimit; /* setlimit, line 69 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 69 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 69 */
among_var = find_among_b(z, a_4, 9); /* substring, line 69 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 69 */
z->lb = mlimit;
}
switch(among_var) {
case 0: return 0;
case 1:
/* "si": give up if the ending is preceded by 'k' (s_0), else delete it. */
{ int m2 = z->l - z->c; (void)m2; /* not, line 72 */
if (!(eq_s_b(z, 1, s_0))) goto lab0;
return 0;
lab0:
z->c = z->l - m2;
}
{ int ret = slice_del(z); /* delete, line 72 */
if (ret < 0) return ret;
}
break;
case 2:
/* "ni": delete it, then a preceding "kse" (s_1) must be present and is
 * replaced by "ksi" (s_2).  Note the delete has already happened when the
 * "kse" test fails and 0 is returned. */
{ int ret = slice_del(z); /* delete, line 74 */
if (ret < 0) return ret;
}
z->ket = z->c; /* [, line 74 */
if (!(eq_s_b(z, 3, s_1))) return 0;
z->bra = z->c; /* ], line 74 */
{ int ret = slice_from_s(z, 3, s_2); /* <-, line 74 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_del(z); /* delete, line 78 */
if (ret < 0) return ret;
}
break;
case 4:
/* "an": the character before the cursor must be 'a' (97) and one of the a_1
 * endings must match; the bounds/character test is a cheap pre-check made
 * before calling find_among_b. */
if (z->c - 1 <= z->lb || z->p[z->c - 1] != 97) return 0;
if (!(find_among_b(z, a_1, 6))) return 0; /* among, line 81 */
{ int ret = slice_del(z); /* delete, line 81 */
if (ret < 0) return ret;
}
break;
case 5:
/* Same as case 4 for the a-umlaut forms: 228 is 0xE4, table a_2. */
if (z->c - 1 <= z->lb || z->p[z->c - 1] != 228) return 0;
if (!(find_among_b(z, a_2, 6))) return 0; /* among, line 83 */
{ int ret = slice_del(z); /* delete, line 84 */
if (ret < 0) return ret;
}
break;
case 6:
/* "en": the character before the cursor must be 'e' (101) and one of the
 * three-letter a_3 endings must match. */
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 101) return 0;
if (!(find_among_b(z, a_3, 2))) return 0; /* among, line 86 */
{ int ret = slice_del(z); /* delete, line 86 */
if (ret < 0) return ret;
}
break;
}
return 1;
}
/* Succeeds (1) when find_among_b finds one of the doubled vowels of table a_5
 * in front of the cursor; 0 otherwise. */
static int r_LONG(struct SN_env * z) {
    return find_among_b(z, a_5, 7) ? 1 : 0; /* among */
}
/* Succeeds (1) when eq_s_b matches 'i' (s_3) in front of the cursor and the
 * following in_grouping_b test on g_V2 reports 0; yields 0 otherwise.  The
 * grouping test is only evaluated after the 'i' has matched. */
static int r_VI(struct SN_env * z) {
    return (eq_s_b(z, 1, s_3) && !in_grouping_b(z, g_V2, 97, 246, 0)) ? 1 : 0;
}
/*
 * Case-ending step.  Searches backwards for one of the a_6 endings between
 * the p1 mark (z->I[0]) and the cursor, checks the extra condition selected
 * by the table's result code, deletes the slice and sets the
 * "ending_removed" flag z->B[0].
 *
 * Returns 1 after a successful delete, 0 when the step does not apply, or a
 * negative value propagated from r_LONG / slice_del.
 */
static int r_case_ending(struct SN_env * z) {
int among_var;
/* Raise the backward limit z->lb to p1 while searching the table; the old
 * limit (mlimit) is restored on both exits. */
{ int mlimit; /* setlimit, line 96 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 96 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 96 */
among_var = find_among_b(z, a_6, 30); /* substring, line 96 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 96 */
z->lb = mlimit;
}
switch(among_var) {
case 0: return 0;
/* Cases 1..6: the ending must be preceded by one specific vowel
 * (a, e, i, o, a-umlaut, o-umlaut respectively). */
case 1:
if (!(eq_s_b(z, 1, s_4))) return 0;
break;
case 2:
if (!(eq_s_b(z, 1, s_5))) return 0;
break;
case 3:
if (!(eq_s_b(z, 1, s_6))) return 0;
break;
case 4:
if (!(eq_s_b(z, 1, s_7))) return 0;
break;
case 5:
if (!(eq_s_b(z, 1, s_8))) return 0;
break;
case 6:
if (!(eq_s_b(z, 1, s_9))) return 0;
break;
case 7:
/* Optional ("try") extension of the slice: if r_LONG succeeds or "ie" (s_10)
 * precedes the ending, step one character further back and move z->bra
 * there.  On any failure the cursor is reset from m_keep and the slice is
 * left as found; the delete below still runs. */
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */
{ int m2 = z->l - z->c; (void)m2; /* and, line 113 */
{ int m3 = z->l - z->c; (void)m3; /* or, line 112 */
{ int ret = r_LONG(z);
if (ret == 0) goto lab2; /* call LONG, line 111 */
if (ret < 0) return ret;
}
goto lab1;
lab2:
z->c = z->l - m3;
if (!(eq_s_b(z, 2, s_10))) { z->c = z->l - m_keep; goto lab0; }
}
lab1:
z->c = z->l - m2;
if (z->c <= z->lb) { z->c = z->l - m_keep; goto lab0; }
z->c--; /* next, line 113 */
}
z->bra = z->c; /* ], line 113 */
lab0:
;
}
break;
case 8:
/* Both grouping tests on g_V1 must report 0 for the step to continue. */
if (in_grouping_b(z, g_V1, 97, 246, 0)) return 0;
if (out_grouping_b(z, g_V1, 97, 246, 0)) return 0;
break;
case 9:
if (!(eq_s_b(z, 1, s_11))) return 0;
break;
}
{ int ret = slice_del(z); /* delete, line 138 */
if (ret < 0) return ret;
}
z->B[0] = 1; /* set ending_removed, line 139 */
return 1;
}
/*
 * Other-endings step: look for one of the a_7 endings in front of the cursor,
 * searching no further back than the p2 mark (z->I[1]), and delete it.
 *
 * For result code 1 the step is abandoned when the ending is preceded by
 * "po" (s_12).
 *
 * Returns 1 after a successful slice_del, 0 when the step does not apply, or
 * the negative value reported by slice_del.
 */
static int r_other_endings(struct SN_env * z) {
    int among_var;
    int outer_lb;
    int status;

    /* setlimit: temporarily raise the backward limit to p2 */
    if (z->c < z->I[1]) return 0;
    outer_lb = z->lb;
    z->lb = z->I[1];

    z->ket = z->c;                             /* [ */
    among_var = find_among_b(z, a_7, 14);      /* substring */
    z->lb = outer_lb;                          /* limit restored on every path */
    if (!among_var) return 0;
    z->bra = z->c;                             /* ] */

    if (among_var == 1) {
        /* not "po": remember the cursor as an offset from the limit */
        int from_end = z->l - z->c;
        if (eq_s_b(z, 2, s_12)) return 0;
        z->c = z->l - from_end;
    }

    status = slice_del(z);                     /* delete */
    if (status < 0) return status;
    return 1;
}
/*
 * i-plural step: delete a trailing 'i' or 'j' (table a_8) found in front of
 * the cursor, searching no further back than the p1 mark (z->I[0]).
 *
 * Returns 1 after a successful slice_del, 0 when the step does not apply, or
 * the negative value reported by slice_del.
 */
static int r_i_plural(struct SN_env * z) {
    int outer_lb;
    int found = 0;
    int status;

    /* setlimit: temporarily raise the backward limit to p1 */
    if (z->c < z->I[0]) return 0;
    outer_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;                             /* [ */
    if (z->c > z->lb) {
        /* cheap pre-check on the previous character: 105 = 'i', 106 = 'j' */
        int prev = z->p[z->c - 1];
        if ((prev == 105 || prev == 106) && find_among_b(z, a_8, 2)) {
            found = 1;                         /* substring */
        }
    }
    z->lb = outer_lb;                          /* limit restored on every path */
    if (!found) return 0;
    z->bra = z->c;                             /* ] */

    status = slice_del(z);                     /* delete */
    if (status < 0) return status;
    return 1;
}
/*
 * t-plural step, in two stages.
 *
 * Stage 1 (limit raised to p1 = z->I[0]): a 't' (s_13) in front of the cursor
 * is deleted, provided the in_grouping_b test on g_V1 made just before it
 * reports 0.
 * Stage 2 (limit raised to p2 = z->I[1]): an a_9 ending ("mma" / "imma") is
 * deleted; for the bare "mma" (result 1) only when not preceded by "po".
 *
 * Returns 1 when both deletes completed, 0 when either stage does not apply
 * (the stage-1 delete is not undone in that case), or a negative value
 * propagated from slice_del.
 */
static int r_t_plural(struct SN_env * z) {
int among_var;
{ int mlimit; /* setlimit, line 161 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 161 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 162 */
if (!(eq_s_b(z, 1, s_13))) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 162 */
/* Look-ahead only: the cursor is put back from m_test after the test. */
{ int m_test = z->l - z->c; /* test, line 162 */
if (in_grouping_b(z, g_V1, 97, 246, 0)) { z->lb = mlimit; return 0; }
z->c = z->l - m_test;
}
/* NOTE(review): on a negative slice_del result z->lb is returned without
 * being reset to mlimit; presumably harmless because the caller aborts on
 * negative results -- confirm. */
{ int ret = slice_del(z); /* delete, line 163 */
if (ret < 0) return ret;
}
z->lb = mlimit;
}
{ int mlimit; /* setlimit, line 165 */
int m2 = z->l - z->c; (void)m2;
if (z->c < z->I[1]) return 0;
z->c = z->I[1]; /* tomark, line 165 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m2;
z->ket = z->c; /* [, line 165 */
/* Cheap pre-check before the table search: at least three characters must be
 * available and the previous one must be 'a' (97). */
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 97) { z->lb = mlimit; return 0; }
among_var = find_among_b(z, a_9, 2); /* substring, line 165 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 165 */
z->lb = mlimit;
}
switch(among_var) {
case 0: return 0;
case 1:
/* Bare "mma": abandon the step when it is preceded by "po" (s_14). */
{ int m3 = z->l - z->c; (void)m3; /* not, line 167 */
if (!(eq_s_b(z, 2, s_14))) goto lab0;
return 0;
lab0:
z->c = z->l - m3;
}
break;
}
{ int ret = slice_del(z); /* delete, line 170 */
if (ret < 0) return ret;
}
return 1;
}
/*
 * Tidy-up step.  With the backward limit raised to p1 (z->I[0]) it runs four
 * independent "do" blocks, each of which restores the cursor afterwards
 * whether or not it changed anything:
 *   1. if r_LONG succeeds, delete the single character in front of the cursor;
 *   2. delete a character accepted by the g_AEI test when the following
 *      out_grouping_b test on g_V1 also reports 0;
 *   3. delete a 'j' that is preceded by 'o' or 'u';
 *   4. delete an 'o' that is preceded by 'j'.
 * After the limit is restored it scans with in_grouping_b on g_V1, copies the
 * single character in front of the resulting cursor into z->S[0], and deletes
 * that character when eq_v_b matches z->S[0] again in front of it.
 *
 * Returns 1 when the final delete happened, 0 when the final part does not
 * apply (earlier deletes are kept), -1 when slice_to yields a null pointer,
 * or a negative value propagated from slice_del.
 */
static int r_tidy(struct SN_env * z) {
{ int mlimit; /* setlimit, line 173 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 173 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
{ int m2 = z->l - z->c; (void)m2; /* do, line 174 */
{ int m3 = z->l - z->c; (void)m3; /* and, line 174 */
{ int ret = r_LONG(z);
if (ret == 0) goto lab0; /* call LONG, line 174 */
if (ret < 0) return ret;
}
/* r_LONG was only a look-ahead: go back to where it started (m3). */
z->c = z->l - m3;
z->ket = z->c; /* [, line 174 */
if (z->c <= z->lb) goto lab0;
z->c--; /* next, line 174 */
z->bra = z->c; /* ], line 174 */
{ int ret = slice_del(z); /* delete, line 174 */
if (ret < 0) return ret;
}
}
lab0:
z->c = z->l - m2;
}
{ int m4 = z->l - z->c; (void)m4; /* do, line 175 */
z->ket = z->c; /* [, line 175 */
if (in_grouping_b(z, g_AEI, 97, 228, 0)) goto lab1;
z->bra = z->c; /* ], line 175 */
if (out_grouping_b(z, g_V1, 97, 246, 0)) goto lab1;
{ int ret = slice_del(z); /* delete, line 175 */
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m4;
}
{ int m5 = z->l - z->c; (void)m5; /* do, line 176 */
z->ket = z->c; /* [, line 176 */
if (!(eq_s_b(z, 1, s_15))) goto lab2;
z->bra = z->c; /* ], line 176 */
/* The slice is just the 'j'; 'o' (s_16) or 'u' (s_17) is only tested for. */
{ int m6 = z->l - z->c; (void)m6; /* or, line 176 */
if (!(eq_s_b(z, 1, s_16))) goto lab4;
goto lab3;
lab4:
z->c = z->l - m6;
if (!(eq_s_b(z, 1, s_17))) goto lab2;
}
lab3:
{ int ret = slice_del(z); /* delete, line 176 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m5;
}
{ int m7 = z->l - z->c; (void)m7; /* do, line 177 */
z->ket = z->c; /* [, line 177 */
if (!(eq_s_b(z, 1, s_18))) goto lab5;
z->bra = z->c; /* ], line 177 */
if (!(eq_s_b(z, 1, s_19))) goto lab5;
{ int ret = slice_del(z); /* delete, line 177 */
if (ret < 0) return ret;
}
lab5:
z->c = z->l - m7;
}
z->lb = mlimit;
}
if (in_grouping_b(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* non V1, line 179 */
z->ket = z->c; /* [, line 179 */
if (z->c <= z->lb) return 0;
z->c--; /* next, line 179 */
z->bra = z->c; /* ], line 179 */
/* slice_to hands back the (possibly reallocated) buffer for S[0]; a null
 * result is reported as error -1. */
z->S[0] = slice_to(z, z->S[0]); /* -> x, line 179 */
if (z->S[0] == 0) return -1; /* -> x, line 179 */
if (!(eq_v_b(z, z->S[0]))) return 0; /* name x, line 179 */
{ int ret = slice_del(z); /* delete, line 179 */
if (ret < 0) return ret;
}
return 1;
}
/*
 * Entry point of the Finnish (ISO-8859-1) stemmer.
 *
 * Runs r_mark_regions forwards, then switches to backward mode (z->lb takes
 * the cursor, the cursor moves to the limit z->l) and runs, in order:
 * r_particle_etc, r_possessive, r_case_ending, r_other_endings, then either
 * r_i_plural (when r_case_ending set the ending_removed flag z->B[0]) or
 * r_t_plural, and finally r_tidy.  Every step is a "do": its 0/1 result is
 * ignored and the cursor is put back afterwards, measured from the limit
 * because the steps may shorten the word.
 *
 * Returns 1 on completion, or the negative value reported by a step.
 */
extern int finnish_ISO_8859_1_stem(struct SN_env * z) {
    int status;
    int saved;

    /* do mark_regions (forward mode: absolute cursor is saved) */
    saved = z->c;
    status = r_mark_regions(z);
    if (status < 0) return status;
    z->c = saved;

    z->B[0] = 0;                 /* unset ending_removed */

    /* backwards */
    z->lb = z->c;
    z->c = z->l;

    /* do particle_etc */
    saved = z->l - z->c;
    status = r_particle_etc(z);
    if (status < 0) return status;
    z->c = z->l - saved;

    /* do possessive */
    saved = z->l - z->c;
    status = r_possessive(z);
    if (status < 0) return status;
    z->c = z->l - saved;

    /* do case_ending */
    saved = z->l - z->c;
    status = r_case_ending(z);
    if (status < 0) return status;
    z->c = z->l - saved;

    /* do other_endings */
    saved = z->l - z->c;
    status = r_other_endings(z);
    if (status < 0) return status;
    z->c = z->l - saved;

    /* (ending_removed do i_plural) or do t_plural */
    saved = z->l - z->c;
    if (z->B[0]) {
        status = r_i_plural(z);
    } else {
        status = r_t_plural(z);
    }
    if (status < 0) return status;
    z->c = z->l - saved;

    /* do tidy */
    saved = z->l - z->c;
    status = r_tidy(z);
    if (status < 0) return status;
    z->c = z->l - saved;

    /* leave backward mode */
    z->c = z->lb;
    return 1;
}
/* Creates the environment for the Finnish stemmer via SN_create_env(1, 2, 1).
 * The routines above use z->S[0], z->I[0..1] and z->B[0]. */
extern struct SN_env * finnish_ISO_8859_1_create_env(void)
{
    return SN_create_env(1, 2, 1);
}
/* Releases an environment obtained from finnish_ISO_8859_1_create_env by
 * handing it to SN_close_env with the same first count (1) used at creation. */
extern void finnish_ISO_8859_1_close_env(struct SN_env * z)
{
    SN_close_env(z, 1);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,503 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int german_ISO_8859_1_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_standard_suffix(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_prelude(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * german_ISO_8859_1_create_env(void);
extern void german_ISO_8859_1_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Table searched forwards by r_postlude via find_among(z, a_0, 6).
 * Row 0 is the empty string (length 0, null symbol pointer) with result 6,
 * which r_postlude handles by stepping one character forward; the other rows
 * point back at it through the third field.  0xE4, 0xF6 and 0xFC are the
 * ISO-8859-1 codes for a-umlaut, o-umlaut and u-umlaut. */
static const symbol s_0_1[1] = { 'U' };
static const symbol s_0_2[1] = { 'Y' };
static const symbol s_0_3[1] = { 0xE4 };
static const symbol s_0_4[1] = { 0xF6 };
static const symbol s_0_5[1] = { 0xFC };
static const struct among a_0[6] =
{
/* 0 */ { 0, 0, -1, 6, 0},
/* 1 */ { 1, s_0_1, 0, 2, 0},
/* 2 */ { 1, s_0_2, 0, 1, 0},
/* 3 */ { 1, s_0_3, 0, 3, 0},
/* 4 */ { 1, s_0_4, 0, 4, 0},
/* 5 */ { 1, s_0_5, 0, 5, 0}
};
/* Endings searched by the first block of r_standard_suffix via
 * find_among_b(z, a_1, 7).  Result 1 means plain delete; result 2 (the bare
 * "s") is deleted only after an extra g_s_ending test. */
static const symbol s_1_0[1] = { 'e' };
static const symbol s_1_1[2] = { 'e', 'm' };
static const symbol s_1_2[2] = { 'e', 'n' };
static const symbol s_1_3[3] = { 'e', 'r', 'n' };
static const symbol s_1_4[2] = { 'e', 'r' };
static const symbol s_1_5[1] = { 's' };
static const symbol s_1_6[2] = { 'e', 's' };
static const struct among a_1[7] =
{
/* 0 */ { 1, s_1_0, -1, 1, 0},
/* 1 */ { 2, s_1_1, -1, 1, 0},
/* 2 */ { 2, s_1_2, -1, 1, 0},
/* 3 */ { 3, s_1_3, -1, 1, 0},
/* 4 */ { 2, s_1_4, -1, 1, 0},
/* 5 */ { 1, s_1_5, -1, 2, 0},
/* 6 */ { 2, s_1_6, 5, 1, 0}
};
/* Endings searched by the second block of r_standard_suffix via
 * find_among_b(z, a_2, 4).  Result 1 means plain delete; result 2 (the bare
 * "st") is deleted only after the g_st_ending test and a 3-character hop. */
static const symbol s_2_0[2] = { 'e', 'n' };
static const symbol s_2_1[2] = { 'e', 'r' };
static const symbol s_2_2[2] = { 's', 't' };
static const symbol s_2_3[3] = { 'e', 's', 't' };
static const struct among a_2[4] =
{
/* 0 */ { 2, s_2_0, -1, 1, 0},
/* 1 */ { 2, s_2_1, -1, 1, 0},
/* 2 */ { 2, s_2_2, -1, 2, 0},
/* 3 */ { 3, s_2_3, 2, 1, 0}
};
/* Endings "ig" and "lich", searched by r_standard_suffix (case 4 of its third
 * block) via find_among_b(z, a_3, 2). */
static const symbol s_3_0[2] = { 'i', 'g' };
static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' };
static const struct among a_3[2] =
{
/* 0 */ { 2, s_3_0, -1, 1, 0},
/* 1 */ { 4, s_3_1, -1, 1, 0}
};
/* Derivational endings searched by the third block of r_standard_suffix via
 * find_among_b(z, a_4, 8).  The fourth field (1..4) selects which follow-up
 * handling r_standard_suffix applies after the match. */
static const symbol s_4_0[3] = { 'e', 'n', 'd' };
static const symbol s_4_1[2] = { 'i', 'g' };
static const symbol s_4_2[3] = { 'u', 'n', 'g' };
static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' };
static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' };
static const symbol s_4_5[2] = { 'i', 'k' };
static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' };
static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' };
static const struct among a_4[8] =
{
/* 0 */ { 3, s_4_0, -1, 1, 0},
/* 1 */ { 2, s_4_1, -1, 2, 0},
/* 2 */ { 3, s_4_2, -1, 1, 0},
/* 3 */ { 4, s_4_3, -1, 3, 0},
/* 4 */ { 4, s_4_4, -1, 2, 0},
/* 5 */ { 2, s_4_5, -1, 2, 0},
/* 6 */ { 4, s_4_6, -1, 3, 0},
/* 7 */ { 4, s_4_7, -1, 4, 0}
};
/* Character-class tables handed to the grouping helpers.  Call sites pass
 * g_v with the code range 97..252 and g_s_ending / g_st_ending with 98..116.
 * NOTE(review): presumably packed bitmaps whose bit 0 corresponds to the
 * range's lower bound -- confirm against the grouping helpers in the
 * Snowball runtime. */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 };
static const unsigned char g_s_ending[] = { 117, 30, 5 };
static const unsigned char g_st_ending[] = { 117, 30, 4 };
/* Literal strings used by the German routines below, passed to eq_s / eq_s_b
 * or slice_from_s together with an explicit length:
 *   s_0..s_5    r_prelude  (0xDF is ISO-8859-1 sharp s, replaced by "ss";
 *                           'u' -> 'U', 'y' -> 'Y')
 *   s_6..s_10   r_postlude (replacement characters y, u, a, o, u)
 *   s_11..s_15  r_standard_suffix ("ig", 'e', 'e', "er", "en") */
static const symbol s_0[] = { 0xDF };
static const symbol s_1[] = { 's', 's' };
static const symbol s_2[] = { 'u' };
static const symbol s_3[] = { 'U' };
static const symbol s_4[] = { 'y' };
static const symbol s_5[] = { 'Y' };
static const symbol s_6[] = { 'y' };
static const symbol s_7[] = { 'u' };
static const symbol s_8[] = { 'a' };
static const symbol s_9[] = { 'o' };
static const symbol s_10[] = { 'u' };
static const symbol s_11[] = { 'i', 'g' };
static const symbol s_12[] = { 'e' };
static const symbol s_13[] = { 'e' };
static const symbol s_14[] = { 'e', 'r' };
static const symbol s_15[] = { 'e', 'n' };
/*
 * Prelude: two forward passes over the word.
 *
 * Pass 1 (wrapped in a "test", so the cursor returns to c_test afterwards)
 * walks to the end of the input and replaces every 0xDF (s_0, ISO-8859-1
 * sharp s) by "ss" (s_1).
 * Pass 2 repeatedly searches for a 'u' or 'y' that has a g_v match on each
 * side and replaces it by 'U' / 'Y' (s_3 / s_5).
 *
 * Returns 1, or a negative value propagated from slice_from_s.
 */
static int r_prelude(struct SN_env * z) {
{ int c_test = z->c; /* test, line 30 */
while(1) { /* repeat, line 30 */
int c1 = z->c;
{ int c2 = z->c; /* or, line 33 */
z->bra = z->c; /* [, line 32 */
if (!(eq_s(z, 1, s_0))) goto lab2;
z->ket = z->c; /* ], line 32 */
{ int ret = slice_from_s(z, 2, s_1); /* <-, line 32 */
if (ret < 0) return ret;
}
goto lab1;
lab2:
/* No sharp s here: step over one character, or stop at the limit. */
z->c = c2;
if (z->c >= z->l) goto lab0;
z->c++; /* next, line 33 */
}
lab1:
continue;
lab0:
z->c = c1;
break;
}
z->c = c_test;
}
while(1) { /* repeat, line 36 */
int c3 = z->c;
/* Inner loop: advance one character at a time (lab4) until the pattern
 * matches at the current position; lab3 ends the outer repeat once the limit
 * is reached without a match. */
while(1) { /* goto, line 36 */
int c4 = z->c;
if (in_grouping(z, g_v, 97, 252, 0)) goto lab4;
z->bra = z->c; /* [, line 37 */
{ int c5 = z->c; /* or, line 37 */
if (!(eq_s(z, 1, s_2))) goto lab6;
z->ket = z->c; /* ], line 37 */
if (in_grouping(z, g_v, 97, 252, 0)) goto lab6;
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 37 */
if (ret < 0) return ret;
}
goto lab5;
lab6:
z->c = c5;
if (!(eq_s(z, 1, s_4))) goto lab4;
z->ket = z->c; /* ], line 38 */
if (in_grouping(z, g_v, 97, 252, 0)) goto lab4;
{ int ret = slice_from_s(z, 1, s_5); /* <-, line 38 */
if (ret < 0) return ret;
}
}
lab5:
/* Match handled: resume the outer repeat from where this attempt began. */
z->c = c4;
break;
lab4:
z->c = c4;
if (z->c >= z->l) goto lab3;
z->c++; /* goto, line 36 */
}
continue;
lab3:
z->c = c3;
break;
}
return 1;
}
/*
 * Compute the region marks p1 (z->I[0]) and p2 (z->I[1]).
 *
 * Both marks start out at the limit z->l.  z->I[2] ("x") is set to the
 * position three characters past the cursor; if that position lies outside
 * 0..z->l the routine gives up.  Each mark is then located by an
 * out_grouping scan followed by an in_grouping scan over g_v (code range
 * 97..252), each adding its non-negative result to the cursor.  p1 is raised
 * to x when it would otherwise lie before it.
 *
 * Returns 1 when both marks were set, 0 as soon as the hop or a scan fails
 * (marks not yet reached keep their earlier value).
 */
static int r_mark_regions(struct SN_env * z) {
    int region;
    int hop_target;

    z->I[0] = z->l;
    z->I[1] = z->l;

    /* test ( hop 3 setmark x ): the cursor itself is left where it was */
    hop_target = z->c + 3;
    if (hop_target < 0 || hop_target > z->l) return 0;
    z->I[2] = hop_target;

    for (region = 0; region < 2; region++) {
        int advance;

        /* gopast: grouping v */
        advance = out_grouping(z, g_v, 97, 252, 1);
        if (advance < 0) return 0;
        z->c += advance;

        /* gopast: non v */
        advance = in_grouping(z, g_v, 97, 252, 1);
        if (advance < 0) return 0;
        z->c += advance;

        z->I[region] = z->c;     /* setmark p1, then p2 */

        /* try: p1 may not lie before x */
        if (region == 0 && z->I[0] < z->I[2]) {
            z->I[0] = z->I[2];
        }
    }
    return 1;
}
static int r_postlude(struct SN_env * z) {
int among_var;
while(1) { /* repeat, line 55 */
int c1 = z->c;
z->bra = z->c; /* [, line 57 */
among_var = find_among(z, a_0, 6); /* substring, line 57 */
if (!(among_var)) goto lab0;
z->ket = z->c; /* ], line 57 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 58 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 1, s_7); /* <-, line 59 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 60 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = slice_from_s(z, 1, s_9); /* <-, line 61 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 62 */
if (ret < 0) return ret;
}
break;
case 6:
if (z->c >= z->l) goto lab0;
z->c++; /* next, line 63 */
break;
}
continue;
lab0:
z->c = c1;
break;
}
return 1;
}
/* Region test: yields 1 when the cursor z->c is at or past the p1 mark stored
 * in z->I[0], otherwise 0. */
static int r_R1(struct SN_env * z) {
    return (z->c >= z->I[0]) ? 1 : 0;
}
/* Region test: yields 1 when the cursor z->c is at or past the p2 mark stored
 * in z->I[1], otherwise 0. */
static int r_R2(struct SN_env * z) {
    return (z->c >= z->I[1]) ? 1 : 0;
}
/*
 * Suffix removal, run in backward mode.  Three independent "do" blocks are
 * executed in order; each restores the cursor (relative to the limit z->l)
 * when it finishes, whether or not it removed anything:
 *   1. a_1 endings (e, em, en, ern, er, s, es), which must lie in R1;
 *   2. a_2 endings (en, er, st, est), which must lie in R1;
 *   3. a_4 derivational endings, which must lie in R2, with further optional
 *      ("try") removals depending on the ending.
 *
 * Returns 1, or a negative value propagated from the region tests /
 * slice_del.
 */
static int r_standard_suffix(struct SN_env * z) {
int among_var;
{ int m1 = z->l - z->c; (void)m1; /* do, line 74 */
z->ket = z->c; /* [, line 75 */
/* Cheap pre-filter before the table search: the previous character must be
 * in 96..127 and its low five bits must select a set bit of 811040
 * (0xC6020), i.e. one of 'e', 'm', 'n', 'r', 's'. */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0;
among_var = find_among_b(z, a_1, 7); /* substring, line 75 */
if (!(among_var)) goto lab0;
z->bra = z->c; /* ], line 75 */
{ int ret = r_R1(z);
if (ret == 0) goto lab0; /* call R1, line 75 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_del(z); /* delete, line 77 */
if (ret < 0) return ret;
}
break;
case 2:
/* Bare "s": deleted only when the g_s_ending test reports 0. */
if (in_grouping_b(z, g_s_ending, 98, 116, 0)) goto lab0;
{ int ret = slice_del(z); /* delete, line 80 */
if (ret < 0) return ret;
}
break;
}
lab0:
z->c = z->l - m1;
}
{ int m2 = z->l - z->c; (void)m2; /* do, line 84 */
z->ket = z->c; /* [, line 85 */
/* Pre-filter: at least two characters available and the previous one is
 * 'n', 'r' or 't' (bits of 1327104 = 0x144000). */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1;
among_var = find_among_b(z, a_2, 4); /* substring, line 85 */
if (!(among_var)) goto lab1;
z->bra = z->c; /* ], line 85 */
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 85 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab1;
case 1:
{ int ret = slice_del(z); /* delete, line 87 */
if (ret < 0) return ret;
}
break;
case 2:
/* Bare "st": requires the g_st_ending test to report 0 and room to hop three
 * further characters back (not below z->lb) before the delete. */
if (in_grouping_b(z, g_st_ending, 98, 116, 0)) goto lab1;
{ int ret = z->c - 3;
if (z->lb > ret || ret > z->l) goto lab1;
z->c = ret; /* hop, line 90 */
}
{ int ret = slice_del(z); /* delete, line 90 */
if (ret < 0) return ret;
}
break;
}
lab1:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 94 */
z->ket = z->c; /* [, line 95 */
/* Pre-filter: at least two characters available and the previous one is
 * 'd', 'g', 'h', 'k' or 't' (bits of 1051024 = 0x100990). */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2;
among_var = find_among_b(z, a_4, 8); /* substring, line 95 */
if (!(among_var)) goto lab2;
z->bra = z->c; /* ], line 95 */
{ int ret = r_R2(z);
if (ret == 0) goto lab2; /* call R2, line 95 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab2;
case 1:
/* "end" / "ung": delete; then optionally also delete a preceding "ig" that
 * is not itself preceded by 'e' and lies in R2. */
{ int ret = slice_del(z); /* delete, line 97 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 98 */
z->ket = z->c; /* [, line 98 */
if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab3; }
z->bra = z->c; /* ], line 98 */
{ int m4 = z->l - z->c; (void)m4; /* not, line 98 */
if (!(eq_s_b(z, 1, s_12))) goto lab4;
{ z->c = z->l - m_keep; goto lab3; }
lab4:
z->c = z->l - m4;
}
{ int ret = r_R2(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 98 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 98 */
if (ret < 0) return ret;
}
lab3:
;
}
break;
case 2:
/* "ig" / "isch" / "ik": deleted unless preceded by 'e'. */
{ int m5 = z->l - z->c; (void)m5; /* not, line 101 */
if (!(eq_s_b(z, 1, s_13))) goto lab5;
goto lab2;
lab5:
z->c = z->l - m5;
}
{ int ret = slice_del(z); /* delete, line 101 */
if (ret < 0) return ret;
}
break;
case 3:
/* "lich" / "heit": delete; then optionally also delete a preceding "er" or
 * "en" that lies in R1. */
{ int ret = slice_del(z); /* delete, line 104 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 105 */
z->ket = z->c; /* [, line 106 */
{ int m6 = z->l - z->c; (void)m6; /* or, line 106 */
if (!(eq_s_b(z, 2, s_14))) goto lab8;
goto lab7;
lab8:
z->c = z->l - m6;
if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab6; }
}
lab7:
z->bra = z->c; /* ], line 106 */
{ int ret = r_R1(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R1, line 106 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 106 */
if (ret < 0) return ret;
}
lab6:
;
}
break;
case 4:
/* "keit": delete; then optionally also delete a preceding a_3 ending
 * ("ig" / "lich") that lies in R2.  103 = 'g', 104 = 'h'. */
{ int ret = slice_del(z); /* delete, line 110 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */
z->ket = z->c; /* [, line 112 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab9; }
among_var = find_among_b(z, a_3, 2); /* substring, line 112 */
if (!(among_var)) { z->c = z->l - m_keep; goto lab9; }
z->bra = z->c; /* ], line 112 */
{ int ret = r_R2(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab9; } /* call R2, line 112 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: { z->c = z->l - m_keep; goto lab9; }
case 1:
{ int ret = slice_del(z); /* delete, line 114 */
if (ret < 0) return ret;
}
break;
}
lab9:
;
}
break;
}
lab2:
z->c = z->l - m3;
}
return 1;
}
/*
 * Entry point of the German (ISO-8859-1) stemmer.
 *
 * Runs r_prelude and r_mark_regions forwards, switches to backward mode
 * (z->lb takes the cursor, the cursor moves to the limit z->l) for
 * r_standard_suffix, switches back and runs r_postlude forwards.  Every step
 * is a "do": its 0/1 result is ignored and the cursor is put back afterwards.
 *
 * Returns 1 on completion, or the negative value reported by a step.
 */
extern int german_ISO_8859_1_stem(struct SN_env * z) {
    int status;
    int saved;

    /* do prelude */
    saved = z->c;
    status = r_prelude(z);
    if (status < 0) return status;
    z->c = saved;

    /* do mark_regions */
    saved = z->c;
    status = r_mark_regions(z);
    if (status < 0) return status;
    z->c = saved;

    /* backwards */
    z->lb = z->c;
    z->c = z->l;

    /* do standard_suffix (cursor kept as an offset from the limit, since the
     * step may shorten the word) */
    saved = z->l - z->c;
    status = r_standard_suffix(z);
    if (status < 0) return status;
    z->c = z->l - saved;

    /* leave backward mode */
    z->c = z->lb;

    /* do postlude */
    saved = z->c;
    status = r_postlude(z);
    if (status < 0) return status;
    z->c = saved;

    return 1;
}
/* Creates the environment for the German stemmer via SN_create_env(0, 3, 0).
 * The routines above use z->I[0..2] (p1, p2 and x). */
extern struct SN_env * german_ISO_8859_1_create_env(void)
{
    return SN_create_env(0, 3, 0);
}
/* Releases an environment obtained from german_ISO_8859_1_create_env by
 * handing it to SN_close_env with the same first count (0) used at creation. */
extern void german_ISO_8859_1_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,297 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int norwegian_ISO_8859_1_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_other_suffix(struct SN_env * z);
static int r_consonant_pair(struct SN_env * z);
static int r_main_suffix(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * norwegian_ISO_8859_1_create_env(void);
extern void norwegian_ISO_8859_1_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Suffix strings referenced by among table a_0, one array per table entry. */
static const symbol s_0_0[1] = { 'a' };
static const symbol s_0_1[1] = { 'e' };
static const symbol s_0_2[3] = { 'e', 'd', 'e' };
static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' };
static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' };
static const symbol s_0_5[3] = { 'a', 'n', 'e' };
static const symbol s_0_6[3] = { 'e', 'n', 'e' };
static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' };
static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' };
static const symbol s_0_9[2] = { 'e', 'n' };
static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' };
static const symbol s_0_11[2] = { 'a', 'r' };
static const symbol s_0_12[2] = { 'e', 'r' };
static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' };
static const symbol s_0_14[1] = { 's' };
static const symbol s_0_15[2] = { 'a', 's' };
static const symbol s_0_16[2] = { 'e', 's' };
static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' };
static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' };
static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' };
static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' };
static const symbol s_0_21[3] = { 'e', 'n', 's' };
static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' };
static const symbol s_0_23[3] = { 'e', 'r', 's' };
static const symbol s_0_24[3] = { 'e', 't', 's' };
static const symbol s_0_25[2] = { 'e', 't' };
static const symbol s_0_26[3] = { 'h', 'e', 't' };
static const symbol s_0_27[3] = { 'e', 'r', 't' };
static const symbol s_0_28[3] = { 'a', 's', 't' };
/*
 * Among table a_0 (29 entries), searched by r_main_suffix via find_among_b().
 * Each initializer is
 *   { string length, string, index of another entry or -1, result code, 0 }.
 * The result code is the value r_main_suffix switches on: 1 = delete,
 * 2 = conditional delete (entry 14, "s"), 3 = replace with "er"
 * (entries 8 "erte" and 27 "ert").
 * NOTE(review): wherever the third field is not -1 it names a shorter entry
 * that is a suffix of this one (e.g. 7 "hetene" -> 6 "ene"); presumably
 * find_among_b uses it as a fallback link -- confirm in the runtime.  The
 * last field is 0 in every entry; its meaning is not visible in this file.
 */
static const struct among a_0[29] =
{
/* 0 */ { 1, s_0_0, -1, 1, 0},
/* 1 */ { 1, s_0_1, -1, 1, 0},
/* 2 */ { 3, s_0_2, 1, 1, 0},
/* 3 */ { 4, s_0_3, 1, 1, 0},
/* 4 */ { 4, s_0_4, 1, 1, 0},
/* 5 */ { 3, s_0_5, 1, 1, 0},
/* 6 */ { 3, s_0_6, 1, 1, 0},
/* 7 */ { 6, s_0_7, 6, 1, 0},
/* 8 */ { 4, s_0_8, 1, 3, 0},
/* 9 */ { 2, s_0_9, -1, 1, 0},
/* 10 */ { 5, s_0_10, 9, 1, 0},
/* 11 */ { 2, s_0_11, -1, 1, 0},
/* 12 */ { 2, s_0_12, -1, 1, 0},
/* 13 */ { 5, s_0_13, 12, 1, 0},
/* 14 */ { 1, s_0_14, -1, 2, 0},
/* 15 */ { 2, s_0_15, 14, 1, 0},
/* 16 */ { 2, s_0_16, 14, 1, 0},
/* 17 */ { 4, s_0_17, 16, 1, 0},
/* 18 */ { 5, s_0_18, 16, 1, 0},
/* 19 */ { 4, s_0_19, 16, 1, 0},
/* 20 */ { 7, s_0_20, 19, 1, 0},
/* 21 */ { 3, s_0_21, 14, 1, 0},
/* 22 */ { 6, s_0_22, 21, 1, 0},
/* 23 */ { 3, s_0_23, 14, 1, 0},
/* 24 */ { 3, s_0_24, 14, 1, 0},
/* 25 */ { 2, s_0_25, -1, 1, 0},
/* 26 */ { 3, s_0_26, 25, 1, 0},
/* 27 */ { 3, s_0_27, -1, 3, 0},
/* 28 */ { 3, s_0_28, -1, 1, 0}
};
/*
 * Among table a_1: the two endings "dt" and "vt" looked up by
 * r_consonant_pair via find_among_b().  Entry layout is
 * { string length, string, index of another entry or -1, result code, 0 }.
 * Both result codes are -1; r_consonant_pair only tests the lookup result
 * for being non-zero.
 */
static const symbol s_1_0[2] = { 'd', 't' };
static const symbol s_1_1[2] = { 'v', 't' };
static const struct among a_1[2] =
{
/* 0 */ { 2, s_1_0, -1, -1, 0},
/* 1 */ { 2, s_1_1, -1, -1, 0}
};
/* Suffix strings referenced by among table a_2. */
static const symbol s_2_0[3] = { 'l', 'e', 'g' };
static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' };
static const symbol s_2_2[2] = { 'i', 'g' };
static const symbol s_2_3[3] = { 'e', 'i', 'g' };
static const symbol s_2_4[3] = { 'l', 'i', 'g' };
static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' };
static const symbol s_2_6[3] = { 'e', 'l', 's' };
static const symbol s_2_7[3] = { 'l', 'o', 'v' };
static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' };
static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' };
static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' };
/*
 * Among table a_2 (11 entries), searched by r_other_suffix via
 * find_among_b().  Entry layout is
 * { string length, string, index of another entry or -1, result code, 0 }.
 * Every result code is 1, which r_other_suffix handles by deleting the
 * matched slice.
 * NOTE(review): a non-negative third field names a shorter entry that is a
 * suffix of this one (e.g. 10 "hetslov" -> 9 "slov" -> 7 "lov"); presumably a
 * fallback link used by find_among_b -- confirm in the runtime.
 */
static const struct among a_2[11] =
{
/* 0 */ { 3, s_2_0, -1, 1, 0},
/* 1 */ { 4, s_2_1, 0, 1, 0},
/* 2 */ { 2, s_2_2, -1, 1, 0},
/* 3 */ { 3, s_2_3, 2, 1, 0},
/* 4 */ { 3, s_2_4, 2, 1, 0},
/* 5 */ { 4, s_2_5, 4, 1, 0},
/* 6 */ { 3, s_2_6, -1, 1, 0},
/* 7 */ { 3, s_2_7, -1, 1, 0},
/* 8 */ { 4, s_2_8, 7, 1, 0},
/* 9 */ { 4, s_2_9, 7, 1, 0},
/* 10 */ { 7, s_2_10, 9, 1, 0}
};
/*
 * Character groupings passed to the in_grouping / out_grouping helpers.
 * g_v is used with the code range 97..248, g_s_ending with 98..122.
 * NOTE(review): assuming the helpers read these as bitmaps with one bit per
 * character code counted from the range minimum (least significant bit
 * first), g_v holds a e i o u y plus codes 229, 230 and 248, and g_s_ending
 * holds b c d f g h j l m n o p r t v y z -- confirm against the runtime.
 */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
static const unsigned char g_s_ending[] = { 119, 125, 149, 1 };
/* Literals used by r_main_suffix: s_0 is compared with eq_s_b(), s_1 is the
 * replacement text given to slice_from_s(). */
static const symbol s_0[] = { 'k' };
static const symbol s_1[] = { 'e', 'r' };
/*
 * Compute the two marks the suffix routines rely on.
 *
 * z->I[1] (mark "x" in the Snowball source) is set to the position three
 * symbols past z->c.  z->I[0] (mark "p1") starts out as z->l and is then set
 * to the position reached by the out_grouping / in_grouping scan over g_v;
 * if that lies before z->I[1] it is raised to z->I[1].
 *
 * Returns 1 on success.  Returns 0 when the three-symbol hop would fall
 * outside [0, z->l] or when either scan reports a negative result; in those
 * cases z->I[0] is still z->l.
 */
static int r_mark_regions(struct SN_env * z) {
    int hop_to;
    int advance;

    z->I[0] = z->l;

    /* test ( hop 3 setmark x ), line 30 -- z->c itself does not move */
    hop_to = z->c + 3;
    if (hop_to < 0 || hop_to > z->l) return 0;
    z->I[1] = hop_to;

    /* goto v, line 31 */
    if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0;

    /* gopast non-v, line 31 */
    advance = in_grouping(z, g_v, 97, 248, 1);
    if (advance < 0) return 0;
    z->c += advance;

    z->I[0] = z->c;                             /* setmark p1, line 31 */

    /* try, line 32: p1 must not lie before x */
    if (z->I[0] < z->I[1]) z->I[0] = z->I[1];

    return 1;
}
/*
 * Handle the main suffix class (among table a_0).
 *
 * The lookup is done backwards from z->c with z->lb temporarily raised to
 * z->I[0] (p1); z->lb is restored before the result is acted upon.  The
 * result code of the lookup selects the action:
 *   1 - delete the matched slice;
 *   2 - delete it if the preceding symbol is in g_s_ending, or else if it is
 *       'k' that is itself preceded by a symbol outside g_v;
 *   3 - replace the slice with "er".
 *
 * Returns 1 on success, 0 if nothing applies, or a negative value passed on
 * from slice_del / slice_from_s.
 */
static int r_main_suffix(struct SN_env * z) {
    int among_var = 0;
    int saved_lb;
    int status;

    /* setlimit tomark p1, line 38 */
    if (z->c < z->I[0]) return 0;
    saved_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;                              /* [, line 38 */
    if (z->c > z->lb) {
        int last = z->p[z->c - 1];

        /* Cheap pre-filter: 1851426 has bits 1, 5, 14, 18, 19 and 20 set,
         * i.e. codes 97 'a', 101 'e', 110 'n', 114 'r', 115 's', 116 't' --
         * the final letters of the strings in a_0. */
        if (last >> 5 == 3 && ((1851426 >> (last & 0x1f)) & 1))
            among_var = find_among_b(z, a_0, 29);       /* substring, line 38 */
    }
    if (among_var) z->bra = z->c;               /* ], line 38 */
    z->lb = saved_lb;
    if (!among_var) return 0;

    if (among_var == 1) {
        status = slice_del(z);                  /* delete, line 44 */
        if (status < 0) return status;
    } else if (among_var == 2) {
        int from_end = z->l - z->c;             /* or, line 46 */

        /* a non-zero result from in_grouping_b selects the alternative */
        if (in_grouping_b(z, g_s_ending, 98, 122, 0)) {
            z->c = z->l - from_end;
            if (!eq_s_b(z, 1, s_0)) return 0;
            if (out_grouping_b(z, g_v, 97, 248, 0)) return 0;
        }
        status = slice_del(z);                  /* delete, line 46 */
        if (status < 0) return status;
    } else if (among_var == 3) {
        status = slice_from_s(z, 2, s_1);       /* <-, line 48 */
        if (status < 0) return status;
    }
    return 1;
}
/*
 * If the text before z->c ends in one of the a_1 strings ("dt" or "vt"),
 * delete its last symbol.
 *
 * The lookup is only a test: it runs with z->lb temporarily raised to
 * z->I[0] (p1), and afterwards z->c is put back and z->lb restored.  z->ket
 * stays where the lookup set it, z->c then steps back one symbol and z->bra
 * is set there, so slice_del() removes exactly that symbol.
 *
 * Returns 1 on success, 0 if nothing matched, or a negative value passed on
 * from slice_del.
 */
static int r_consonant_pair(struct SN_env * z) {
    int from_end = z->l - z->c;                 /* test, line 53 */
    int saved_lb;
    int matched;
    int status;

    /* setlimit tomark p1, line 54 */
    if (z->c < z->I[0]) return 0;
    saved_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;                              /* [, line 54 */
    /* both a_1 strings are two symbols long and end in code 116 ('t') */
    matched = z->c - 1 > z->lb
        && z->p[z->c - 1] == 116
        && find_among_b(z, a_1, 2) != 0;        /* substring, line 54 */
    if (matched) z->bra = z->c;                 /* ], line 54 */
    z->lb = saved_lb;
    if (!matched) return 0;

    z->c = z->l - from_end;                     /* end of test */

    if (z->c <= z->lb) return 0;
    z->c--;                                     /* next, line 59 */
    z->bra = z->c;                              /* ], line 59 */

    status = slice_del(z);                      /* delete, line 59 */
    if (status < 0) return status;
    return 1;
}
/*
 * Delete a suffix listed in among table a_2.
 *
 * The lookup runs backwards from z->c with z->lb temporarily raised to
 * z->I[0] (p1); z->lb is restored before returning.  Result code 1 -- the
 * only one a_2 contains -- deletes the matched slice.
 *
 * Returns 1 on success, 0 if no suffix matched, or a negative value passed
 * on from slice_del.
 */
static int r_other_suffix(struct SN_env * z) {
    int among_var = 0;
    int saved_lb;

    /* setlimit tomark p1, line 63 */
    if (z->c < z->I[0]) return 0;
    saved_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;                              /* [, line 63 */
    if (z->c - 1 > z->lb) {
        int last = z->p[z->c - 1];

        /* Cheap pre-filter: 4718720 has bits 7, 19 and 22 set, i.e. codes
         * 103 'g', 115 's', 118 'v' -- the final letters of the a_2 strings. */
        if (last >> 5 == 3 && ((4718720 >> (last & 0x1f)) & 1))
            among_var = find_among_b(z, a_2, 11);       /* substring, line 63 */
    }
    if (among_var) z->bra = z->c;               /* ], line 63 */
    z->lb = saved_lb;
    if (!among_var) return 0;

    if (among_var == 1) {
        int status = slice_del(z);              /* delete, line 67 */
        if (status < 0) return status;
    }
    return 1;
}
extern int norwegian_ISO_8859_1_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 74 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab0; /* call mark_regions, line 74 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
z->lb = z->c; z->c = z->l; /* backwards, line 75 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 76 */
{ int ret = r_main_suffix(z);
if (ret == 0) goto lab1; /* call main_suffix, line 76 */
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 77 */
{ int ret = r_consonant_pair(z);
if (ret == 0) goto lab2; /* call consonant_pair, line 77 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
{ int m4 = z->l - z->c; (void)m4; /* do, line 78 */
{ int ret = r_other_suffix(z);
if (ret == 0) goto lab3; /* call other_suffix, line 78 */
if (ret < 0) return ret;
}
lab3:
z->c = z->l - m4;
}
z->c = z->lb;
return 1;
}
/*
 * Create the environment used by the Norwegian stemmer; forwards to
 * SN_create_env(0, 2, 0) and returns its result.  The stemmer uses z->I[0]
 * and z->I[1].
 * NOTE(review): the arguments are presumably the numbers of string, integer
 * and boolean slots -- confirm against SN_create_env's declaration.
 */
extern struct SN_env * norwegian_ISO_8859_1_create_env(void)
{
    return SN_create_env(0, 2, 0);
}
/*
 * Dispose of an environment obtained from norwegian_ISO_8859_1_create_env()
 * by forwarding it to SN_close_env(z, 0).
 */
extern void norwegian_ISO_8859_1_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

View File

@ -0,0 +1,749 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int porter_ISO_8859_1_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_Step_5b(struct SN_env * z);
static int r_Step_5a(struct SN_env * z);
static int r_Step_4(struct SN_env * z);
static int r_Step_3(struct SN_env * z);
static int r_Step_2(struct SN_env * z);
static int r_Step_1c(struct SN_env * z);
static int r_Step_1b(struct SN_env * z);
static int r_Step_1a(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_shortv(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * porter_ISO_8859_1_create_env(void);
extern void porter_ISO_8859_1_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Among table a_0, searched by r_Step_1a via find_among_b().  Entry layout
 * is { string length, string, index of another entry or -1, result code, 0 }.
 * Result codes as handled by r_Step_1a: 1 ("sses") -> replace with "ss",
 * 2 ("ies") -> replace with "i", 3 ("s") -> delete; -1 ("ss") matches no
 * case there, so the text is left unchanged.
 * NOTE(review): entries 1-3 all point at entry 0 ("s"), which is a suffix of
 * each of them; presumably a fallback link for find_among_b -- confirm.
 */
static const symbol s_0_0[1] = { 's' };
static const symbol s_0_1[3] = { 'i', 'e', 's' };
static const symbol s_0_2[4] = { 's', 's', 'e', 's' };
static const symbol s_0_3[2] = { 's', 's' };
static const struct among a_0[4] =
{
/* 0 */ { 1, s_0_0, -1, 3, 0},
/* 1 */ { 3, s_0_1, 0, 2, 0},
/* 2 */ { 4, s_0_2, 0, 1, 0},
/* 3 */ { 2, s_0_3, 0, -1, 0}
};
/*
 * Strings for among table a_1.  There is no s_1_0: entry 0 of the table is
 * the empty string (length 0, null string pointer).
 */
static const symbol s_1_1[2] = { 'b', 'b' };
static const symbol s_1_2[2] = { 'd', 'd' };
static const symbol s_1_3[2] = { 'f', 'f' };
static const symbol s_1_4[2] = { 'g', 'g' };
static const symbol s_1_5[2] = { 'b', 'l' };
static const symbol s_1_6[2] = { 'm', 'm' };
static const symbol s_1_7[2] = { 'n', 'n' };
static const symbol s_1_8[2] = { 'p', 'p' };
static const symbol s_1_9[2] = { 'r', 'r' };
static const symbol s_1_10[2] = { 'a', 't' };
static const symbol s_1_11[2] = { 't', 't' };
static const symbol s_1_12[2] = { 'i', 'z' };
/*
 * Among table a_1, used by r_Step_1b for its second lookup.  Entry layout is
 * { string length, string, index of another entry or -1, result code, 0 }.
 * Result codes as handled by r_Step_1b: 1 ("bl", "at", "iz") -> insert "e";
 * 2 (the doubled letters) -> delete the last symbol; 3 (the empty string) ->
 * insert "e" only when further conditions hold.
 */
static const struct among a_1[13] =
{
/* 0 */ { 0, 0, -1, 3, 0},
/* 1 */ { 2, s_1_1, 0, 2, 0},
/* 2 */ { 2, s_1_2, 0, 2, 0},
/* 3 */ { 2, s_1_3, 0, 2, 0},
/* 4 */ { 2, s_1_4, 0, 2, 0},
/* 5 */ { 2, s_1_5, 0, 1, 0},
/* 6 */ { 2, s_1_6, 0, 2, 0},
/* 7 */ { 2, s_1_7, 0, 2, 0},
/* 8 */ { 2, s_1_8, 0, 2, 0},
/* 9 */ { 2, s_1_9, 0, 2, 0},
/* 10 */ { 2, s_1_10, 0, 1, 0},
/* 11 */ { 2, s_1_11, 0, 2, 0},
/* 12 */ { 2, s_1_12, 0, 1, 0}
};
/*
 * Among table a_2, used by r_Step_1b for its first lookup.  Entry layout is
 * { string length, string, index of another entry or -1, result code, 0 }.
 * Result code 1 ("eed") leads to replacement with "ee"; result code 2
 * ("ed", "ing") leads to deletion followed by the a_1 lookup.
 */
static const symbol s_2_0[2] = { 'e', 'd' };
static const symbol s_2_1[3] = { 'e', 'e', 'd' };
static const symbol s_2_2[3] = { 'i', 'n', 'g' };
static const struct among a_2[3] =
{
/* 0 */ { 2, s_2_0, -1, 2, 0},
/* 1 */ { 3, s_2_1, 0, 1, 0},
/* 2 */ { 3, s_2_2, -1, 2, 0}
};
/* Suffix strings referenced by among table a_3. */
static const symbol s_3_0[4] = { 'a', 'n', 'c', 'i' };
static const symbol s_3_1[4] = { 'e', 'n', 'c', 'i' };
static const symbol s_3_2[4] = { 'a', 'b', 'l', 'i' };
static const symbol s_3_3[3] = { 'e', 'l', 'i' };
static const symbol s_3_4[4] = { 'a', 'l', 'l', 'i' };
static const symbol s_3_5[5] = { 'o', 'u', 's', 'l', 'i' };
static const symbol s_3_6[5] = { 'e', 'n', 't', 'l', 'i' };
static const symbol s_3_7[5] = { 'a', 'l', 'i', 't', 'i' };
static const symbol s_3_8[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
static const symbol s_3_9[5] = { 'i', 'v', 'i', 't', 'i' };
static const symbol s_3_10[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
static const symbol s_3_11[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
static const symbol s_3_12[5] = { 'a', 'l', 'i', 's', 'm' };
static const symbol s_3_13[5] = { 'a', 't', 'i', 'o', 'n' };
static const symbol s_3_14[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
static const symbol s_3_15[4] = { 'i', 'z', 'e', 'r' };
static const symbol s_3_16[4] = { 'a', 't', 'o', 'r' };
static const symbol s_3_17[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
static const symbol s_3_18[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
static const symbol s_3_19[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
/*
 * Among table a_3 (20 entries), searched by r_Step_2 via find_among_b().
 * Entry layout is
 * { string length, string, index of another entry or -1, result code, 0 }.
 * The result code (1..14) selects which replacement string r_Step_2 passes
 * to slice_from_s().
 * NOTE(review): a non-negative third field names a shorter entry that is a
 * suffix of this one (11 "ational" -> 10 "tional", 14 "ization" ->
 * 13 "ation"); presumably a fallback link for find_among_b -- confirm.
 */
static const struct among a_3[20] =
{
/* 0 */ { 4, s_3_0, -1, 3, 0},
/* 1 */ { 4, s_3_1, -1, 2, 0},
/* 2 */ { 4, s_3_2, -1, 4, 0},
/* 3 */ { 3, s_3_3, -1, 6, 0},
/* 4 */ { 4, s_3_4, -1, 9, 0},
/* 5 */ { 5, s_3_5, -1, 12, 0},
/* 6 */ { 5, s_3_6, -1, 5, 0},
/* 7 */ { 5, s_3_7, -1, 10, 0},
/* 8 */ { 6, s_3_8, -1, 14, 0},
/* 9 */ { 5, s_3_9, -1, 13, 0},
/* 10 */ { 6, s_3_10, -1, 1, 0},
/* 11 */ { 7, s_3_11, 10, 8, 0},
/* 12 */ { 5, s_3_12, -1, 10, 0},
/* 13 */ { 5, s_3_13, -1, 8, 0},
/* 14 */ { 7, s_3_14, 13, 7, 0},
/* 15 */ { 4, s_3_15, -1, 7, 0},
/* 16 */ { 4, s_3_16, -1, 8, 0},
/* 17 */ { 7, s_3_17, -1, 13, 0},
/* 18 */ { 7, s_3_18, -1, 11, 0},
/* 19 */ { 7, s_3_19, -1, 12, 0}
};
/* Suffix strings referenced by among table a_4. */
static const symbol s_4_0[5] = { 'i', 'c', 'a', 't', 'e' };
static const symbol s_4_1[5] = { 'a', 't', 'i', 'v', 'e' };
static const symbol s_4_2[5] = { 'a', 'l', 'i', 'z', 'e' };
static const symbol s_4_3[5] = { 'i', 'c', 'i', 't', 'i' };
static const symbol s_4_4[4] = { 'i', 'c', 'a', 'l' };
static const symbol s_4_5[3] = { 'f', 'u', 'l' };
static const symbol s_4_6[4] = { 'n', 'e', 's', 's' };
/*
 * Among table a_4, searched by r_Step_3 via find_among_b().  Entry layout is
 * { string length, string, index of another entry or -1, result code, 0 }.
 * Result codes as handled by r_Step_3: 1 -> replace with "al",
 * 2 -> replace with "ic", 3 -> delete.
 */
static const struct among a_4[7] =
{
/* 0 */ { 5, s_4_0, -1, 2, 0},
/* 1 */ { 5, s_4_1, -1, 3, 0},
/* 2 */ { 5, s_4_2, -1, 1, 0},
/* 3 */ { 5, s_4_3, -1, 2, 0},
/* 4 */ { 4, s_4_4, -1, 2, 0},
/* 5 */ { 3, s_4_5, -1, 3, 0},
/* 6 */ { 4, s_4_6, -1, 3, 0}
};
/* Suffix strings referenced by among table a_5. */
static const symbol s_5_0[2] = { 'i', 'c' };
static const symbol s_5_1[4] = { 'a', 'n', 'c', 'e' };
static const symbol s_5_2[4] = { 'e', 'n', 'c', 'e' };
static const symbol s_5_3[4] = { 'a', 'b', 'l', 'e' };
static const symbol s_5_4[4] = { 'i', 'b', 'l', 'e' };
static const symbol s_5_5[3] = { 'a', 't', 'e' };
static const symbol s_5_6[3] = { 'i', 'v', 'e' };
static const symbol s_5_7[3] = { 'i', 'z', 'e' };
static const symbol s_5_8[3] = { 'i', 't', 'i' };
static const symbol s_5_9[2] = { 'a', 'l' };
static const symbol s_5_10[3] = { 'i', 's', 'm' };
static const symbol s_5_11[3] = { 'i', 'o', 'n' };
static const symbol s_5_12[2] = { 'e', 'r' };
static const symbol s_5_13[3] = { 'o', 'u', 's' };
static const symbol s_5_14[3] = { 'a', 'n', 't' };
static const symbol s_5_15[3] = { 'e', 'n', 't' };
static const symbol s_5_16[4] = { 'm', 'e', 'n', 't' };
static const symbol s_5_17[5] = { 'e', 'm', 'e', 'n', 't' };
static const symbol s_5_18[2] = { 'o', 'u' };
/*
 * Among table a_5 (19 entries), searched by r_Step_4 via find_among_b().
 * Entry layout is
 * { string length, string, index of another entry or -1, result code, 0 }.
 * Result code 1 makes r_Step_4 delete the match; result code 2 (only "ion")
 * makes it delete the match if the preceding symbol is 's' or 't'.
 * NOTE(review): a non-negative third field names a shorter entry that is a
 * suffix of this one (17 "ement" -> 16 "ment" -> 15 "ent"); presumably a
 * fallback link for find_among_b -- confirm.
 */
static const struct among a_5[19] =
{
/* 0 */ { 2, s_5_0, -1, 1, 0},
/* 1 */ { 4, s_5_1, -1, 1, 0},
/* 2 */ { 4, s_5_2, -1, 1, 0},
/* 3 */ { 4, s_5_3, -1, 1, 0},
/* 4 */ { 4, s_5_4, -1, 1, 0},
/* 5 */ { 3, s_5_5, -1, 1, 0},
/* 6 */ { 3, s_5_6, -1, 1, 0},
/* 7 */ { 3, s_5_7, -1, 1, 0},
/* 8 */ { 3, s_5_8, -1, 1, 0},
/* 9 */ { 2, s_5_9, -1, 1, 0},
/* 10 */ { 3, s_5_10, -1, 1, 0},
/* 11 */ { 3, s_5_11, -1, 2, 0},
/* 12 */ { 2, s_5_12, -1, 1, 0},
/* 13 */ { 3, s_5_13, -1, 1, 0},
/* 14 */ { 3, s_5_14, -1, 1, 0},
/* 15 */ { 3, s_5_15, -1, 1, 0},
/* 16 */ { 4, s_5_16, 15, 1, 0},
/* 17 */ { 5, s_5_17, 16, 1, 0},
/* 18 */ { 2, s_5_18, -1, 1, 0}
};
/*
 * Character groupings passed to the in_grouping / out_grouping helpers.
 * g_v is used with the code range 97..121, g_v_WXY with 89..121.
 * NOTE(review): assuming the helpers read these as bitmaps with one bit per
 * character code counted from the range minimum (least significant bit
 * first), g_v holds a e i o u y and g_v_WXY holds Y a e i o u w x y --
 * confirm against the runtime.
 */
static const unsigned char g_v[] = { 17, 65, 16, 1 };
static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
/*
 * String literals used by the routines below, either as comparison text
 * (eq_s / eq_s_b) or as replacement / insertion text (slice_from_s /
 * insert_s).  Users: s_0-s_1 r_Step_1a, s_2-s_4 r_Step_1b, s_5-s_7 r_Step_1c,
 * s_8-s_21 r_Step_2, s_22-s_23 r_Step_3, s_24-s_25 r_Step_4, s_26 r_Step_5a,
 * s_27-s_28 r_Step_5b, s_29-s_34 porter_ISO_8859_1_stem.
 */
static const symbol s_0[] = { 's', 's' };
static const symbol s_1[] = { 'i' };
static const symbol s_2[] = { 'e', 'e' };
static const symbol s_3[] = { 'e' };
static const symbol s_4[] = { 'e' };
static const symbol s_5[] = { 'y' };
static const symbol s_6[] = { 'Y' };
static const symbol s_7[] = { 'i' };
static const symbol s_8[] = { 't', 'i', 'o', 'n' };
static const symbol s_9[] = { 'e', 'n', 'c', 'e' };
static const symbol s_10[] = { 'a', 'n', 'c', 'e' };
static const symbol s_11[] = { 'a', 'b', 'l', 'e' };
static const symbol s_12[] = { 'e', 'n', 't' };
static const symbol s_13[] = { 'e' };
static const symbol s_14[] = { 'i', 'z', 'e' };
static const symbol s_15[] = { 'a', 't', 'e' };
static const symbol s_16[] = { 'a', 'l' };
static const symbol s_17[] = { 'a', 'l' };
static const symbol s_18[] = { 'f', 'u', 'l' };
static const symbol s_19[] = { 'o', 'u', 's' };
static const symbol s_20[] = { 'i', 'v', 'e' };
static const symbol s_21[] = { 'b', 'l', 'e' };
static const symbol s_22[] = { 'a', 'l' };
static const symbol s_23[] = { 'i', 'c' };
static const symbol s_24[] = { 's' };
static const symbol s_25[] = { 't' };
static const symbol s_26[] = { 'e' };
static const symbol s_27[] = { 'l' };
static const symbol s_28[] = { 'l' };
static const symbol s_29[] = { 'y' };
static const symbol s_30[] = { 'Y' };
static const symbol s_31[] = { 'y' };
static const symbol s_32[] = { 'Y' };
static const symbol s_33[] = { 'Y' };
static const symbol s_34[] = { 'y' };
/*
 * Backward test made of three grouping checks, tried in order:
 * out_grouping_b on g_v_WXY, in_grouping_b on g_v, out_grouping_b on g_v.
 * Returns 1 only if every helper returns 0; the first helper that returns
 * non-zero ends the test with result 0 and the remaining ones are not
 * called.
 */
static int r_shortv(struct SN_env * z) {
    return !out_grouping_b(z, g_v_WXY, 89, 121, 0)
        && !in_grouping_b(z, g_v, 97, 121, 0)
        && !out_grouping_b(z, g_v, 97, 121, 0);
}
/*
 * Region test R1: returns 1 if z->c is at or beyond the mark z->I[0] (p1),
 * otherwise 0.
 */
static int r_R1(struct SN_env * z) {
    return z->I[0] <= z->c;
}
/*
 * Region test R2: returns 1 if z->c is at or beyond the mark z->I[1] (p2),
 * otherwise 0.
 */
static int r_R2(struct SN_env * z) {
    return z->I[1] <= z->c;
}
/*
 * Step 1a: look up the ending before z->c in among table a_0 and act on the
 * result code:
 *   1 - replace the match with "ss";
 *   2 - replace the match with "i";
 *   3 - delete the match;
 *   any other non-zero code - leave the text alone.
 *
 * Returns 1 when a table entry matched, 0 when none did, or a negative value
 * passed on from slice_from_s / slice_del.
 */
static int r_Step_1a(struct SN_env * z) {
    int among_var;
    int status = 0;

    z->ket = z->c;                              /* [, line 25 */

    /* every a_0 string ends in code 115 ('s') */
    if (z->c <= z->lb || z->p[z->c - 1] != 115) return 0;
    among_var = find_among_b(z, a_0, 4);        /* substring, line 25 */
    if (!among_var) return 0;
    z->bra = z->c;                              /* ], line 25 */

    if (among_var == 1)
        status = slice_from_s(z, 2, s_0);       /* <-, line 26 */
    else if (among_var == 2)
        status = slice_from_s(z, 1, s_1);       /* <-, line 27 */
    else if (among_var == 3)
        status = slice_del(z);                  /* delete, line 29 */

    return status < 0 ? status : 1;
}
/*
 * Step 1b: look up the ending before z->c in among table a_2.
 *
 * Result code 1: if r_R1 holds, replace the match with "ee".
 * Result code 2: require the out_grouping_b scan over g_v to succeed (a
 * test; the position is restored), delete the match, then look up what now
 * precedes z->c in a_1 (again as a test) and act on that result code:
 *   1 - insert "e" at z->c;
 *   2 - delete the symbol before z->c;
 *   3 - if z->c equals z->I[0] (p1) and r_shortv holds, insert "e".
 *
 * Returns 1 on success, 0 when a condition fails, or a negative value passed
 * on from a helper.
 */
static int r_Step_1b(struct SN_env * z) {
    int among_var;
    int status;
    int from_end;
    int keep;

    z->ket = z->c;                              /* [, line 34 */

    /* the a_2 strings end in code 100 ('d') or 103 ('g') */
    if (z->c - 1 <= z->lb) return 0;
    if (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103) return 0;
    among_var = find_among_b(z, a_2, 3);        /* substring, line 34 */
    if (!among_var) return 0;
    z->bra = z->c;                              /* ], line 34 */

    if (among_var == 1) {
        status = r_R1(z);                       /* call R1, line 35 */
        if (status <= 0) return status;
        status = slice_from_s(z, 2, s_2);       /* <-, line 35 */
        if (status < 0) return status;
        return 1;
    }
    if (among_var != 2) return 1;

    /* test gopast v, line 38 -- only success matters, position is restored */
    from_end = z->l - z->c;
    if (out_grouping_b(z, g_v, 97, 121, 1) < 0) return 0;
    z->c = z->l - from_end;

    status = slice_del(z);                      /* delete, line 38 */
    if (status < 0) return status;

    /* test substring, line 39 */
    from_end = z->l - z->c;
    among_var = 3;      /* code used when the pre-filter rules out a lookup */
    if (z->c - 1 > z->lb) {
        int last = z->p[z->c - 1];

        /* 68514004 has bits set for b d f g l m n p r t z -- the final
         * letters of the non-empty a_1 strings. */
        if (last >> 5 == 3 && ((68514004 >> (last & 0x1f)) & 1))
            among_var = find_among_b(z, a_1, 13);
    }
    if (!among_var) return 0;
    z->c = z->l - from_end;

    if (among_var == 1) {
        keep = z->c;
        status = insert_s(z, z->c, z->c, 1, s_3);       /* <+, line 41 */
        z->c = keep;
        if (status < 0) return status;
    } else if (among_var == 2) {
        z->ket = z->c;                          /* [, line 44 */
        if (z->c <= z->lb) return 0;
        z->c--;                                 /* next, line 44 */
        z->bra = z->c;                          /* ], line 44 */
        status = slice_del(z);                  /* delete, line 44 */
        if (status < 0) return status;
    } else if (among_var == 3) {
        if (z->c != z->I[0]) return 0;          /* atmark, line 45 */

        from_end = z->l - z->c;                 /* test, line 45 */
        status = r_shortv(z);                   /* call shortv, line 45 */
        if (status <= 0) return status;
        z->c = z->l - from_end;

        keep = z->c;
        status = insert_s(z, z->c, z->c, 1, s_4);       /* <+, line 45 */
        z->c = keep;
        if (status < 0) return status;
    }
    return 1;
}
/*
 * Step 1c: if the text before z->c ends in 'y' or 'Y' and the following
 * out_grouping_b scan over g_v succeeds, replace that letter with 'i'.
 *
 * Returns 1 on success, 0 if the letter or the scan does not match, or a
 * negative value passed on from slice_from_s.
 */
static int r_Step_1c(struct SN_env * z) {
    int from_end;
    int moved;
    int status;

    z->ket = z->c;                              /* [, line 52 */

    from_end = z->l - z->c;                     /* or, line 52 */
    if (!eq_s_b(z, 1, s_5)) {
        z->c = z->l - from_end;
        if (!eq_s_b(z, 1, s_6)) return 0;
    }
    z->bra = z->c;                              /* ], line 52 */

    /* gopast v, line 53 */
    moved = out_grouping_b(z, g_v, 97, 121, 1);
    if (moved < 0) return 0;
    z->c -= moved;

    status = slice_from_s(z, 1, s_7);           /* <-, line 54 */
    if (status < 0) return status;
    return 1;
}
static int r_Step_2(struct SN_env * z) {
int among_var;
z->ket = z->c; /* [, line 58 */
if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
among_var = find_among_b(z, a_3, 20); /* substring, line 58 */
if (!(among_var)) return 0;
z->bra = z->c; /* ], line 58 */
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 58 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: return 0;
case 1:
{ int ret = slice_from_s(z, 4, s_8); /* <-, line 59 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 4, s_9); /* <-, line 60 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 4, s_10); /* <-, line 61 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = slice_from_s(z, 4, s_11); /* <-, line 62 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = slice_from_s(z, 3, s_12); /* <-, line 63 */
if (ret < 0) return ret;
}
break;
case 6:
{ int ret = slice_from_s(z, 1, s_13); /* <-, line 64 */
if (ret < 0) return ret;
}
break;
case 7:
{ int ret = slice_from_s(z, 3, s_14); /* <-, line 66 */
if (ret < 0) return ret;
}
break;
case 8:
{ int ret = slice_from_s(z, 3, s_15); /* <-, line 68 */
if (ret < 0) return ret;
}
break;
case 9:
{ int ret = slice_from_s(z, 2, s_16); /* <-, line 69 */
if (ret < 0) return ret;
}
break;
case 10:
{ int ret = slice_from_s(z, 2, s_17); /* <-, line 71 */
if (ret < 0) return ret;
}
break;
case 11:
{ int ret = slice_from_s(z, 3, s_18); /* <-, line 72 */
if (ret < 0) return ret;
}
break;
case 12:
{ int ret = slice_from_s(z, 3, s_19); /* <-, line 74 */
if (ret < 0) return ret;
}
break;
case 13:
{ int ret = slice_from_s(z, 3, s_20); /* <-, line 76 */
if (ret < 0) return ret;
}
break;
case 14:
{ int ret = slice_from_s(z, 3, s_21); /* <-, line 77 */
if (ret < 0) return ret;
}
break;
}
return 1;
}
/*
 * Step 3: look up the ending before z->c in among table a_4 and, provided
 * r_R1 holds, act on the result code:
 *   1 - replace the match with "al";
 *   2 - replace the match with "ic";
 *   3 - delete the match.
 *
 * Returns 1 on success, 0 if no entry matched or r_R1 failed, or a negative
 * value passed on from a helper.
 */
static int r_Step_3(struct SN_env * z) {
    int among_var = 0;
    int status;

    z->ket = z->c;                              /* [, line 82 */
    if (z->c - 2 > z->lb) {
        int last = z->p[z->c - 1];

        /* 528928 has bits set for e i l s -- the final letters of the a_4
         * strings. */
        if (last >> 5 == 3 && ((528928 >> (last & 0x1f)) & 1))
            among_var = find_among_b(z, a_4, 7);        /* substring, line 82 */
    }
    if (!among_var) return 0;
    z->bra = z->c;                              /* ], line 82 */

    status = r_R1(z);                           /* call R1, line 82 */
    if (status <= 0) return status;

    if (among_var == 1)
        status = slice_from_s(z, 2, s_22);      /* <-, line 83 */
    else if (among_var == 2)
        status = slice_from_s(z, 2, s_23);      /* <-, line 85 */
    else if (among_var == 3)
        status = slice_del(z);                  /* delete, line 87 */

    return status < 0 ? status : 1;
}
/*
 * Step 4: look up the ending before z->c in among table a_5 and, provided
 * r_R2 holds, delete the match.  For result code 2 the deletion additionally
 * requires the match to be preceded by 's' or 't'.
 *
 * Returns 1 on success, 0 if no entry matched or a condition failed, or a
 * negative value passed on from a helper.
 */
static int r_Step_4(struct SN_env * z) {
    int among_var = 0;
    int status;

    z->ket = z->c;                              /* [, line 92 */
    if (z->c - 1 > z->lb) {
        int last = z->p[z->c - 1];

        /* 3961384 has bits set for c e i l m n r s t u -- the final letters
         * of the a_5 strings. */
        if (last >> 5 == 3 && ((3961384 >> (last & 0x1f)) & 1))
            among_var = find_among_b(z, a_5, 19);       /* substring, line 92 */
    }
    if (!among_var) return 0;
    z->bra = z->c;                              /* ], line 92 */

    status = r_R2(z);                           /* call R2, line 92 */
    if (status <= 0) return status;

    if (among_var == 2) {
        int from_end = z->l - z->c;             /* or, line 96 */

        if (!eq_s_b(z, 1, s_24)) {
            z->c = z->l - from_end;
            if (!eq_s_b(z, 1, s_25)) return 0;
        }
    }
    if (among_var == 1 || among_var == 2) {
        status = slice_del(z);                  /* delete, lines 95 / 96 */
        if (status < 0) return status;
    }
    return 1;
}
/*
 * Step 5a: delete a final 'e' when either r_R2 holds, or r_R1 holds and
 * r_shortv does not.
 *
 * Returns 1 if the 'e' was deleted, 0 if the conditions are not met, or a
 * negative value passed on from a helper.
 */
static int r_Step_5a(struct SN_env * z) {
    int status;
    int from_end;

    z->ket = z->c;                              /* [, line 101 */
    if (!eq_s_b(z, 1, s_26)) return 0;
    z->bra = z->c;                              /* ], line 101 */

    from_end = z->l - z->c;                     /* or, line 102 */
    status = r_R2(z);                           /* call R2, line 102 */
    if (status < 0) return status;
    if (status == 0) {
        /* second alternative: R1 and not shortv */
        z->c = z->l - from_end;

        status = r_R1(z);                       /* call R1, line 102 */
        if (status <= 0) return status;

        from_end = z->l - z->c;                 /* not, line 102 */
        status = r_shortv(z);                   /* call shortv, line 102 */
        if (status < 0) return status;
        if (status > 0) return 0;
        z->c = z->l - from_end;
    }

    status = slice_del(z);                      /* delete, line 103 */
    if (status < 0) return status;
    return 1;
}
/*
 * Step 5b: delete a final 'l' when r_R2 holds at the position before it and
 * it is itself preceded by another 'l'.
 *
 * Returns 1 if the 'l' was deleted, 0 if the conditions are not met, or a
 * negative value passed on from a helper.
 */
static int r_Step_5b(struct SN_env * z) {
    int status;

    z->ket = z->c;                              /* [, line 107 */
    if (!eq_s_b(z, 1, s_27)) return 0;
    z->bra = z->c;                              /* ], line 107 */

    status = r_R2(z);                           /* call R2, line 108 */
    if (status <= 0) return status;

    if (!eq_s_b(z, 1, s_28)) return 0;

    status = slice_del(z);                      /* delete, line 109 */
    return status < 0 ? status : 1;
}
extern int porter_ISO_8859_1_stem(struct SN_env * z) {
z->B[0] = 0; /* unset Y_found, line 115 */
{ int c1 = z->c; /* do, line 116 */
z->bra = z->c; /* [, line 116 */
if (!(eq_s(z, 1, s_29))) goto lab0;
z->ket = z->c; /* ], line 116 */
{ int ret = slice_from_s(z, 1, s_30); /* <-, line 116 */
if (ret < 0) return ret;
}
z->B[0] = 1; /* set Y_found, line 116 */
lab0:
z->c = c1;
}
{ int c2 = z->c; /* do, line 117 */
while(1) { /* repeat, line 117 */
int c3 = z->c;
while(1) { /* goto, line 117 */
int c4 = z->c;
if (in_grouping(z, g_v, 97, 121, 0)) goto lab3;
z->bra = z->c; /* [, line 117 */
if (!(eq_s(z, 1, s_31))) goto lab3;
z->ket = z->c; /* ], line 117 */
z->c = c4;
break;
lab3:
z->c = c4;
if (z->c >= z->l) goto lab2;
z->c++; /* goto, line 117 */
}
{ int ret = slice_from_s(z, 1, s_32); /* <-, line 117 */
if (ret < 0) return ret;
}
z->B[0] = 1; /* set Y_found, line 117 */
continue;
lab2:
z->c = c3;
break;
}
z->c = c2;
}
z->I[0] = z->l;
z->I[1] = z->l;
{ int c5 = z->c; /* do, line 121 */
{ /* gopast */ /* grouping v, line 122 */
int ret = out_grouping(z, g_v, 97, 121, 1);
if (ret < 0) goto lab4;
z->c += ret;
}
{ /* gopast */ /* non v, line 122 */
int ret = in_grouping(z, g_v, 97, 121, 1);
if (ret < 0) goto lab4;
z->c += ret;
}
z->I[0] = z->c; /* setmark p1, line 122 */
{ /* gopast */ /* grouping v, line 123 */
int ret = out_grouping(z, g_v, 97, 121, 1);
if (ret < 0) goto lab4;
z->c += ret;
}
{ /* gopast */ /* non v, line 123 */
int ret = in_grouping(z, g_v, 97, 121, 1);
if (ret < 0) goto lab4;
z->c += ret;
}
z->I[1] = z->c; /* setmark p2, line 123 */
lab4:
z->c = c5;
}
z->lb = z->c; z->c = z->l; /* backwards, line 126 */
{ int m6 = z->l - z->c; (void)m6; /* do, line 127 */
{ int ret = r_Step_1a(z);
if (ret == 0) goto lab5; /* call Step_1a, line 127 */
if (ret < 0) return ret;
}
lab5:
z->c = z->l - m6;
}
{ int m7 = z->l - z->c; (void)m7; /* do, line 128 */
{ int ret = r_Step_1b(z);
if (ret == 0) goto lab6; /* call Step_1b, line 128 */
if (ret < 0) return ret;
}
lab6:
z->c = z->l - m7;
}
{ int m8 = z->l - z->c; (void)m8; /* do, line 129 */
{ int ret = r_Step_1c(z);
if (ret == 0) goto lab7; /* call Step_1c, line 129 */
if (ret < 0) return ret;
}
lab7:
z->c = z->l - m8;
}
{ int m9 = z->l - z->c; (void)m9; /* do, line 130 */
{ int ret = r_Step_2(z);
if (ret == 0) goto lab8; /* call Step_2, line 130 */
if (ret < 0) return ret;
}
lab8:
z->c = z->l - m9;
}
{ int m10 = z->l - z->c; (void)m10; /* do, line 131 */
{ int ret = r_Step_3(z);
if (ret == 0) goto lab9; /* call Step_3, line 131 */
if (ret < 0) return ret;
}
lab9:
z->c = z->l - m10;
}
{ int m11 = z->l - z->c; (void)m11; /* do, line 132 */
{ int ret = r_Step_4(z);
if (ret == 0) goto lab10; /* call Step_4, line 132 */
if (ret < 0) return ret;
}
lab10:
z->c = z->l - m11;
}
{ int m12 = z->l - z->c; (void)m12; /* do, line 133 */
{ int ret = r_Step_5a(z);
if (ret == 0) goto lab11; /* call Step_5a, line 133 */
if (ret < 0) return ret;
}
lab11:
z->c = z->l - m12;
}
{ int m13 = z->l - z->c; (void)m13; /* do, line 134 */
{ int ret = r_Step_5b(z);
if (ret == 0) goto lab12; /* call Step_5b, line 134 */
if (ret < 0) return ret;
}
lab12:
z->c = z->l - m13;
}
z->c = z->lb;
{ int c14 = z->c; /* do, line 137 */
if (!(z->B[0])) goto lab13; /* Boolean test Y_found, line 137 */
while(1) { /* repeat, line 137 */
int c15 = z->c;
while(1) { /* goto, line 137 */
int c16 = z->c;
z->bra = z->c; /* [, line 137 */
if (!(eq_s(z, 1, s_33))) goto lab15;
z->ket = z->c; /* ], line 137 */
z->c = c16;
break;
lab15:
z->c = c16;
if (z->c >= z->l) goto lab14;
z->c++; /* goto, line 137 */
}
{ int ret = slice_from_s(z, 1, s_34); /* <-, line 137 */
if (ret < 0) return ret;
}
continue;
lab14:
z->c = c15;
break;
}
lab13:
z->c = c14;
}
return 1;
}
/*
 * Create the environment used by the Porter stemmer; forwards to
 * SN_create_env(0, 2, 1) and returns its result.  The stemmer uses z->I[0],
 * z->I[1] and z->B[0].
 * NOTE(review): the arguments are presumably the numbers of string, integer
 * and boolean slots -- confirm against SN_create_env's declaration.
 */
extern struct SN_env * porter_ISO_8859_1_create_env(void)
{
    return SN_create_env(0, 2, 1);
}
/*
 * Dispose of an environment obtained from porter_ISO_8859_1_create_env() by
 * forwarding it to SN_close_env(z, 0).
 */
extern void porter_ISO_8859_1_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,307 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int swedish_ISO_8859_1_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_other_suffix(struct SN_env * z);
static int r_consonant_pair(struct SN_env * z);
static int r_main_suffix(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * swedish_ISO_8859_1_create_env(void);
extern void swedish_ISO_8859_1_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Suffix strings referenced by among table a_0, one array per table entry. */
static const symbol s_0_0[1] = { 'a' };
static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' };
static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' };
static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' };
static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' };
static const symbol s_0_5[2] = { 'a', 'd' };
static const symbol s_0_6[1] = { 'e' };
static const symbol s_0_7[3] = { 'a', 'd', 'e' };
static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' };
static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' };
static const symbol s_0_10[3] = { 'a', 'r', 'e' };
static const symbol s_0_11[4] = { 'a', 's', 't', 'e' };
static const symbol s_0_12[2] = { 'e', 'n' };
static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' };
static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' };
static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' };
static const symbol s_0_16[3] = { 'e', 'r', 'n' };
static const symbol s_0_17[2] = { 'a', 'r' };
static const symbol s_0_18[2] = { 'e', 'r' };
static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' };
static const symbol s_0_20[2] = { 'o', 'r' };
static const symbol s_0_21[1] = { 's' };
static const symbol s_0_22[2] = { 'a', 's' };
static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' };
static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' };
static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' };
static const symbol s_0_26[2] = { 'e', 's' };
static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' };
static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' };
static const symbol s_0_29[3] = { 'e', 'n', 's' };
static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' };
static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' };
static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' };
static const symbol s_0_33[2] = { 'a', 't' };
static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' };
static const symbol s_0_35[3] = { 'h', 'e', 't' };
static const symbol s_0_36[3] = { 'a', 's', 't' };
/*
 * Among table a_0 (37 entries).  Entry layout is
 * { string length, string, index of another entry or -1, result code, 0 }.
 * Result code 2 appears only on entry 21 ("s"); all others carry 1.
 * NOTE(review): presumably searched by r_main_suffix, whose body is not
 * visible in this chunk.  A non-negative third field names a shorter entry
 * that is a suffix of this one (e.g. 3 "heterna" -> 2 "erna" -> 0 "a");
 * presumably a fallback link for the lookup -- confirm in the runtime.
 */
static const struct among a_0[37] =
{
/* 0 */ { 1, s_0_0, -1, 1, 0},
/* 1 */ { 4, s_0_1, 0, 1, 0},
/* 2 */ { 4, s_0_2, 0, 1, 0},
/* 3 */ { 7, s_0_3, 2, 1, 0},
/* 4 */ { 4, s_0_4, 0, 1, 0},
/* 5 */ { 2, s_0_5, -1, 1, 0},
/* 6 */ { 1, s_0_6, -1, 1, 0},
/* 7 */ { 3, s_0_7, 6, 1, 0},
/* 8 */ { 4, s_0_8, 6, 1, 0},
/* 9 */ { 4, s_0_9, 6, 1, 0},
/* 10 */ { 3, s_0_10, 6, 1, 0},
/* 11 */ { 4, s_0_11, 6, 1, 0},
/* 12 */ { 2, s_0_12, -1, 1, 0},
/* 13 */ { 5, s_0_13, 12, 1, 0},
/* 14 */ { 4, s_0_14, 12, 1, 0},
/* 15 */ { 5, s_0_15, 12, 1, 0},
/* 16 */ { 3, s_0_16, -1, 1, 0},
/* 17 */ { 2, s_0_17, -1, 1, 0},
/* 18 */ { 2, s_0_18, -1, 1, 0},
/* 19 */ { 5, s_0_19, 18, 1, 0},
/* 20 */ { 2, s_0_20, -1, 1, 0},
/* 21 */ { 1, s_0_21, -1, 2, 0},
/* 22 */ { 2, s_0_22, 21, 1, 0},
/* 23 */ { 5, s_0_23, 22, 1, 0},
/* 24 */ { 5, s_0_24, 22, 1, 0},
/* 25 */ { 5, s_0_25, 22, 1, 0},
/* 26 */ { 2, s_0_26, 21, 1, 0},
/* 27 */ { 4, s_0_27, 26, 1, 0},
/* 28 */ { 5, s_0_28, 26, 1, 0},
/* 29 */ { 3, s_0_29, 21, 1, 0},
/* 30 */ { 5, s_0_30, 29, 1, 0},
/* 31 */ { 6, s_0_31, 29, 1, 0},
/* 32 */ { 4, s_0_32, 21, 1, 0},
/* 33 */ { 2, s_0_33, -1, 1, 0},
/* 34 */ { 5, s_0_34, -1, 1, 0},
/* 35 */ { 3, s_0_35, -1, 1, 0},
/* 36 */ { 3, s_0_36, -1, 1, 0}
};
/*
 * Two-letter consonant pairs tested by r_consonant_pair via
 * find_among_b(z, a_1, 7).  The caller only checks the result for
 * zero / non-zero, so the -1 in the fourth field is never switched on.
 */
static const symbol s_1_0[2] = { 'd', 'd' };
static const symbol s_1_1[2] = { 'g', 'd' };
static const symbol s_1_2[2] = { 'n', 'n' };
static const symbol s_1_3[2] = { 'd', 't' };
static const symbol s_1_4[2] = { 'g', 't' };
static const symbol s_1_5[2] = { 'k', 't' };
static const symbol s_1_6[2] = { 't', 't' };
static const struct among a_1[7] =
{
/* 0 */ { 2, s_1_0, -1, -1, 0},
/* 1 */ { 2, s_1_1, -1, -1, 0},
/* 2 */ { 2, s_1_2, -1, -1, 0},
/* 3 */ { 2, s_1_3, -1, -1, 0},
/* 4 */ { 2, s_1_4, -1, -1, 0},
/* 5 */ { 2, s_1_5, -1, -1, 0},
/* 6 */ { 2, s_1_6, -1, -1, 0}
};
/*
 * Suffixes handled by r_other_suffix via find_among_b(z, a_2, 5).
 * The fourth field selects the action in that function's switch:
 * 1 = delete the suffix, 2 = replace it with s_0, 3 = replace it with s_1.
 * 0xF6 is a non-ASCII byte; given the ISO_8859_1 naming of this stemmer
 * it is presumably o-diaeresis -- confirm.
 */
static const symbol s_2_0[2] = { 'i', 'g' };
static const symbol s_2_1[3] = { 'l', 'i', 'g' };
static const symbol s_2_2[3] = { 'e', 'l', 's' };
static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' };
static const symbol s_2_4[4] = { 'l', 0xF6, 's', 't' };
static const struct among a_2[5] =
{
/* 0 */ { 2, s_2_0, -1, 1, 0},
/* 1 */ { 3, s_2_1, 0, 1, 0},
/* 2 */ { 3, s_2_2, -1, 1, 0},
/* 3 */ { 5, s_2_3, -1, 3, 0},
/* 4 */ { 4, s_2_4, -1, 2, 0}
};
/* Character-class data for the grouping tests.  g_v is always passed with
 * the bounds 97..246 and g_s_ending with 98..121; each array is presumably
 * a bitmap with one bit per code in that range -- confirm against the
 * in_grouping/out_grouping implementation. */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
static const unsigned char g_s_ending[] = { 119, 127, 149 };
/* Replacement strings written by r_other_suffix (cases 2 and 3). */
static const symbol s_0[] = { 'l', 0xF6, 's' };
static const symbol s_1[] = { 'f', 'u', 'l', 'l' };
/*
 * Establish the two marks used by the suffix routines.
 *
 *   I[1] ("x")  = the position three symbols past the starting cursor.
 *   I[0] ("p1") = the cursor after the two g_v scans below, raised to
 *                 I[1] if it would otherwise fall short of it.
 *
 * I[0] is preset to z->l.  Returns 0 if the word is too short to hop
 * three symbols or if either scan fails, 1 otherwise.  The cursor is left
 * wherever the scans put it; the caller restores it.
 */
static int r_mark_regions(struct SN_env * z) {
    int hop_target;
    int advance;

    z->I[0] = z->l;

    /* test hop 3 setmark x: the cursor itself is not moved */
    hop_target = z->c + 3;
    if (hop_target < 0 || hop_target > z->l)
        return 0;
    z->I[1] = hop_target;

    /* goto v */
    if (out_grouping(z, g_v, 97, 246, 1) < 0)
        return 0;

    /* gopast non-v */
    advance = in_grouping(z, g_v, 97, 246, 1);
    if (advance < 0)
        return 0;
    z->c += advance;

    /* setmark p1, then: try ($p1 < x  $p1 = x) */
    z->I[0] = z->c;
    if (z->I[0] < z->I[1])
        z->I[0] = z->I[1];

    return 1;
}
/*
 * Remove one of the a_0 suffixes, searching backwards from the cursor with
 * the backward limit temporarily raised to mark I[0].
 *
 * Table result 1: the suffix is deleted unconditionally.
 * Table result 2: the suffix is deleted only if the in_grouping_b test
 *                 against g_s_ending yields zero.
 *
 * Returns 1 after a deletion, 0 when nothing applies, or the negative
 * status from slice_del.
 */
static int r_main_suffix(struct SN_env * z) {
    int among_var;
    int saved_lb;
    int last;

    if (z->c < z->I[0])
        return 0;

    /* setlimit tomark p1: the cursor stays where it is, only lb moves */
    saved_lb = z->lb;
    z->lb = z->I[0];
    z->ket = z->c;

    if (z->c <= z->lb) {
        z->lb = saved_lb;
        return 0;
    }

    /* quick reject on the final symbol before the full table search */
    last = z->p[z->c - 1];
    if (last >> 5 != 3 || !((1851442 >> (last & 0x1f)) & 1)) {
        z->lb = saved_lb;
        return 0;
    }

    among_var = find_among_b(z, a_0, 37);
    z->lb = saved_lb;
    if (among_var == 0)
        return 0;
    z->bra = z->c;

    if (among_var == 2 && in_grouping_b(z, g_s_ending, 98, 121, 0))
        return 0;

    if (among_var == 1 || among_var == 2) {
        int status = slice_del(z);
        if (status < 0)
            return status;
    }
    return 1;
}
/*
 * If the word (limited on the left by mark I[0]) ends in one of the a_1
 * consonant pairs, delete its final symbol.
 *
 * Returns 1 on success, 0 when no pair matches, or the negative status
 * from slice_del (in which case the backward limit is not restored, as in
 * the generated original).
 */
static int r_consonant_pair(struct SN_env * z) {
    int saved_lb;
    int from_end;
    int pair_found;
    int status;

    if (z->c < z->I[0])
        return 0;

    saved_lb = z->lb;
    z->lb = z->I[0];
    from_end = z->l - z->c;

    /* cheap last-symbol filter first, then the table lookup */
    pair_found = z->c - 1 > z->lb
        && z->p[z->c - 1] >> 5 == 3
        && ((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)
        && find_among_b(z, a_1, 7);
    if (!pair_found) {
        z->lb = saved_lb;
        return 0;
    }

    /* "and": rewind to where the lookup started, then take one symbol */
    z->c = z->l - from_end;
    z->ket = z->c;
    if (z->c <= z->lb) {
        z->lb = saved_lb;
        return 0;
    }
    z->c--;
    z->bra = z->c;

    status = slice_del(z);
    if (status < 0)
        return status;

    z->lb = saved_lb;
    return 1;
}
/*
 * Handle the a_2 suffixes, searching backwards with the backward limit
 * temporarily raised to mark I[0].
 *
 * Table result 1: delete the suffix.
 * Table result 2: replace it with s_0.
 * Table result 3: replace it with s_1.
 *
 * Returns 1 on success, 0 when nothing matches, or the negative status
 * from the slice operation (the backward limit is then left unrestored,
 * as in the generated original).
 */
static int r_other_suffix(struct SN_env * z) {
    int among_var = 0;
    int saved_lb;
    int status = 0;

    if (z->c < z->I[0])
        return 0;

    saved_lb = z->lb;
    z->lb = z->I[0];
    z->ket = z->c;

    /* only consult the table when the final symbol can start a match */
    if (z->c - 1 > z->lb
        && z->p[z->c - 1] >> 5 == 3
        && ((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1))
        among_var = find_among_b(z, a_2, 5);

    if (among_var == 0) {
        z->lb = saved_lb;
        return 0;
    }
    z->bra = z->c;

    if (among_var == 1)
        status = slice_del(z);
    else if (among_var == 2)
        status = slice_from_s(z, 3, s_0);
    else if (among_var == 3)
        status = slice_from_s(z, 4, s_1);

    if (status < 0)
        return status;

    z->lb = saved_lb;
    return 1;
}
/*
 * Entry point of the Swedish stemmer.
 *
 * Runs r_mark_regions forwards, then switches to backward mode (lb = the
 * starting cursor, cursor = z->l) and runs r_main_suffix,
 * r_consonant_pair and r_other_suffix in that order.  Each step is a "do":
 * a result of 0 is ignored and the cursor is put back afterwards.
 *
 * Returns 1 normally, or the first negative status reported by a step.
 */
extern int swedish_ISO_8859_1_stem(struct SN_env * z) {
    int start = z->c;
    int from_end;
    int status;

    status = r_mark_regions(z);
    if (status < 0)
        return status;
    z->c = start;

    /* backwards */
    z->lb = z->c;
    z->c = z->l;

    from_end = z->l - z->c;
    status = r_main_suffix(z);
    if (status < 0)
        return status;
    z->c = z->l - from_end;

    from_end = z->l - z->c;
    status = r_consonant_pair(z);
    if (status < 0)
        return status;
    z->c = z->l - from_end;

    from_end = z->l - z->c;
    status = r_other_suffix(z);
    if (status < 0)
        return status;
    z->c = z->l - from_end;

    z->c = z->lb;
    return 1;
}
/*
 * Create the environment used by swedish_ISO_8859_1_stem.  The arguments
 * are handed straight to SN_create_env (defined elsewhere); the 2 is
 * consistent with this stemmer's use of z->I[0] and z->I[1] -- presumably
 * it is the number of integer slots, confirm against SN_create_env.
 */
extern struct SN_env * swedish_ISO_8859_1_create_env(void)
{
    return SN_create_env(0, 2, 0);
}
/*
 * Dispose of an environment obtained from swedish_ISO_8859_1_create_env
 * by forwarding it to SN_close_env (defined elsewhere).
 */
extern void swedish_ISO_8859_1_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

View File

@ -0,0 +1,998 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int romanian_ISO_8859_2_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_vowel_suffix(struct SN_env * z);
static int r_verb_suffix(struct SN_env * z);
static int r_combo_suffix(struct SN_env * z);
static int r_standard_suffix(struct SN_env * z);
static int r_step_0(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_RV(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_prelude(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * romanian_ISO_8859_2_create_env(void);
extern void romanian_ISO_8859_2_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Table used by r_postlude via find_among(z, a_0, 3).
 * Entry 0 is the empty string (length 0, null pointer) with result 3;
 * 'I' gives result 1 and 'U' gives result 2.  r_postlude maps 1 -> 'i',
 * 2 -> 'u' and 3 -> advance one symbol.
 */
static const symbol s_0_1[1] = { 'I' };
static const symbol s_0_2[1] = { 'U' };
static const struct among a_0[3] =
{
/* 0 */ { 0, 0, -1, 3, 0},
/* 1 */ { 1, s_0_1, 0, 1, 0},
/* 2 */ { 1, s_0_2, 0, 2, 0}
};
/*
 * Suffix table for r_step_0, searched with find_among_b(z, a_1, 16).
 * The fourth field (1..7) selects the case in r_step_0's switch.
 * The third field is -1 or the index of a shorter entry that is a suffix
 * of this one (7 "iile" -> 6 "ile", 15 "iilor" -> 14 "ilor").
 * 0xFE is a non-ASCII byte; given the ISO_8859_2 naming of this stemmer
 * it is presumably t-cedilla -- confirm.
 */
static const symbol s_1_0[2] = { 'e', 'a' };
static const symbol s_1_1[4] = { 'a', 0xFE, 'i', 'a' };
static const symbol s_1_2[3] = { 'a', 'u', 'a' };
static const symbol s_1_3[3] = { 'i', 'u', 'a' };
static const symbol s_1_4[4] = { 'a', 0xFE, 'i', 'e' };
static const symbol s_1_5[3] = { 'e', 'l', 'e' };
static const symbol s_1_6[3] = { 'i', 'l', 'e' };
static const symbol s_1_7[4] = { 'i', 'i', 'l', 'e' };
static const symbol s_1_8[3] = { 'i', 'e', 'i' };
static const symbol s_1_9[4] = { 'a', 't', 'e', 'i' };
static const symbol s_1_10[2] = { 'i', 'i' };
static const symbol s_1_11[4] = { 'u', 'l', 'u', 'i' };
static const symbol s_1_12[2] = { 'u', 'l' };
static const symbol s_1_13[4] = { 'e', 'l', 'o', 'r' };
static const symbol s_1_14[4] = { 'i', 'l', 'o', 'r' };
static const symbol s_1_15[5] = { 'i', 'i', 'l', 'o', 'r' };
static const struct among a_1[16] =
{
/* 0 */ { 2, s_1_0, -1, 3, 0},
/* 1 */ { 4, s_1_1, -1, 7, 0},
/* 2 */ { 3, s_1_2, -1, 2, 0},
/* 3 */ { 3, s_1_3, -1, 4, 0},
/* 4 */ { 4, s_1_4, -1, 7, 0},
/* 5 */ { 3, s_1_5, -1, 3, 0},
/* 6 */ { 3, s_1_6, -1, 5, 0},
/* 7 */ { 4, s_1_7, 6, 4, 0},
/* 8 */ { 3, s_1_8, -1, 4, 0},
/* 9 */ { 4, s_1_9, -1, 6, 0},
/* 10 */ { 2, s_1_10, -1, 4, 0},
/* 11 */ { 4, s_1_11, -1, 1, 0},
/* 12 */ { 2, s_1_12, -1, 1, 0},
/* 13 */ { 4, s_1_13, -1, 3, 0},
/* 14 */ { 4, s_1_14, -1, 4, 0},
/* 15 */ { 5, s_1_15, 14, 4, 0}
};
/*
 * Suffix table for r_combo_suffix, searched with
 * find_among_b(z, a_2, 46).  The fourth field (1..6) selects the
 * replacement in that function's switch.  0xE3 and 0xFE are non-ASCII
 * bytes; given the ISO_8859_2 naming of this stemmer they are presumably
 * a-breve and t-cedilla -- confirm.
 */
static const symbol s_2_0[5] = { 'i', 'c', 'a', 'l', 'a' };
static const symbol s_2_1[5] = { 'i', 'c', 'i', 'v', 'a' };
static const symbol s_2_2[5] = { 'a', 't', 'i', 'v', 'a' };
static const symbol s_2_3[5] = { 'i', 't', 'i', 'v', 'a' };
static const symbol s_2_4[5] = { 'i', 'c', 'a', 'l', 'e' };
static const symbol s_2_5[6] = { 'a', 0xFE, 'i', 'u', 'n', 'e' };
static const symbol s_2_6[6] = { 'i', 0xFE, 'i', 'u', 'n', 'e' };
static const symbol s_2_7[6] = { 'a', 't', 'o', 'a', 'r', 'e' };
static const symbol s_2_8[6] = { 'i', 't', 'o', 'a', 'r', 'e' };
static const symbol s_2_9[6] = { 0xE3, 't', 'o', 'a', 'r', 'e' };
static const symbol s_2_10[7] = { 'i', 'c', 'i', 't', 'a', 't', 'e' };
static const symbol s_2_11[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' };
static const symbol s_2_12[9] = { 'i', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' };
static const symbol s_2_13[7] = { 'i', 'v', 'i', 't', 'a', 't', 'e' };
static const symbol s_2_14[5] = { 'i', 'c', 'i', 'v', 'e' };
static const symbol s_2_15[5] = { 'a', 't', 'i', 'v', 'e' };
static const symbol s_2_16[5] = { 'i', 't', 'i', 'v', 'e' };
static const symbol s_2_17[5] = { 'i', 'c', 'a', 'l', 'i' };
static const symbol s_2_18[5] = { 'a', 't', 'o', 'r', 'i' };
static const symbol s_2_19[7] = { 'i', 'c', 'a', 't', 'o', 'r', 'i' };
static const symbol s_2_20[5] = { 'i', 't', 'o', 'r', 'i' };
static const symbol s_2_21[5] = { 0xE3, 't', 'o', 'r', 'i' };
static const symbol s_2_22[7] = { 'i', 'c', 'i', 't', 'a', 't', 'i' };
static const symbol s_2_23[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'i' };
static const symbol s_2_24[7] = { 'i', 'v', 'i', 't', 'a', 't', 'i' };
static const symbol s_2_25[5] = { 'i', 'c', 'i', 'v', 'i' };
static const symbol s_2_26[5] = { 'a', 't', 'i', 'v', 'i' };
static const symbol s_2_27[5] = { 'i', 't', 'i', 'v', 'i' };
static const symbol s_2_28[6] = { 'i', 'c', 'i', 't', 0xE3, 'i' };
static const symbol s_2_29[8] = { 'a', 'b', 'i', 'l', 'i', 't', 0xE3, 'i' };
static const symbol s_2_30[6] = { 'i', 'v', 'i', 't', 0xE3, 'i' };
static const symbol s_2_31[7] = { 'i', 'c', 'i', 't', 0xE3, 0xFE, 'i' };
static const symbol s_2_32[9] = { 'a', 'b', 'i', 'l', 'i', 't', 0xE3, 0xFE, 'i' };
static const symbol s_2_33[7] = { 'i', 'v', 'i', 't', 0xE3, 0xFE, 'i' };
static const symbol s_2_34[4] = { 'i', 'c', 'a', 'l' };
static const symbol s_2_35[4] = { 'a', 't', 'o', 'r' };
static const symbol s_2_36[6] = { 'i', 'c', 'a', 't', 'o', 'r' };
static const symbol s_2_37[4] = { 'i', 't', 'o', 'r' };
static const symbol s_2_38[4] = { 0xE3, 't', 'o', 'r' };
static const symbol s_2_39[4] = { 'i', 'c', 'i', 'v' };
static const symbol s_2_40[4] = { 'a', 't', 'i', 'v' };
static const symbol s_2_41[4] = { 'i', 't', 'i', 'v' };
static const symbol s_2_42[5] = { 'i', 'c', 'a', 'l', 0xE3 };
static const symbol s_2_43[5] = { 'i', 'c', 'i', 'v', 0xE3 };
static const symbol s_2_44[5] = { 'a', 't', 'i', 'v', 0xE3 };
static const symbol s_2_45[5] = { 'i', 't', 'i', 'v', 0xE3 };
static const struct among a_2[46] =
{
/* 0 */ { 5, s_2_0, -1, 4, 0},
/* 1 */ { 5, s_2_1, -1, 4, 0},
/* 2 */ { 5, s_2_2, -1, 5, 0},
/* 3 */ { 5, s_2_3, -1, 6, 0},
/* 4 */ { 5, s_2_4, -1, 4, 0},
/* 5 */ { 6, s_2_5, -1, 5, 0},
/* 6 */ { 6, s_2_6, -1, 6, 0},
/* 7 */ { 6, s_2_7, -1, 5, 0},
/* 8 */ { 6, s_2_8, -1, 6, 0},
/* 9 */ { 6, s_2_9, -1, 5, 0},
/* 10 */ { 7, s_2_10, -1, 4, 0},
/* 11 */ { 9, s_2_11, -1, 1, 0},
/* 12 */ { 9, s_2_12, -1, 2, 0},
/* 13 */ { 7, s_2_13, -1, 3, 0},
/* 14 */ { 5, s_2_14, -1, 4, 0},
/* 15 */ { 5, s_2_15, -1, 5, 0},
/* 16 */ { 5, s_2_16, -1, 6, 0},
/* 17 */ { 5, s_2_17, -1, 4, 0},
/* 18 */ { 5, s_2_18, -1, 5, 0},
/* 19 */ { 7, s_2_19, 18, 4, 0},
/* 20 */ { 5, s_2_20, -1, 6, 0},
/* 21 */ { 5, s_2_21, -1, 5, 0},
/* 22 */ { 7, s_2_22, -1, 4, 0},
/* 23 */ { 9, s_2_23, -1, 1, 0},
/* 24 */ { 7, s_2_24, -1, 3, 0},
/* 25 */ { 5, s_2_25, -1, 4, 0},
/* 26 */ { 5, s_2_26, -1, 5, 0},
/* 27 */ { 5, s_2_27, -1, 6, 0},
/* 28 */ { 6, s_2_28, -1, 4, 0},
/* 29 */ { 8, s_2_29, -1, 1, 0},
/* 30 */ { 6, s_2_30, -1, 3, 0},
/* 31 */ { 7, s_2_31, -1, 4, 0},
/* 32 */ { 9, s_2_32, -1, 1, 0},
/* 33 */ { 7, s_2_33, -1, 3, 0},
/* 34 */ { 4, s_2_34, -1, 4, 0},
/* 35 */ { 4, s_2_35, -1, 5, 0},
/* 36 */ { 6, s_2_36, 35, 4, 0},
/* 37 */ { 4, s_2_37, -1, 6, 0},
/* 38 */ { 4, s_2_38, -1, 5, 0},
/* 39 */ { 4, s_2_39, -1, 4, 0},
/* 40 */ { 4, s_2_40, -1, 5, 0},
/* 41 */ { 4, s_2_41, -1, 6, 0},
/* 42 */ { 5, s_2_42, -1, 4, 0},
/* 43 */ { 5, s_2_43, -1, 4, 0},
/* 44 */ { 5, s_2_44, -1, 5, 0},
/* 45 */ { 5, s_2_45, -1, 6, 0}
};
/*
 * Suffix table a_3 (62 entries, results 1..3).  The function that searches
 * it lies outside this part of the file; going by the forward
 * declarations it is presumably r_standard_suffix -- confirm.
 * 0xE3, 0xBA and 0xFE are non-ASCII bytes; given the ISO_8859_2 naming of
 * this stemmer they are presumably a-breve, s-cedilla and t-cedilla.
 */
static const symbol s_3_0[3] = { 'i', 'c', 'a' };
static const symbol s_3_1[5] = { 'a', 'b', 'i', 'l', 'a' };
static const symbol s_3_2[5] = { 'i', 'b', 'i', 'l', 'a' };
static const symbol s_3_3[4] = { 'o', 'a', 's', 'a' };
static const symbol s_3_4[3] = { 'a', 't', 'a' };
static const symbol s_3_5[3] = { 'i', 't', 'a' };
static const symbol s_3_6[4] = { 'a', 'n', 't', 'a' };
static const symbol s_3_7[4] = { 'i', 's', 't', 'a' };
static const symbol s_3_8[3] = { 'u', 't', 'a' };
static const symbol s_3_9[3] = { 'i', 'v', 'a' };
static const symbol s_3_10[2] = { 'i', 'c' };
static const symbol s_3_11[3] = { 'i', 'c', 'e' };
static const symbol s_3_12[5] = { 'a', 'b', 'i', 'l', 'e' };
static const symbol s_3_13[5] = { 'i', 'b', 'i', 'l', 'e' };
static const symbol s_3_14[4] = { 'i', 's', 'm', 'e' };
static const symbol s_3_15[4] = { 'i', 'u', 'n', 'e' };
static const symbol s_3_16[4] = { 'o', 'a', 's', 'e' };
static const symbol s_3_17[3] = { 'a', 't', 'e' };
static const symbol s_3_18[5] = { 'i', 't', 'a', 't', 'e' };
static const symbol s_3_19[3] = { 'i', 't', 'e' };
static const symbol s_3_20[4] = { 'a', 'n', 't', 'e' };
static const symbol s_3_21[4] = { 'i', 's', 't', 'e' };
static const symbol s_3_22[3] = { 'u', 't', 'e' };
static const symbol s_3_23[3] = { 'i', 'v', 'e' };
static const symbol s_3_24[3] = { 'i', 'c', 'i' };
static const symbol s_3_25[5] = { 'a', 'b', 'i', 'l', 'i' };
static const symbol s_3_26[5] = { 'i', 'b', 'i', 'l', 'i' };
static const symbol s_3_27[4] = { 'i', 'u', 'n', 'i' };
static const symbol s_3_28[5] = { 'a', 't', 'o', 'r', 'i' };
static const symbol s_3_29[3] = { 'o', 's', 'i' };
static const symbol s_3_30[3] = { 'a', 't', 'i' };
static const symbol s_3_31[5] = { 'i', 't', 'a', 't', 'i' };
static const symbol s_3_32[3] = { 'i', 't', 'i' };
static const symbol s_3_33[4] = { 'a', 'n', 't', 'i' };
static const symbol s_3_34[4] = { 'i', 's', 't', 'i' };
static const symbol s_3_35[3] = { 'u', 't', 'i' };
static const symbol s_3_36[4] = { 'i', 0xBA, 't', 'i' };
static const symbol s_3_37[3] = { 'i', 'v', 'i' };
static const symbol s_3_38[3] = { 'o', 0xBA, 'i' };
static const symbol s_3_39[4] = { 'i', 't', 0xE3, 'i' };
static const symbol s_3_40[5] = { 'i', 't', 0xE3, 0xFE, 'i' };
static const symbol s_3_41[4] = { 'a', 'b', 'i', 'l' };
static const symbol s_3_42[4] = { 'i', 'b', 'i', 'l' };
static const symbol s_3_43[3] = { 'i', 's', 'm' };
static const symbol s_3_44[4] = { 'a', 't', 'o', 'r' };
static const symbol s_3_45[2] = { 'o', 's' };
static const symbol s_3_46[2] = { 'a', 't' };
static const symbol s_3_47[2] = { 'i', 't' };
static const symbol s_3_48[3] = { 'a', 'n', 't' };
static const symbol s_3_49[3] = { 'i', 's', 't' };
static const symbol s_3_50[2] = { 'u', 't' };
static const symbol s_3_51[2] = { 'i', 'v' };
static const symbol s_3_52[3] = { 'i', 'c', 0xE3 };
static const symbol s_3_53[5] = { 'a', 'b', 'i', 'l', 0xE3 };
static const symbol s_3_54[5] = { 'i', 'b', 'i', 'l', 0xE3 };
static const symbol s_3_55[4] = { 'o', 'a', 's', 0xE3 };
static const symbol s_3_56[3] = { 'a', 't', 0xE3 };
static const symbol s_3_57[3] = { 'i', 't', 0xE3 };
static const symbol s_3_58[4] = { 'a', 'n', 't', 0xE3 };
static const symbol s_3_59[4] = { 'i', 's', 't', 0xE3 };
static const symbol s_3_60[3] = { 'u', 't', 0xE3 };
static const symbol s_3_61[3] = { 'i', 'v', 0xE3 };
static const struct among a_3[62] =
{
/* 0 */ { 3, s_3_0, -1, 1, 0},
/* 1 */ { 5, s_3_1, -1, 1, 0},
/* 2 */ { 5, s_3_2, -1, 1, 0},
/* 3 */ { 4, s_3_3, -1, 1, 0},
/* 4 */ { 3, s_3_4, -1, 1, 0},
/* 5 */ { 3, s_3_5, -1, 1, 0},
/* 6 */ { 4, s_3_6, -1, 1, 0},
/* 7 */ { 4, s_3_7, -1, 3, 0},
/* 8 */ { 3, s_3_8, -1, 1, 0},
/* 9 */ { 3, s_3_9, -1, 1, 0},
/* 10 */ { 2, s_3_10, -1, 1, 0},
/* 11 */ { 3, s_3_11, -1, 1, 0},
/* 12 */ { 5, s_3_12, -1, 1, 0},
/* 13 */ { 5, s_3_13, -1, 1, 0},
/* 14 */ { 4, s_3_14, -1, 3, 0},
/* 15 */ { 4, s_3_15, -1, 2, 0},
/* 16 */ { 4, s_3_16, -1, 1, 0},
/* 17 */ { 3, s_3_17, -1, 1, 0},
/* 18 */ { 5, s_3_18, 17, 1, 0},
/* 19 */ { 3, s_3_19, -1, 1, 0},
/* 20 */ { 4, s_3_20, -1, 1, 0},
/* 21 */ { 4, s_3_21, -1, 3, 0},
/* 22 */ { 3, s_3_22, -1, 1, 0},
/* 23 */ { 3, s_3_23, -1, 1, 0},
/* 24 */ { 3, s_3_24, -1, 1, 0},
/* 25 */ { 5, s_3_25, -1, 1, 0},
/* 26 */ { 5, s_3_26, -1, 1, 0},
/* 27 */ { 4, s_3_27, -1, 2, 0},
/* 28 */ { 5, s_3_28, -1, 1, 0},
/* 29 */ { 3, s_3_29, -1, 1, 0},
/* 30 */ { 3, s_3_30, -1, 1, 0},
/* 31 */ { 5, s_3_31, 30, 1, 0},
/* 32 */ { 3, s_3_32, -1, 1, 0},
/* 33 */ { 4, s_3_33, -1, 1, 0},
/* 34 */ { 4, s_3_34, -1, 3, 0},
/* 35 */ { 3, s_3_35, -1, 1, 0},
/* 36 */ { 4, s_3_36, -1, 3, 0},
/* 37 */ { 3, s_3_37, -1, 1, 0},
/* 38 */ { 3, s_3_38, -1, 1, 0},
/* 39 */ { 4, s_3_39, -1, 1, 0},
/* 40 */ { 5, s_3_40, -1, 1, 0},
/* 41 */ { 4, s_3_41, -1, 1, 0},
/* 42 */ { 4, s_3_42, -1, 1, 0},
/* 43 */ { 3, s_3_43, -1, 3, 0},
/* 44 */ { 4, s_3_44, -1, 1, 0},
/* 45 */ { 2, s_3_45, -1, 1, 0},
/* 46 */ { 2, s_3_46, -1, 1, 0},
/* 47 */ { 2, s_3_47, -1, 1, 0},
/* 48 */ { 3, s_3_48, -1, 1, 0},
/* 49 */ { 3, s_3_49, -1, 3, 0},
/* 50 */ { 2, s_3_50, -1, 1, 0},
/* 51 */ { 2, s_3_51, -1, 1, 0},
/* 52 */ { 3, s_3_52, -1, 1, 0},
/* 53 */ { 5, s_3_53, -1, 1, 0},
/* 54 */ { 5, s_3_54, -1, 1, 0},
/* 55 */ { 4, s_3_55, -1, 1, 0},
/* 56 */ { 3, s_3_56, -1, 1, 0},
/* 57 */ { 3, s_3_57, -1, 1, 0},
/* 58 */ { 4, s_3_58, -1, 1, 0},
/* 59 */ { 4, s_3_59, -1, 3, 0},
/* 60 */ { 3, s_3_60, -1, 1, 0},
/* 61 */ { 3, s_3_61, -1, 1, 0}
};
/*
 * Suffix table a_4 (94 entries, results 1 and 2).  The function that
 * searches it lies outside this part of the file; going by the forward
 * declarations it is presumably r_verb_suffix -- confirm.
 * The third field chains an entry to a shorter entry that is a suffix of
 * it, e.g. 68 "aser<E3>m" -> 67 "ser<E3>m" -> 65 "<E3>m".
 * 0xE2, 0xE3, 0xBA and 0xFE are non-ASCII bytes; given the ISO_8859_2
 * naming of this stemmer they are presumably a-circumflex, a-breve,
 * s-cedilla and t-cedilla.
 */
static const symbol s_4_0[2] = { 'e', 'a' };
static const symbol s_4_1[2] = { 'i', 'a' };
static const symbol s_4_2[3] = { 'e', 's', 'c' };
static const symbol s_4_3[3] = { 0xE3, 's', 'c' };
static const symbol s_4_4[3] = { 'i', 'n', 'd' };
static const symbol s_4_5[3] = { 0xE2, 'n', 'd' };
static const symbol s_4_6[3] = { 'a', 'r', 'e' };
static const symbol s_4_7[3] = { 'e', 'r', 'e' };
static const symbol s_4_8[3] = { 'i', 'r', 'e' };
static const symbol s_4_9[3] = { 0xE2, 'r', 'e' };
static const symbol s_4_10[2] = { 's', 'e' };
static const symbol s_4_11[3] = { 'a', 's', 'e' };
static const symbol s_4_12[4] = { 's', 'e', 's', 'e' };
static const symbol s_4_13[3] = { 'i', 's', 'e' };
static const symbol s_4_14[3] = { 'u', 's', 'e' };
static const symbol s_4_15[3] = { 0xE2, 's', 'e' };
static const symbol s_4_16[4] = { 'e', 0xBA, 't', 'e' };
static const symbol s_4_17[4] = { 0xE3, 0xBA, 't', 'e' };
static const symbol s_4_18[3] = { 'e', 'z', 'e' };
static const symbol s_4_19[2] = { 'a', 'i' };
static const symbol s_4_20[3] = { 'e', 'a', 'i' };
static const symbol s_4_21[3] = { 'i', 'a', 'i' };
static const symbol s_4_22[3] = { 's', 'e', 'i' };
static const symbol s_4_23[4] = { 'e', 0xBA, 't', 'i' };
static const symbol s_4_24[4] = { 0xE3, 0xBA, 't', 'i' };
static const symbol s_4_25[2] = { 'u', 'i' };
static const symbol s_4_26[3] = { 'e', 'z', 'i' };
static const symbol s_4_27[3] = { 'a', 0xBA, 'i' };
static const symbol s_4_28[4] = { 's', 'e', 0xBA, 'i' };
static const symbol s_4_29[5] = { 'a', 's', 'e', 0xBA, 'i' };
static const symbol s_4_30[6] = { 's', 'e', 's', 'e', 0xBA, 'i' };
static const symbol s_4_31[5] = { 'i', 's', 'e', 0xBA, 'i' };
static const symbol s_4_32[5] = { 'u', 's', 'e', 0xBA, 'i' };
static const symbol s_4_33[5] = { 0xE2, 's', 'e', 0xBA, 'i' };
static const symbol s_4_34[3] = { 'i', 0xBA, 'i' };
static const symbol s_4_35[3] = { 'u', 0xBA, 'i' };
static const symbol s_4_36[3] = { 0xE2, 0xBA, 'i' };
static const symbol s_4_37[2] = { 0xE2, 'i' };
static const symbol s_4_38[3] = { 'a', 0xFE, 'i' };
static const symbol s_4_39[4] = { 'e', 'a', 0xFE, 'i' };
static const symbol s_4_40[4] = { 'i', 'a', 0xFE, 'i' };
static const symbol s_4_41[3] = { 'e', 0xFE, 'i' };
static const symbol s_4_42[3] = { 'i', 0xFE, 'i' };
static const symbol s_4_43[3] = { 0xE2, 0xFE, 'i' };
static const symbol s_4_44[5] = { 'a', 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_45[6] = { 's', 'e', 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_46[7] = { 'a', 's', 'e', 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_47[8] = { 's', 'e', 's', 'e', 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_48[7] = { 'i', 's', 'e', 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_49[7] = { 'u', 's', 'e', 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_50[7] = { 0xE2, 's', 'e', 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_51[5] = { 'i', 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_52[5] = { 'u', 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_53[5] = { 0xE2, 'r', 0xE3, 0xFE, 'i' };
static const symbol s_4_54[2] = { 'a', 'm' };
static const symbol s_4_55[3] = { 'e', 'a', 'm' };
static const symbol s_4_56[3] = { 'i', 'a', 'm' };
static const symbol s_4_57[2] = { 'e', 'm' };
static const symbol s_4_58[4] = { 'a', 's', 'e', 'm' };
static const symbol s_4_59[5] = { 's', 'e', 's', 'e', 'm' };
static const symbol s_4_60[4] = { 'i', 's', 'e', 'm' };
static const symbol s_4_61[4] = { 'u', 's', 'e', 'm' };
static const symbol s_4_62[4] = { 0xE2, 's', 'e', 'm' };
static const symbol s_4_63[2] = { 'i', 'm' };
static const symbol s_4_64[2] = { 0xE2, 'm' };
static const symbol s_4_65[2] = { 0xE3, 'm' };
static const symbol s_4_66[4] = { 'a', 'r', 0xE3, 'm' };
static const symbol s_4_67[5] = { 's', 'e', 'r', 0xE3, 'm' };
static const symbol s_4_68[6] = { 'a', 's', 'e', 'r', 0xE3, 'm' };
static const symbol s_4_69[7] = { 's', 'e', 's', 'e', 'r', 0xE3, 'm' };
static const symbol s_4_70[6] = { 'i', 's', 'e', 'r', 0xE3, 'm' };
static const symbol s_4_71[6] = { 'u', 's', 'e', 'r', 0xE3, 'm' };
static const symbol s_4_72[6] = { 0xE2, 's', 'e', 'r', 0xE3, 'm' };
static const symbol s_4_73[4] = { 'i', 'r', 0xE3, 'm' };
static const symbol s_4_74[4] = { 'u', 'r', 0xE3, 'm' };
static const symbol s_4_75[4] = { 0xE2, 'r', 0xE3, 'm' };
static const symbol s_4_76[2] = { 'a', 'u' };
static const symbol s_4_77[3] = { 'e', 'a', 'u' };
static const symbol s_4_78[3] = { 'i', 'a', 'u' };
static const symbol s_4_79[4] = { 'i', 'n', 'd', 'u' };
static const symbol s_4_80[4] = { 0xE2, 'n', 'd', 'u' };
static const symbol s_4_81[2] = { 'e', 'z' };
static const symbol s_4_82[5] = { 'e', 'a', 's', 'c', 0xE3 };
static const symbol s_4_83[3] = { 'a', 'r', 0xE3 };
static const symbol s_4_84[4] = { 's', 'e', 'r', 0xE3 };
static const symbol s_4_85[5] = { 'a', 's', 'e', 'r', 0xE3 };
static const symbol s_4_86[6] = { 's', 'e', 's', 'e', 'r', 0xE3 };
static const symbol s_4_87[5] = { 'i', 's', 'e', 'r', 0xE3 };
static const symbol s_4_88[5] = { 'u', 's', 'e', 'r', 0xE3 };
static const symbol s_4_89[5] = { 0xE2, 's', 'e', 'r', 0xE3 };
static const symbol s_4_90[3] = { 'i', 'r', 0xE3 };
static const symbol s_4_91[3] = { 'u', 'r', 0xE3 };
static const symbol s_4_92[3] = { 0xE2, 'r', 0xE3 };
static const symbol s_4_93[4] = { 'e', 'a', 'z', 0xE3 };
static const struct among a_4[94] =
{
/* 0 */ { 2, s_4_0, -1, 1, 0},
/* 1 */ { 2, s_4_1, -1, 1, 0},
/* 2 */ { 3, s_4_2, -1, 1, 0},
/* 3 */ { 3, s_4_3, -1, 1, 0},
/* 4 */ { 3, s_4_4, -1, 1, 0},
/* 5 */ { 3, s_4_5, -1, 1, 0},
/* 6 */ { 3, s_4_6, -1, 1, 0},
/* 7 */ { 3, s_4_7, -1, 1, 0},
/* 8 */ { 3, s_4_8, -1, 1, 0},
/* 9 */ { 3, s_4_9, -1, 1, 0},
/* 10 */ { 2, s_4_10, -1, 2, 0},
/* 11 */ { 3, s_4_11, 10, 1, 0},
/* 12 */ { 4, s_4_12, 10, 2, 0},
/* 13 */ { 3, s_4_13, 10, 1, 0},
/* 14 */ { 3, s_4_14, 10, 1, 0},
/* 15 */ { 3, s_4_15, 10, 1, 0},
/* 16 */ { 4, s_4_16, -1, 1, 0},
/* 17 */ { 4, s_4_17, -1, 1, 0},
/* 18 */ { 3, s_4_18, -1, 1, 0},
/* 19 */ { 2, s_4_19, -1, 1, 0},
/* 20 */ { 3, s_4_20, 19, 1, 0},
/* 21 */ { 3, s_4_21, 19, 1, 0},
/* 22 */ { 3, s_4_22, -1, 2, 0},
/* 23 */ { 4, s_4_23, -1, 1, 0},
/* 24 */ { 4, s_4_24, -1, 1, 0},
/* 25 */ { 2, s_4_25, -1, 1, 0},
/* 26 */ { 3, s_4_26, -1, 1, 0},
/* 27 */ { 3, s_4_27, -1, 1, 0},
/* 28 */ { 4, s_4_28, -1, 2, 0},
/* 29 */ { 5, s_4_29, 28, 1, 0},
/* 30 */ { 6, s_4_30, 28, 2, 0},
/* 31 */ { 5, s_4_31, 28, 1, 0},
/* 32 */ { 5, s_4_32, 28, 1, 0},
/* 33 */ { 5, s_4_33, 28, 1, 0},
/* 34 */ { 3, s_4_34, -1, 1, 0},
/* 35 */ { 3, s_4_35, -1, 1, 0},
/* 36 */ { 3, s_4_36, -1, 1, 0},
/* 37 */ { 2, s_4_37, -1, 1, 0},
/* 38 */ { 3, s_4_38, -1, 2, 0},
/* 39 */ { 4, s_4_39, 38, 1, 0},
/* 40 */ { 4, s_4_40, 38, 1, 0},
/* 41 */ { 3, s_4_41, -1, 2, 0},
/* 42 */ { 3, s_4_42, -1, 2, 0},
/* 43 */ { 3, s_4_43, -1, 2, 0},
/* 44 */ { 5, s_4_44, -1, 1, 0},
/* 45 */ { 6, s_4_45, -1, 2, 0},
/* 46 */ { 7, s_4_46, 45, 1, 0},
/* 47 */ { 8, s_4_47, 45, 2, 0},
/* 48 */ { 7, s_4_48, 45, 1, 0},
/* 49 */ { 7, s_4_49, 45, 1, 0},
/* 50 */ { 7, s_4_50, 45, 1, 0},
/* 51 */ { 5, s_4_51, -1, 1, 0},
/* 52 */ { 5, s_4_52, -1, 1, 0},
/* 53 */ { 5, s_4_53, -1, 1, 0},
/* 54 */ { 2, s_4_54, -1, 1, 0},
/* 55 */ { 3, s_4_55, 54, 1, 0},
/* 56 */ { 3, s_4_56, 54, 1, 0},
/* 57 */ { 2, s_4_57, -1, 2, 0},
/* 58 */ { 4, s_4_58, 57, 1, 0},
/* 59 */ { 5, s_4_59, 57, 2, 0},
/* 60 */ { 4, s_4_60, 57, 1, 0},
/* 61 */ { 4, s_4_61, 57, 1, 0},
/* 62 */ { 4, s_4_62, 57, 1, 0},
/* 63 */ { 2, s_4_63, -1, 2, 0},
/* 64 */ { 2, s_4_64, -1, 2, 0},
/* 65 */ { 2, s_4_65, -1, 2, 0},
/* 66 */ { 4, s_4_66, 65, 1, 0},
/* 67 */ { 5, s_4_67, 65, 2, 0},
/* 68 */ { 6, s_4_68, 67, 1, 0},
/* 69 */ { 7, s_4_69, 67, 2, 0},
/* 70 */ { 6, s_4_70, 67, 1, 0},
/* 71 */ { 6, s_4_71, 67, 1, 0},
/* 72 */ { 6, s_4_72, 67, 1, 0},
/* 73 */ { 4, s_4_73, 65, 1, 0},
/* 74 */ { 4, s_4_74, 65, 1, 0},
/* 75 */ { 4, s_4_75, 65, 1, 0},
/* 76 */ { 2, s_4_76, -1, 1, 0},
/* 77 */ { 3, s_4_77, 76, 1, 0},
/* 78 */ { 3, s_4_78, 76, 1, 0},
/* 79 */ { 4, s_4_79, -1, 1, 0},
/* 80 */ { 4, s_4_80, -1, 1, 0},
/* 81 */ { 2, s_4_81, -1, 1, 0},
/* 82 */ { 5, s_4_82, -1, 1, 0},
/* 83 */ { 3, s_4_83, -1, 1, 0},
/* 84 */ { 4, s_4_84, -1, 2, 0},
/* 85 */ { 5, s_4_85, 84, 1, 0},
/* 86 */ { 6, s_4_86, 84, 2, 0},
/* 87 */ { 5, s_4_87, 84, 1, 0},
/* 88 */ { 5, s_4_88, 84, 1, 0},
/* 89 */ { 5, s_4_89, 84, 1, 0},
/* 90 */ { 3, s_4_90, -1, 1, 0},
/* 91 */ { 3, s_4_91, -1, 1, 0},
/* 92 */ { 3, s_4_92, -1, 1, 0},
/* 93 */ { 4, s_4_93, -1, 1, 0}
};
/*
 * Suffix table a_5: five short endings, all with result 1.  The function
 * that searches it lies outside this part of the file; going by the
 * forward declarations it is presumably r_vowel_suffix -- confirm.
 * Entry 2 "ie" chains to entry 1 "e".
 */
static const symbol s_5_0[1] = { 'a' };
static const symbol s_5_1[1] = { 'e' };
static const symbol s_5_2[2] = { 'i', 'e' };
static const symbol s_5_3[1] = { 'i' };
static const symbol s_5_4[1] = { 0xE3 };
static const struct among a_5[5] =
{
/* 0 */ { 1, s_5_0, -1, 1, 0},
/* 1 */ { 1, s_5_1, -1, 1, 0},
/* 2 */ { 2, s_5_2, 1, 1, 0},
/* 3 */ { 1, s_5_3, -1, 1, 0},
/* 4 */ { 1, s_5_4, -1, 1, 0}
};
/* Character-class data for the grouping tests.  g_v is always passed with
 * the bounds 97..238; it is presumably a bitmap with one bit per code in
 * that range -- confirm against the in_grouping/out_grouping
 * implementation. */
static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 32 };
/* Literal strings used by the routines below: s_0..s_3 by r_prelude,
 * s_4/s_5 by r_postlude, s_6..s_12 by r_step_0, and s_13 onwards by
 * r_combo_suffix and routines further down the file. */
static const symbol s_0[] = { 'u' };
static const symbol s_1[] = { 'U' };
static const symbol s_2[] = { 'i' };
static const symbol s_3[] = { 'I' };
static const symbol s_4[] = { 'i' };
static const symbol s_5[] = { 'u' };
static const symbol s_6[] = { 'a' };
static const symbol s_7[] = { 'e' };
static const symbol s_8[] = { 'i' };
static const symbol s_9[] = { 'a', 'b' };
static const symbol s_10[] = { 'i' };
static const symbol s_11[] = { 'a', 't' };
static const symbol s_12[] = { 'a', 0xFE, 'i' };
static const symbol s_13[] = { 'a', 'b', 'i', 'l' };
static const symbol s_14[] = { 'i', 'b', 'i', 'l' };
static const symbol s_15[] = { 'i', 'v' };
static const symbol s_16[] = { 'i', 'c' };
static const symbol s_17[] = { 'a', 't' };
static const symbol s_18[] = { 'i', 't' };
static const symbol s_19[] = { 0xFE };
static const symbol s_20[] = { 't' };
static const symbol s_21[] = { 'i', 's', 't' };
static const symbol s_22[] = { 'u' };
/*
 * Prelude: scan the word from the cursor and, wherever a 'u' or an 'i' is
 * both preceded and followed by a symbol that passes the g_v grouping
 * test, replace it with 'U' or 'I' respectively.
 *
 * A non-zero result from in_grouping(..., 0) is treated as "test failed"
 * throughout.  Returns 1 when the scan is finished, or the negative status
 * from slice_from_s.
 */
static int r_prelude(struct SN_env * z) {
while(1) { /* repeat, line 32 */
int c1 = z->c;
while(1) { /* goto, line 32 */
int c2 = z->c;
/* the symbol at the cursor must pass the g_v test ... */
if (in_grouping(z, g_v, 97, 238, 0)) goto lab1;
z->bra = z->c; /* [, line 33 */
{ int c3 = z->c; /* or, line 33 */
/* first alternative: 'u' followed by a g_v symbol -> 'U' */
if (!(eq_s(z, 1, s_0))) goto lab3;
z->ket = z->c; /* ], line 33 */
if (in_grouping(z, g_v, 97, 238, 0)) goto lab3;
{ int ret = slice_from_s(z, 1, s_1); /* <-, line 33 */
if (ret < 0) return ret;
}
goto lab2;
lab3:
/* second alternative: 'i' followed by a g_v symbol -> 'I' */
z->c = c3;
if (!(eq_s(z, 1, s_2))) goto lab1;
z->ket = z->c; /* ], line 34 */
if (in_grouping(z, g_v, 97, 238, 0)) goto lab1;
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 34 */
if (ret < 0) return ret;
}
}
lab2:
/* a replacement was made: rewind to where this attempt began and let
 * the outer repeat run again */
z->c = c2;
break;
lab1:
/* no match at c2: step one symbol forward, or give up at end of input */
z->c = c2;
if (z->c >= z->l) goto lab0;
z->c++; /* goto, line 32 */
}
continue;
lab0:
/* end of input reached without a match: restore the cursor and stop */
z->c = c1;
break;
}
return 1;
}
/*
 * Compute the three marks I[0] (pV), I[1] (p1) and I[2] (p2).  All three
 * default to z->l and are only overwritten when the corresponding scan
 * succeeds.  The cursor is restored after each of the two "do" sections,
 * and the function always returns 1.
 *
 * In the calls below a final argument of 0 tests a single symbol (non-zero
 * result = failure), while a final argument of 1 scans ahead (negative
 * result = failure, otherwise an amount added to the cursor).
 */
static int r_mark_regions(struct SN_env * z) {
z->I[0] = z->l;
z->I[1] = z->l;
z->I[2] = z->l;
/* first section: locate pV */
{ int c1 = z->c; /* do, line 44 */
{ int c2 = z->c; /* or, line 46 */
/* branch A: the first symbol passes the g_v test */
if (in_grouping(z, g_v, 97, 238, 0)) goto lab2;
{ int c3 = z->c; /* or, line 45 */
/* A1: second symbol fails the g_v test, then gopast the next g_v symbol */
if (out_grouping(z, g_v, 97, 238, 0)) goto lab4;
{ /* gopast */ /* grouping v, line 45 */
int ret = out_grouping(z, g_v, 97, 238, 1);
if (ret < 0) goto lab4;
z->c += ret;
}
goto lab3;
lab4:
/* A2: second symbol also passes the g_v test, then gopast the next non-g_v symbol */
z->c = c3;
if (in_grouping(z, g_v, 97, 238, 0)) goto lab2;
{ /* gopast */ /* non v, line 45 */
int ret = in_grouping(z, g_v, 97, 238, 1);
if (ret < 0) goto lab2;
z->c += ret;
}
}
lab3:
goto lab1;
lab2:
/* branch B: the first symbol fails the g_v test */
z->c = c2;
if (out_grouping(z, g_v, 97, 238, 0)) goto lab0;
{ int c4 = z->c; /* or, line 47 */
/* B1: second symbol also fails the g_v test, then gopast the next g_v symbol */
if (out_grouping(z, g_v, 97, 238, 0)) goto lab6;
{ /* gopast */ /* grouping v, line 47 */
int ret = out_grouping(z, g_v, 97, 238, 1);
if (ret < 0) goto lab6;
z->c += ret;
}
goto lab5;
lab6:
/* B2: second symbol passes the g_v test, then skip exactly one more symbol */
z->c = c4;
if (in_grouping(z, g_v, 97, 238, 0)) goto lab0;
if (z->c >= z->l) goto lab0;
z->c++; /* next, line 47 */
}
lab5:
;
}
lab1:
z->I[0] = z->c; /* setmark pV, line 48 */
lab0:
z->c = c1;
}
/* second section: p1 and p2, each set after a gopast-g_v / gopast-non-g_v pair */
{ int c5 = z->c; /* do, line 50 */
{ /* gopast */ /* grouping v, line 51 */
int ret = out_grouping(z, g_v, 97, 238, 1);
if (ret < 0) goto lab7;
z->c += ret;
}
{ /* gopast */ /* non v, line 51 */
int ret = in_grouping(z, g_v, 97, 238, 1);
if (ret < 0) goto lab7;
z->c += ret;
}
z->I[1] = z->c; /* setmark p1, line 51 */
{ /* gopast */ /* grouping v, line 52 */
int ret = out_grouping(z, g_v, 97, 238, 1);
if (ret < 0) goto lab7;
z->c += ret;
}
{ /* gopast */ /* non v, line 52 */
int ret = in_grouping(z, g_v, 97, 238, 1);
if (ret < 0) goto lab7;
z->c += ret;
}
z->I[2] = z->c; /* setmark p2, line 52 */
lab7:
z->c = c5;
}
return 1;
}
/*
 * Postlude: walk forward through the word turning every 'I' back into 'i'
 * and every 'U' back into 'u'; any other symbol is simply stepped over.
 * The loop ends at the end of input, where the cursor is put back to its
 * value at the start of that last iteration.
 *
 * Returns 1, or the negative status from slice_from_s.
 */
static int r_postlude(struct SN_env * z) {
    int among_var;

    for (;;) {
        int iter_start = z->c;
        int status = 0;

        z->bra = z->c;
        /* symbols other than 'I' (73) and 'U' (85) skip the table lookup */
        if (z->c >= z->l || (z->p[z->c] != 73 && z->p[z->c] != 85))
            among_var = 3;
        else
            among_var = find_among(z, a_0, 3);

        if (among_var == 0) {
            z->c = iter_start;
            break;
        }
        z->ket = z->c;

        if (among_var == 1) {
            status = slice_from_s(z, 1, s_4);
        } else if (among_var == 2) {
            status = slice_from_s(z, 1, s_5);
        } else if (among_var == 3) {
            if (z->c >= z->l) {
                z->c = iter_start;
                break;
            }
            z->c++;
        }

        if (status < 0)
            return status;
    }
    return 1;
}
/* RV test: returns 1 when the cursor is at or beyond mark I[0], else 0. */
static int r_RV(struct SN_env * z) {
    return (z->c >= z->I[0]) ? 1 : 0;
}
/* R1 test: returns 1 when the cursor is at or beyond mark I[1], else 0. */
static int r_R1(struct SN_env * z) {
    return (z->c >= z->I[1]) ? 1 : 0;
}
/* R2 test: returns 1 when the cursor is at or beyond mark I[2], else 0. */
static int r_R2(struct SN_env * z) {
    return (z->c >= z->I[2]) ? 1 : 0;
}
/*
 * Step 0: find the longest a_1 suffix ending at the cursor (searching
 * backwards), require it to pass the R1 test, then rewrite it according to
 * the table result:
 *
 *   1 delete            2 -> s_6 ("a")     3 -> s_7 ("e")
 *   4 -> s_8 ("i")      5 -> s_10 ("i"), unless preceded by s_9 ("ab")
 *   6 -> s_11 ("at")    7 -> s_12
 *
 * Returns 1 when a rewrite was done, 0 when the step does not apply, or a
 * negative status from r_R1 or the slice operation.
 */
static int r_step_0(struct SN_env * z) {
    int among_var;
    int status;

    z->ket = z->c;

    /* cheap last-symbol filter before the table search */
    if (z->c - 1 <= z->lb)
        return 0;
    if (z->p[z->c - 1] >> 5 != 3)
        return 0;
    if (!((266786 >> (z->p[z->c - 1] & 0x1f)) & 1))
        return 0;

    among_var = find_among_b(z, a_1, 16);
    if (among_var == 0)
        return 0;
    z->bra = z->c;

    status = r_R1(z);
    if (status <= 0)
        return status;

    status = 0;
    switch (among_var) {
        case 1:
            status = slice_del(z);
            break;
        case 2:
            status = slice_from_s(z, 1, s_6);
            break;
        case 3:
            status = slice_from_s(z, 1, s_7);
            break;
        case 4:
            status = slice_from_s(z, 1, s_8);
            break;
        case 5: {
            /* not 'ab': bail out if the suffix is preceded by "ab" */
            int from_end = z->l - z->c;
            if (eq_s_b(z, 2, s_9))
                return 0;
            z->c = z->l - from_end;
            status = slice_from_s(z, 1, s_10);
            break;
        }
        case 6:
            status = slice_from_s(z, 2, s_11);
            break;
        case 7:
            status = slice_from_s(z, 3, s_12);
            break;
        default:
            break;
    }
    if (status < 0)
        return status;
    return 1;
}
/*
 * combo_suffix: backward match of one ending from table a_2 (46 entries).
 * The ending must lie inside R1; it is then handed to slice_from_s with
 * the replacement string selected by the table's result code.  On success
 * the standard_suffix_removed flag (B[0]) is set and the cursor is put
 * back at the same distance from the end of the word as on entry.
 * Returns 1 on success, 0 if the rule does not apply, or a negative value
 * propagated from slice_from_s.
 */
static int r_combo_suffix(struct SN_env * z) {
    const int from_end = z->l - z->c;   /* cursor position, measured from the limit */
    int among_var;
    int status;

    z->ket = z->c;
    among_var = find_among_b(z, a_2, 46);
    if (among_var == 0) return 0;
    z->bra = z->c;

    status = r_R1(z);
    if (status == 0) return 0;
    if (status < 0) return status;

    /* Pick the replacement that goes with the matched ending. */
    status = 0;
    switch (among_var) {
        case 1:  status = slice_from_s(z, 4, s_13); break;
        case 2:  status = slice_from_s(z, 4, s_14); break;
        case 3:  status = slice_from_s(z, 2, s_15); break;
        case 4:  status = slice_from_s(z, 2, s_16); break;
        case 5:  status = slice_from_s(z, 2, s_17); break;
        case 6:  status = slice_from_s(z, 2, s_18); break;
        default: break;
    }
    if (status < 0) return status;

    z->B[0] = 1;                 /* set standard_suffix_removed */
    z->c = z->l - from_end;
    return 1;
}
/*
 * standard_suffix: clears the standard_suffix_removed flag (B[0]), applies
 * r_combo_suffix repeatedly until it stops succeeding, then matches one
 * ending from table a_3 (62 entries) that must lie inside R2 and deletes
 * or rewrites it according to the table's result code.  B[0] is set again
 * when this final edit succeeds.
 * Returns 1 on success, 0 if no a_3 ending applies, or a negative value
 * propagated from the helpers.
 */
static int r_standard_suffix(struct SN_env * z) {
int among_var;
z->B[0] = 0; /* unset standard_suffix_removed, line 130 */
while(1) { /* repeat, line 131 */
int m1 = z->l - z->c; (void)m1;
{ int ret = r_combo_suffix(z);
if (ret == 0) goto lab0; /* call combo_suffix, line 131 */
if (ret < 0) return ret;
}
continue;
lab0:
/* combo_suffix failed: restore the cursor (relative to the limit, which
 * earlier iterations may have moved) and stop repeating. */
z->c = z->l - m1;
break;
}
z->ket = z->c; /* [, line 132 */
among_var = find_among_b(z, a_3, 62); /* substring, line 132 */
if (!(among_var)) return 0;
z->bra = z->c; /* ], line 132 */
{ int ret = r_R2(z);
if (ret == 0) return 0; /* call R2, line 132 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: return 0;
case 1:
{ int ret = slice_del(z); /* delete, line 149 */
if (ret < 0) return ret;
}
break;
case 2:
/* The ending must additionally be preceded by the symbol s_19; the
 * slice start is then moved back to include it before rewriting. */
if (!(eq_s_b(z, 1, s_19))) return 0;
z->bra = z->c; /* ], line 152 */
{ int ret = slice_from_s(z, 1, s_20); /* <-, line 152 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 3, s_21); /* <-, line 156 */
if (ret < 0) return ret;
}
break;
}
z->B[0] = 1; /* set standard_suffix_removed, line 160 */
return 1;
}
/*
 * verb_suffix: with the backward limit lb temporarily raised to the pV
 * mark (I[0]), matches one ending from table a_4 (94 entries) and deletes
 * it.  For result code 1 the ending must be preceded either by a
 * character outside the grouping g_v or by the symbol s_22.
 * The previous lb is restored on the 0 and 1 return paths; a negative
 * return from slice_del is passed up without restoring it.
 */
static int r_verb_suffix(struct SN_env * z) {
int among_var;
{ int mlimit; /* setlimit, line 164 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 164 */
/* Save the old backward limit, install pV as the new one, then put the
 * cursor back where it was. */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 165 */
among_var = find_among_b(z, a_4, 94); /* substring, line 165 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 165 */
switch(among_var) {
case 0: { z->lb = mlimit; return 0; }
case 1:
/* Two alternatives for the preceding context: first the g_v grouping
 * test, and if that fails, a literal s_22 after rewinding the cursor. */
{ int m2 = z->l - z->c; (void)m2; /* or, line 200 */
if (out_grouping_b(z, g_v, 97, 238, 0)) goto lab1;
goto lab0;
lab1:
z->c = z->l - m2;
if (!(eq_s_b(z, 1, s_22))) { z->lb = mlimit; return 0; }
}
lab0:
{ int ret = slice_del(z); /* delete, line 200 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_del(z); /* delete, line 214 */
if (ret < 0) return ret;
}
break;
}
z->lb = mlimit;
}
return 1;
}
/*
 * vowel_suffix: backward match of one ending from table a_5 (5 entries);
 * if it lies inside RV and the table's result code is 1, the ending is
 * deleted.
 * Returns 1 on success, 0 if the rule does not apply, or a negative value
 * propagated from slice_del.
 */
static int r_vowel_suffix(struct SN_env * z) {
    int among_var;
    int status;

    z->ket = z->c;
    among_var = find_among_b(z, a_5, 5);
    if (among_var == 0) return 0;
    z->bra = z->c;

    status = r_RV(z);
    if (status == 0) return 0;
    if (status < 0) return status;

    if (among_var == 1) {
        status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/*
 * Entry point of the Romanian (ISO-8859-2) stemmer.
 *
 * Runs, in order: prelude and mark_regions scanning forwards; then, scanning
 * backwards from the end of the word, step_0, standard_suffix, verb_suffix
 * (only when standard_suffix did not set B[0]) and vowel_suffix; finally
 * postlude scanning forwards again.  Each stage is a "do": a stage that
 * returns 0 is ignored and the cursor is restored before the next stage.
 *
 * Returns 1 on completion, or the negative value reported by a stage.
 */
extern int romanian_ISO_8859_2_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 226 */
{ int ret = r_prelude(z);
if (ret == 0) goto lab0; /* call prelude, line 226 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
{ int c2 = z->c; /* do, line 227 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab1; /* call mark_regions, line 227 */
if (ret < 0) return ret;
}
lab1:
z->c = c2;
}
/* Switch to backward processing: the current cursor becomes the backward
 * limit and the cursor jumps to the end of the word.  From here on saved
 * cursors are stored as distances from z->l, since edits can move z->l. */
z->lb = z->c; z->c = z->l; /* backwards, line 228 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 229 */
{ int ret = r_step_0(z);
if (ret == 0) goto lab2; /* call step_0, line 229 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
{ int m4 = z->l - z->c; (void)m4; /* do, line 230 */
{ int ret = r_standard_suffix(z);
if (ret == 0) goto lab3; /* call standard_suffix, line 230 */
if (ret < 0) return ret;
}
lab3:
z->c = z->l - m4;
}
{ int m5 = z->l - z->c; (void)m5; /* do, line 231 */
/* verb_suffix is attempted only if no standard suffix was removed. */
{ int m6 = z->l - z->c; (void)m6; /* or, line 231 */
if (!(z->B[0])) goto lab6; /* Boolean test standard_suffix_removed, line 231 */
goto lab5;
lab6:
z->c = z->l - m6;
{ int ret = r_verb_suffix(z);
if (ret == 0) goto lab4; /* call verb_suffix, line 231 */
if (ret < 0) return ret;
}
}
lab5:
lab4:
z->c = z->l - m5;
}
{ int m7 = z->l - z->c; (void)m7; /* do, line 232 */
{ int ret = r_vowel_suffix(z);
if (ret == 0) goto lab7; /* call vowel_suffix, line 232 */
if (ret < 0) return ret;
}
lab7:
z->c = z->l - m7;
}
/* Back to forward processing, starting from the saved backward limit. */
z->c = z->lb;
{ int c8 = z->c; /* do, line 234 */
{ int ret = r_postlude(z);
if (ret == 0) goto lab8; /* call postlude, line 234 */
if (ret < 0) return ret;
}
lab8:
z->c = c8;
}
return 1;
}
/*
 * Allocates the environment used by the Romanian stemmer via
 * SN_create_env(0, 3, 1).  The stemmer itself touches I[0..2] and B[0];
 * presumably the three arguments are the string/integer/boolean slot
 * counts -- confirm against the runtime's SN_create_env.
 */
extern struct SN_env * romanian_ISO_8859_2_create_env(void)
{
    struct SN_env * env = SN_create_env(0, 3, 1);
    return env;
}
/* Releases an environment of the Romanian stemmer by passing it to SN_close_env(z, 0). */
extern void romanian_ISO_8859_2_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

View File

@ -0,0 +1,700 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
/* Public stemming entry point (C linkage when compiled as C++). */
#ifdef __cplusplus
extern "C" {
#endif
extern int russian_KOI8_R_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Forward declarations of the file-local routines, so that they can be
 * defined below in any order. */
static int r_tidy_up(struct SN_env * z);
static int r_derivational(struct SN_env * z);
static int r_noun(struct SN_env * z);
static int r_verb(struct SN_env * z);
static int r_reflexive(struct SN_env * z);
static int r_adjectival(struct SN_env * z);
static int r_adjective(struct SN_env * z);
static int r_perfective_gerund(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
/* Environment constructor / destructor (C linkage when compiled as C++). */
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * russian_KOI8_R_create_env(void);
extern void russian_KOI8_R_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Table a_0: endings searched by r_perfective_gerund.
 * Bytes are KOI8-R; transliterated the strings read
 *   vshi, ivshi, yvshi, v, iv, yv, vshis', ivshis', yvshis'.
 * Each among entry appears to be { length, string, index of a shorter entry
 * that is a suffix of this one (-1 if none), result code, 0 } -- confirm
 * the field meanings against struct among in header.h.  The result code is
 * the value r_perfective_gerund switches on.
 */
static const symbol s_0_0[3] = { 0xD7, 0xDB, 0xC9 };
static const symbol s_0_1[4] = { 0xC9, 0xD7, 0xDB, 0xC9 };
static const symbol s_0_2[4] = { 0xD9, 0xD7, 0xDB, 0xC9 };
static const symbol s_0_3[1] = { 0xD7 };
static const symbol s_0_4[2] = { 0xC9, 0xD7 };
static const symbol s_0_5[2] = { 0xD9, 0xD7 };
static const symbol s_0_6[5] = { 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 };
static const symbol s_0_7[6] = { 0xC9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 };
static const symbol s_0_8[6] = { 0xD9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 };
static const struct among a_0[9] =
{
/* 0 */ { 3, s_0_0, -1, 1, 0},
/* 1 */ { 4, s_0_1, 0, 2, 0},
/* 2 */ { 4, s_0_2, 0, 2, 0},
/* 3 */ { 1, s_0_3, -1, 1, 0},
/* 4 */ { 2, s_0_4, 3, 2, 0},
/* 5 */ { 2, s_0_5, 3, 2, 0},
/* 6 */ { 5, s_0_6, -1, 1, 0},
/* 7 */ { 6, s_0_7, 6, 2, 0},
/* 8 */ { 6, s_0_8, 6, 2, 0}
};
/*
 * Table a_1: the 26 KOI8-R endings searched by r_adjective.
 * Every entry carries result code 1 and no link to a shorter entry (-1).
 */
static const symbol s_1_0[2] = { 0xC0, 0xC0 };
static const symbol s_1_1[2] = { 0xC5, 0xC0 };
static const symbol s_1_2[2] = { 0xCF, 0xC0 };
static const symbol s_1_3[2] = { 0xD5, 0xC0 };
static const symbol s_1_4[2] = { 0xC5, 0xC5 };
static const symbol s_1_5[2] = { 0xC9, 0xC5 };
static const symbol s_1_6[2] = { 0xCF, 0xC5 };
static const symbol s_1_7[2] = { 0xD9, 0xC5 };
static const symbol s_1_8[2] = { 0xC9, 0xC8 };
static const symbol s_1_9[2] = { 0xD9, 0xC8 };
static const symbol s_1_10[3] = { 0xC9, 0xCD, 0xC9 };
static const symbol s_1_11[3] = { 0xD9, 0xCD, 0xC9 };
static const symbol s_1_12[2] = { 0xC5, 0xCA };
static const symbol s_1_13[2] = { 0xC9, 0xCA };
static const symbol s_1_14[2] = { 0xCF, 0xCA };
static const symbol s_1_15[2] = { 0xD9, 0xCA };
static const symbol s_1_16[2] = { 0xC5, 0xCD };
static const symbol s_1_17[2] = { 0xC9, 0xCD };
static const symbol s_1_18[2] = { 0xCF, 0xCD };
static const symbol s_1_19[2] = { 0xD9, 0xCD };
static const symbol s_1_20[3] = { 0xC5, 0xC7, 0xCF };
static const symbol s_1_21[3] = { 0xCF, 0xC7, 0xCF };
static const symbol s_1_22[2] = { 0xC1, 0xD1 };
static const symbol s_1_23[2] = { 0xD1, 0xD1 };
static const symbol s_1_24[3] = { 0xC5, 0xCD, 0xD5 };
static const symbol s_1_25[3] = { 0xCF, 0xCD, 0xD5 };
static const struct among a_1[26] =
{
/* 0 */ { 2, s_1_0, -1, 1, 0},
/* 1 */ { 2, s_1_1, -1, 1, 0},
/* 2 */ { 2, s_1_2, -1, 1, 0},
/* 3 */ { 2, s_1_3, -1, 1, 0},
/* 4 */ { 2, s_1_4, -1, 1, 0},
/* 5 */ { 2, s_1_5, -1, 1, 0},
/* 6 */ { 2, s_1_6, -1, 1, 0},
/* 7 */ { 2, s_1_7, -1, 1, 0},
/* 8 */ { 2, s_1_8, -1, 1, 0},
/* 9 */ { 2, s_1_9, -1, 1, 0},
/* 10 */ { 3, s_1_10, -1, 1, 0},
/* 11 */ { 3, s_1_11, -1, 1, 0},
/* 12 */ { 2, s_1_12, -1, 1, 0},
/* 13 */ { 2, s_1_13, -1, 1, 0},
/* 14 */ { 2, s_1_14, -1, 1, 0},
/* 15 */ { 2, s_1_15, -1, 1, 0},
/* 16 */ { 2, s_1_16, -1, 1, 0},
/* 17 */ { 2, s_1_17, -1, 1, 0},
/* 18 */ { 2, s_1_18, -1, 1, 0},
/* 19 */ { 2, s_1_19, -1, 1, 0},
/* 20 */ { 3, s_1_20, -1, 1, 0},
/* 21 */ { 3, s_1_21, -1, 1, 0},
/* 22 */ { 2, s_1_22, -1, 1, 0},
/* 23 */ { 2, s_1_23, -1, 1, 0},
/* 24 */ { 3, s_1_24, -1, 1, 0},
/* 25 */ { 3, s_1_25, -1, 1, 0}
};
/*
 * Table a_2: endings searched by r_adjectival after an adjective ending
 * has been removed.  Bytes are KOI8-R; transliterated the strings read
 *   em, nn, vsh, ivsh, yvsh, shch, yushch, uyushch.
 * Result code 1 entries need a preceding s_2/s_3 symbol in r_adjectival;
 * result code 2 entries are deleted unconditionally.
 */
static const symbol s_2_0[2] = { 0xC5, 0xCD };
static const symbol s_2_1[2] = { 0xCE, 0xCE };
static const symbol s_2_2[2] = { 0xD7, 0xDB };
static const symbol s_2_3[3] = { 0xC9, 0xD7, 0xDB };
static const symbol s_2_4[3] = { 0xD9, 0xD7, 0xDB };
static const symbol s_2_5[1] = { 0xDD };
static const symbol s_2_6[2] = { 0xC0, 0xDD };
static const symbol s_2_7[3] = { 0xD5, 0xC0, 0xDD };
static const struct among a_2[8] =
{
/* 0 */ { 2, s_2_0, -1, 1, 0},
/* 1 */ { 2, s_2_1, -1, 1, 0},
/* 2 */ { 2, s_2_2, -1, 1, 0},
/* 3 */ { 3, s_2_3, 2, 2, 0},
/* 4 */ { 3, s_2_4, 2, 2, 0},
/* 5 */ { 1, s_2_5, -1, 1, 0},
/* 6 */ { 2, s_2_6, 5, 1, 0},
/* 7 */ { 3, s_2_7, 6, 2, 0}
};
/*
 * Table a_3: the two endings searched by r_reflexive.
 * Bytes are KOI8-R; transliterated they read "sya" and "s'".
 */
static const symbol s_3_0[2] = { 0xD3, 0xD1 };
static const symbol s_3_1[2] = { 0xD3, 0xD8 };
static const struct among a_3[2] =
{
/* 0 */ { 2, s_3_0, -1, 1, 0},
/* 1 */ { 2, s_3_1, -1, 1, 0}
};
/*
 * Table a_4: the 46 KOI8-R endings searched by r_verb.
 * Result code 1 entries are deleted only when r_verb finds a preceding
 * s_4 or s_5 symbol; result code 2 entries are deleted unconditionally.
 */
static const symbol s_4_0[1] = { 0xC0 };
static const symbol s_4_1[2] = { 0xD5, 0xC0 };
static const symbol s_4_2[2] = { 0xCC, 0xC1 };
static const symbol s_4_3[3] = { 0xC9, 0xCC, 0xC1 };
static const symbol s_4_4[3] = { 0xD9, 0xCC, 0xC1 };
static const symbol s_4_5[2] = { 0xCE, 0xC1 };
static const symbol s_4_6[3] = { 0xC5, 0xCE, 0xC1 };
static const symbol s_4_7[3] = { 0xC5, 0xD4, 0xC5 };
static const symbol s_4_8[3] = { 0xC9, 0xD4, 0xC5 };
static const symbol s_4_9[3] = { 0xCA, 0xD4, 0xC5 };
static const symbol s_4_10[4] = { 0xC5, 0xCA, 0xD4, 0xC5 };
static const symbol s_4_11[4] = { 0xD5, 0xCA, 0xD4, 0xC5 };
static const symbol s_4_12[2] = { 0xCC, 0xC9 };
static const symbol s_4_13[3] = { 0xC9, 0xCC, 0xC9 };
static const symbol s_4_14[3] = { 0xD9, 0xCC, 0xC9 };
static const symbol s_4_15[1] = { 0xCA };
static const symbol s_4_16[2] = { 0xC5, 0xCA };
static const symbol s_4_17[2] = { 0xD5, 0xCA };
static const symbol s_4_18[1] = { 0xCC };
static const symbol s_4_19[2] = { 0xC9, 0xCC };
static const symbol s_4_20[2] = { 0xD9, 0xCC };
static const symbol s_4_21[2] = { 0xC5, 0xCD };
static const symbol s_4_22[2] = { 0xC9, 0xCD };
static const symbol s_4_23[2] = { 0xD9, 0xCD };
static const symbol s_4_24[1] = { 0xCE };
static const symbol s_4_25[2] = { 0xC5, 0xCE };
static const symbol s_4_26[2] = { 0xCC, 0xCF };
static const symbol s_4_27[3] = { 0xC9, 0xCC, 0xCF };
static const symbol s_4_28[3] = { 0xD9, 0xCC, 0xCF };
static const symbol s_4_29[2] = { 0xCE, 0xCF };
static const symbol s_4_30[3] = { 0xC5, 0xCE, 0xCF };
static const symbol s_4_31[3] = { 0xCE, 0xCE, 0xCF };
static const symbol s_4_32[2] = { 0xC0, 0xD4 };
static const symbol s_4_33[3] = { 0xD5, 0xC0, 0xD4 };
static const symbol s_4_34[2] = { 0xC5, 0xD4 };
static const symbol s_4_35[3] = { 0xD5, 0xC5, 0xD4 };
static const symbol s_4_36[2] = { 0xC9, 0xD4 };
static const symbol s_4_37[2] = { 0xD1, 0xD4 };
static const symbol s_4_38[2] = { 0xD9, 0xD4 };
static const symbol s_4_39[2] = { 0xD4, 0xD8 };
static const symbol s_4_40[3] = { 0xC9, 0xD4, 0xD8 };
static const symbol s_4_41[3] = { 0xD9, 0xD4, 0xD8 };
static const symbol s_4_42[3] = { 0xC5, 0xDB, 0xD8 };
static const symbol s_4_43[3] = { 0xC9, 0xDB, 0xD8 };
static const symbol s_4_44[2] = { 0xCE, 0xD9 };
static const symbol s_4_45[3] = { 0xC5, 0xCE, 0xD9 };
static const struct among a_4[46] =
{
/* 0 */ { 1, s_4_0, -1, 2, 0},
/* 1 */ { 2, s_4_1, 0, 2, 0},
/* 2 */ { 2, s_4_2, -1, 1, 0},
/* 3 */ { 3, s_4_3, 2, 2, 0},
/* 4 */ { 3, s_4_4, 2, 2, 0},
/* 5 */ { 2, s_4_5, -1, 1, 0},
/* 6 */ { 3, s_4_6, 5, 2, 0},
/* 7 */ { 3, s_4_7, -1, 1, 0},
/* 8 */ { 3, s_4_8, -1, 2, 0},
/* 9 */ { 3, s_4_9, -1, 1, 0},
/* 10 */ { 4, s_4_10, 9, 2, 0},
/* 11 */ { 4, s_4_11, 9, 2, 0},
/* 12 */ { 2, s_4_12, -1, 1, 0},
/* 13 */ { 3, s_4_13, 12, 2, 0},
/* 14 */ { 3, s_4_14, 12, 2, 0},
/* 15 */ { 1, s_4_15, -1, 1, 0},
/* 16 */ { 2, s_4_16, 15, 2, 0},
/* 17 */ { 2, s_4_17, 15, 2, 0},
/* 18 */ { 1, s_4_18, -1, 1, 0},
/* 19 */ { 2, s_4_19, 18, 2, 0},
/* 20 */ { 2, s_4_20, 18, 2, 0},
/* 21 */ { 2, s_4_21, -1, 1, 0},
/* 22 */ { 2, s_4_22, -1, 2, 0},
/* 23 */ { 2, s_4_23, -1, 2, 0},
/* 24 */ { 1, s_4_24, -1, 1, 0},
/* 25 */ { 2, s_4_25, 24, 2, 0},
/* 26 */ { 2, s_4_26, -1, 1, 0},
/* 27 */ { 3, s_4_27, 26, 2, 0},
/* 28 */ { 3, s_4_28, 26, 2, 0},
/* 29 */ { 2, s_4_29, -1, 1, 0},
/* 30 */ { 3, s_4_30, 29, 2, 0},
/* 31 */ { 3, s_4_31, 29, 1, 0},
/* 32 */ { 2, s_4_32, -1, 1, 0},
/* 33 */ { 3, s_4_33, 32, 2, 0},
/* 34 */ { 2, s_4_34, -1, 1, 0},
/* 35 */ { 3, s_4_35, 34, 2, 0},
/* 36 */ { 2, s_4_36, -1, 2, 0},
/* 37 */ { 2, s_4_37, -1, 2, 0},
/* 38 */ { 2, s_4_38, -1, 2, 0},
/* 39 */ { 2, s_4_39, -1, 1, 0},
/* 40 */ { 3, s_4_40, 39, 2, 0},
/* 41 */ { 3, s_4_41, 39, 2, 0},
/* 42 */ { 3, s_4_42, -1, 1, 0},
/* 43 */ { 3, s_4_43, -1, 2, 0},
/* 44 */ { 2, s_4_44, -1, 1, 0},
/* 45 */ { 3, s_4_45, 44, 2, 0}
};
/*
 * Table a_5: the 36 KOI8-R endings searched by r_noun.
 * Every entry carries result code 1 (r_noun deletes the match).
 */
static const symbol s_5_0[1] = { 0xC0 };
static const symbol s_5_1[2] = { 0xC9, 0xC0 };
static const symbol s_5_2[2] = { 0xD8, 0xC0 };
static const symbol s_5_3[1] = { 0xC1 };
static const symbol s_5_4[1] = { 0xC5 };
static const symbol s_5_5[2] = { 0xC9, 0xC5 };
static const symbol s_5_6[2] = { 0xD8, 0xC5 };
static const symbol s_5_7[2] = { 0xC1, 0xC8 };
static const symbol s_5_8[2] = { 0xD1, 0xC8 };
static const symbol s_5_9[3] = { 0xC9, 0xD1, 0xC8 };
static const symbol s_5_10[1] = { 0xC9 };
static const symbol s_5_11[2] = { 0xC5, 0xC9 };
static const symbol s_5_12[2] = { 0xC9, 0xC9 };
static const symbol s_5_13[3] = { 0xC1, 0xCD, 0xC9 };
static const symbol s_5_14[3] = { 0xD1, 0xCD, 0xC9 };
static const symbol s_5_15[4] = { 0xC9, 0xD1, 0xCD, 0xC9 };
static const symbol s_5_16[1] = { 0xCA };
static const symbol s_5_17[2] = { 0xC5, 0xCA };
static const symbol s_5_18[3] = { 0xC9, 0xC5, 0xCA };
static const symbol s_5_19[2] = { 0xC9, 0xCA };
static const symbol s_5_20[2] = { 0xCF, 0xCA };
static const symbol s_5_21[2] = { 0xC1, 0xCD };
static const symbol s_5_22[2] = { 0xC5, 0xCD };
static const symbol s_5_23[3] = { 0xC9, 0xC5, 0xCD };
static const symbol s_5_24[2] = { 0xCF, 0xCD };
static const symbol s_5_25[2] = { 0xD1, 0xCD };
static const symbol s_5_26[3] = { 0xC9, 0xD1, 0xCD };
static const symbol s_5_27[1] = { 0xCF };
static const symbol s_5_28[1] = { 0xD1 };
static const symbol s_5_29[2] = { 0xC9, 0xD1 };
static const symbol s_5_30[2] = { 0xD8, 0xD1 };
static const symbol s_5_31[1] = { 0xD5 };
static const symbol s_5_32[2] = { 0xC5, 0xD7 };
static const symbol s_5_33[2] = { 0xCF, 0xD7 };
static const symbol s_5_34[1] = { 0xD8 };
static const symbol s_5_35[1] = { 0xD9 };
static const struct among a_5[36] =
{
/* 0 */ { 1, s_5_0, -1, 1, 0},
/* 1 */ { 2, s_5_1, 0, 1, 0},
/* 2 */ { 2, s_5_2, 0, 1, 0},
/* 3 */ { 1, s_5_3, -1, 1, 0},
/* 4 */ { 1, s_5_4, -1, 1, 0},
/* 5 */ { 2, s_5_5, 4, 1, 0},
/* 6 */ { 2, s_5_6, 4, 1, 0},
/* 7 */ { 2, s_5_7, -1, 1, 0},
/* 8 */ { 2, s_5_8, -1, 1, 0},
/* 9 */ { 3, s_5_9, 8, 1, 0},
/* 10 */ { 1, s_5_10, -1, 1, 0},
/* 11 */ { 2, s_5_11, 10, 1, 0},
/* 12 */ { 2, s_5_12, 10, 1, 0},
/* 13 */ { 3, s_5_13, 10, 1, 0},
/* 14 */ { 3, s_5_14, 10, 1, 0},
/* 15 */ { 4, s_5_15, 14, 1, 0},
/* 16 */ { 1, s_5_16, -1, 1, 0},
/* 17 */ { 2, s_5_17, 16, 1, 0},
/* 18 */ { 3, s_5_18, 17, 1, 0},
/* 19 */ { 2, s_5_19, 16, 1, 0},
/* 20 */ { 2, s_5_20, 16, 1, 0},
/* 21 */ { 2, s_5_21, -1, 1, 0},
/* 22 */ { 2, s_5_22, -1, 1, 0},
/* 23 */ { 3, s_5_23, 22, 1, 0},
/* 24 */ { 2, s_5_24, -1, 1, 0},
/* 25 */ { 2, s_5_25, -1, 1, 0},
/* 26 */ { 3, s_5_26, 25, 1, 0},
/* 27 */ { 1, s_5_27, -1, 1, 0},
/* 28 */ { 1, s_5_28, -1, 1, 0},
/* 29 */ { 2, s_5_29, 28, 1, 0},
/* 30 */ { 2, s_5_30, 28, 1, 0},
/* 31 */ { 1, s_5_31, -1, 1, 0},
/* 32 */ { 2, s_5_32, -1, 1, 0},
/* 33 */ { 2, s_5_33, -1, 1, 0},
/* 34 */ { 1, s_5_34, -1, 1, 0},
/* 35 */ { 1, s_5_35, -1, 1, 0}
};
/*
 * Table a_6: the two endings searched by r_derivational.
 * Bytes are KOI8-R; transliterated they read "ost" and "ost'".
 */
static const symbol s_6_0[3] = { 0xCF, 0xD3, 0xD4 };
static const symbol s_6_1[4] = { 0xCF, 0xD3, 0xD4, 0xD8 };
static const struct among a_6[2] =
{
/* 0 */ { 3, s_6_0, -1, 1, 0},
/* 1 */ { 4, s_6_1, -1, 1, 0}
};
/*
 * Table a_7: endings searched by r_tidy_up.
 * Bytes are KOI8-R; transliterated they read "eyshe", "n", "'" (soft sign)
 * and "eysh".  The result codes 1, 2 and 3 select the three branches of
 * r_tidy_up.
 */
static const symbol s_7_0[4] = { 0xC5, 0xCA, 0xDB, 0xC5 };
static const symbol s_7_1[1] = { 0xCE };
static const symbol s_7_2[1] = { 0xD8 };
static const symbol s_7_3[3] = { 0xC5, 0xCA, 0xDB };
static const struct among a_7[4] =
{
/* 0 */ { 4, s_7_0, -1, 1, 0},
/* 1 */ { 1, s_7_1, -1, 2, 0},
/* 2 */ { 1, s_7_2, -1, 3, 0},
/* 3 */ { 3, s_7_3, -1, 1, 0}
};
/*
 * g_v: bitmap passed to in_grouping/out_grouping together with the byte
 * range 192..220 by r_mark_regions.  Presumably bit i of byte k stands for
 * the character 192 + 8*k + i (the usual Snowball grouping layout --
 * confirm against the runtime); read that way it selects the KOI8-R
 * vowels.
 */
static const unsigned char g_v[] = { 35, 130, 34, 18 };
/*
 * Single-symbol literals compared with eq_s_b.  In KOI8-R, 0xC1 and 0xD1
 * transliterate to "a" and "ya", 0xCE to "n", 0xC9 to "i".
 * s_0/s_1 are used by r_perfective_gerund, s_2/s_3 by r_adjectival,
 * s_4/s_5 by r_verb, s_6..s_8 by r_tidy_up and s_9 by russian_KOI8_R_stem.
 */
static const symbol s_0[] = { 0xC1 };
static const symbol s_1[] = { 0xD1 };
static const symbol s_2[] = { 0xC1 };
static const symbol s_3[] = { 0xD1 };
static const symbol s_4[] = { 0xC1 };
static const symbol s_5[] = { 0xD1 };
static const symbol s_6[] = { 0xCE };
static const symbol s_7[] = { 0xCE };
static const symbol s_8[] = { 0xCE };
static const symbol s_9[] = { 0xC9 };
/*
 * mark_regions: computes the two region marks used by the stemmer.
 * Both I[0] (pV) and I[1] (p2) start out at the limit z->l.  Scanning
 * forward with the g_v grouping, I[0] is set after the first
 * out_grouping step, and I[1] after a further in_grouping, out_grouping,
 * in_grouping sequence.  If any scan step reports failure (negative
 * return) the marks set so far are kept and the rest stay at z->l.
 * The cursor is restored before returning.  Always returns 1.
 */
static int r_mark_regions(struct SN_env * z) {
z->I[0] = z->l;
z->I[1] = z->l;
{ int c1 = z->c; /* do, line 63 */
{ /* gopast */ /* grouping v, line 64 */
int ret = out_grouping(z, g_v, 192, 220, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
z->I[0] = z->c; /* setmark pV, line 64 */
{ /* gopast */ /* non v, line 64 */
int ret = in_grouping(z, g_v, 192, 220, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
{ /* gopast */ /* grouping v, line 65 */
int ret = out_grouping(z, g_v, 192, 220, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
{ /* gopast */ /* non v, line 65 */
int ret = in_grouping(z, g_v, 192, 220, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
z->I[1] = z->c; /* setmark p2, line 65 */
lab0:
/* Reached on success and on early exit alike: undo cursor movement. */
z->c = c1;
}
return 1;
}
/* R2 test: yields 1 when the cursor is at or past the p2 mark kept in I[1], else 0. */
static int r_R2(struct SN_env * z) {
    return (z->c >= z->I[1]) ? 1 : 0;
}
/*
 * perfective_gerund: backward match of one ending from table a_0
 * (9 entries) and deletion of it.  For result code 1 the ending must be
 * preceded by the symbol s_0 or, failing that, s_1; for result code 2 it
 * is deleted unconditionally.
 * Returns 1 on success, 0 if the rule does not apply, or a negative value
 * propagated from slice_del.
 */
static int r_perfective_gerund(struct SN_env * z) {
    int among_var;

    z->ket = z->c;

    /* Quick rejection on the byte before the cursor, ahead of the table search. */
    if (z->c <= z->lb) return 0;
    if (z->p[z->c - 1] >> 5 != 6) return 0;
    if (!((25166336 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;

    among_var = find_among_b(z, a_0, 9);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        /* Try s_0 first; on failure rewind and insist on s_1. */
        const int from_end = z->l - z->c;
        if (!eq_s_b(z, 1, s_0)) {
            z->c = z->l - from_end;
            if (!eq_s_b(z, 1, s_1)) return 0;
        }
    }

    if (among_var == 1 || among_var == 2) {
        int status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/*
 * adjective: backward match of one ending from table a_1 (26 entries);
 * a match with result code 1 is deleted.
 * Returns 1 on success, 0 if the rule does not apply, or a negative value
 * propagated from slice_del.
 */
static int r_adjective(struct SN_env * z) {
    int among_var;

    z->ket = z->c;

    /* Quick rejection: needs two bytes above lb and a plausible final byte. */
    if (z->c - 1 <= z->lb) return 0;
    if (z->p[z->c - 1] >> 5 != 6) return 0;
    if (!((2271009 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;

    among_var = find_among_b(z, a_1, 26);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        int status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/*
 * adjectival: requires r_adjective to succeed, then optionally ("try")
 * removes one further ending from table a_2 (8 entries).  For result
 * code 1 that ending must be preceded by s_2 or, failing that, s_3;
 * for result code 2 it is deleted unconditionally.  If the optional part
 * does not apply the cursor is restored and the routine still succeeds.
 * Returns 1 on success, 0 if r_adjective fails, or a negative value
 * propagated from the helpers.
 */
static int r_adjectival(struct SN_env * z) {
int among_var;
{ int ret = r_adjective(z);
if (ret == 0) return 0; /* call adjective, line 104 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */
z->ket = z->c; /* [, line 112 */
/* Quick rejection on the byte before the cursor; every failure inside
 * the "try" rewinds to m_keep and jumps to lab0. */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((671113216 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; }
among_var = find_among_b(z, a_2, 8); /* substring, line 112 */
if (!(among_var)) { z->c = z->l - m_keep; goto lab0; }
z->bra = z->c; /* ], line 112 */
switch(among_var) {
case 0: { z->c = z->l - m_keep; goto lab0; }
case 1:
{ int m1 = z->l - z->c; (void)m1; /* or, line 117 */
if (!(eq_s_b(z, 1, s_2))) goto lab2;
goto lab1;
lab2:
z->c = z->l - m1;
if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; }
}
lab1:
{ int ret = slice_del(z); /* delete, line 117 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_del(z); /* delete, line 124 */
if (ret < 0) return ret;
}
break;
}
lab0:
;
}
return 1;
}
/*
 * reflexive: backward match of one of the two endings in table a_3;
 * a match with result code 1 is deleted.
 * Returns 1 on success, 0 if the rule does not apply, or a negative value
 * propagated from slice_del.
 */
static int r_reflexive(struct SN_env * z) {
    int among_var;

    z->ket = z->c;

    /* Quick rejection: needs two bytes above lb, the last being 209 or 216. */
    if (z->c - 1 <= z->lb) return 0;
    if (z->p[z->c - 1] != 209 && z->p[z->c - 1] != 216) return 0;

    among_var = find_among_b(z, a_3, 2);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        int status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/*
 * verb: backward match of one ending from table a_4 (46 entries) and
 * deletion of it.  For result code 1 the ending must be preceded by the
 * symbol s_4 or, failing that, s_5; for result code 2 it is deleted
 * unconditionally.
 * Returns 1 on success, 0 if the rule does not apply, or a negative value
 * propagated from slice_del.
 */
static int r_verb(struct SN_env * z) {
    int among_var;

    z->ket = z->c;

    /* Quick rejection on the byte before the cursor, ahead of the table search. */
    if (z->c <= z->lb) return 0;
    if (z->p[z->c - 1] >> 5 != 6) return 0;
    if (!((51443235 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;

    among_var = find_among_b(z, a_4, 46);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        /* Try s_4 first; on failure rewind and insist on s_5. */
        const int from_end = z->l - z->c;
        if (!eq_s_b(z, 1, s_4)) {
            z->c = z->l - from_end;
            if (!eq_s_b(z, 1, s_5)) return 0;
        }
    }

    if (among_var == 1 || among_var == 2) {
        int status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/*
 * noun: backward match of one ending from table a_5 (36 entries);
 * a match with result code 1 is deleted.
 * Returns 1 on success, 0 if the rule does not apply, or a negative value
 * propagated from slice_del.
 */
static int r_noun(struct SN_env * z) {
    int among_var;

    z->ket = z->c;

    /* Quick rejection on the byte before the cursor, ahead of the table search. */
    if (z->c <= z->lb) return 0;
    if (z->p[z->c - 1] >> 5 != 6) return 0;
    if (!((60991267 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;

    among_var = find_among_b(z, a_5, 36);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        int status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/*
 * derivational: backward match of one of the two endings in table a_6;
 * if the match lies inside R2 and has result code 1 it is deleted.
 * Returns 1 on success, 0 if the rule does not apply, or a negative value
 * propagated from slice_del.
 */
static int r_derivational(struct SN_env * z) {
    int among_var;
    int status;

    z->ket = z->c;

    /* Quick rejection: needs three bytes above lb, the last being 212 or 216. */
    if (z->c - 2 <= z->lb) return 0;
    if (z->p[z->c - 1] != 212 && z->p[z->c - 1] != 216) return 0;

    among_var = find_among_b(z, a_6, 2);
    if (among_var == 0) return 0;
    z->bra = z->c;

    status = r_R2(z);
    if (status == 0) return 0;
    if (status < 0) return status;

    if (among_var == 1) {
        status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/*
 * tidy_up: backward match of one ending from table a_7 (4 entries),
 * followed by a branch on its result code:
 *   1 - delete the ending; then, if two s_6/s_7 symbols (both 0xCE)
 *       precede the cursor, delete the slice bracketing the first one
 *       matched;
 *   2 - delete the ending only if it is preceded by s_8 (0xCE);
 *   3 - delete the ending.
 * Returns 1 on success, 0 if a required match fails, or a negative value
 * propagated from slice_del.  In branch 1 a 0 return can follow an edit
 * that has already been made.
 */
static int r_tidy_up(struct SN_env * z) {
int among_var;
z->ket = z->c; /* [, line 186 */
/* Quick rejection on the byte before the cursor, ahead of the table search. */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((151011360 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
among_var = find_among_b(z, a_7, 4); /* substring, line 186 */
if (!(among_var)) return 0;
z->bra = z->c; /* ], line 186 */
switch(among_var) {
case 0: return 0;
case 1:
{ int ret = slice_del(z); /* delete, line 190 */
if (ret < 0) return ret;
}
/* Re-bracket: the new slice covers only the first s_6 match; the second
 * comparison (s_7) is a look-behind that is not part of the slice. */
z->ket = z->c; /* [, line 191 */
if (!(eq_s_b(z, 1, s_6))) return 0;
z->bra = z->c; /* ], line 191 */
if (!(eq_s_b(z, 1, s_7))) return 0;
{ int ret = slice_del(z); /* delete, line 191 */
if (ret < 0) return ret;
}
break;
case 2:
/* bra/ket still bracket the a_7 match; s_8 is only a look-behind. */
if (!(eq_s_b(z, 1, s_8))) return 0;
{ int ret = slice_del(z); /* delete, line 194 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_del(z); /* delete, line 196 */
if (ret < 0) return ret;
}
break;
}
return 1;
}
/*
 * Entry point of the Russian (KOI8-R) stemmer.
 *
 * After mark_regions, the word is processed backwards from its end with
 * the backward limit raised to the pV mark (I[0]):
 *   1. perfective_gerund, or else (optional reflexive, then the first of
 *      adjectival / verb / noun that succeeds);
 *   2. optional removal of a trailing s_9 symbol;
 *   3. derivational;
 *   4. tidy_up.
 * Stages wrapped in "do" that return 0 are ignored and the cursor is
 * restored before the next stage.
 *
 * Returns 1 on completion, 0 if the cursor is before I[0] when the limit
 * is installed, or the negative value reported by a stage (in which case
 * the saved backward limit is not restored).
 */
extern int russian_KOI8_R_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 203 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab0; /* call mark_regions, line 203 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
/* Switch to backward processing: the current cursor becomes the backward
 * limit and the cursor jumps to the end of the word. */
z->lb = z->c; z->c = z->l; /* backwards, line 204 */
{ int mlimit; /* setlimit, line 204 */
int m2 = z->l - z->c; (void)m2;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 204 */
/* Save the old backward limit in mlimit and use pV instead until the
 * end of this block. */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m2;
{ int m3 = z->l - z->c; (void)m3; /* do, line 205 */
{ int m4 = z->l - z->c; (void)m4; /* or, line 206 */
{ int ret = r_perfective_gerund(z);
if (ret == 0) goto lab3; /* call perfective_gerund, line 206 */
if (ret < 0) return ret;
}
goto lab2;
lab3:
/* No perfective gerund: fall back to reflexive + adjectival/verb/noun. */
z->c = z->l - m4;
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 207 */
{ int ret = r_reflexive(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call reflexive, line 207 */
if (ret < 0) return ret;
}
lab4:
;
}
{ int m5 = z->l - z->c; (void)m5; /* or, line 208 */
{ int ret = r_adjectival(z);
if (ret == 0) goto lab6; /* call adjectival, line 208 */
if (ret < 0) return ret;
}
goto lab5;
lab6:
z->c = z->l - m5;
{ int ret = r_verb(z);
if (ret == 0) goto lab7; /* call verb, line 208 */
if (ret < 0) return ret;
}
goto lab5;
lab7:
z->c = z->l - m5;
{ int ret = r_noun(z);
if (ret == 0) goto lab1; /* call noun, line 208 */
if (ret < 0) return ret;
}
}
lab5:
;
}
lab2:
lab1:
z->c = z->l - m3;
}
/* Optional: drop a single trailing s_9 symbol if present. */
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 211 */
z->ket = z->c; /* [, line 211 */
if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m_keep; goto lab8; }
z->bra = z->c; /* ], line 211 */
{ int ret = slice_del(z); /* delete, line 211 */
if (ret < 0) return ret;
}
lab8:
;
}
{ int m6 = z->l - z->c; (void)m6; /* do, line 214 */
{ int ret = r_derivational(z);
if (ret == 0) goto lab9; /* call derivational, line 214 */
if (ret < 0) return ret;
}
lab9:
z->c = z->l - m6;
}
{ int m7 = z->l - z->c; (void)m7; /* do, line 215 */
{ int ret = r_tidy_up(z);
if (ret == 0) goto lab10; /* call tidy_up, line 215 */
if (ret < 0) return ret;
}
lab10:
z->c = z->l - m7;
}
z->lb = mlimit;
}
/* Leave the cursor at the (restored) backward limit. */
z->c = z->lb;
return 1;
}
/*
 * Allocates the environment used by the Russian stemmer via
 * SN_create_env(0, 2, 0).  The stemmer itself touches I[0] and I[1] only;
 * presumably the three arguments are the string/integer/boolean slot
 * counts -- confirm against the runtime's SN_create_env.
 */
extern struct SN_env * russian_KOI8_R_create_env(void)
{
    struct SN_env * env = SN_create_env(0, 2, 0);
    return env;
}
/* Releases an environment of the Russian stemmer by passing it to SN_close_env(z, 0). */
extern void russian_KOI8_R_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

View File

@ -0,0 +1,339 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
/* Public stemming entry point (C linkage when compiled as C++). */
#ifdef __cplusplus
extern "C" {
#endif
extern int danish_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Forward declarations of the file-local routines. */
static int r_undouble(struct SN_env * z);
static int r_other_suffix(struct SN_env * z);
static int r_consonant_pair(struct SN_env * z);
static int r_main_suffix(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
/* Environment constructor / destructor (C linkage when compiled as C++). */
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * danish_UTF_8_create_env(void);
extern void danish_UTF_8_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Table a_0: the 32 endings searched by r_main_suffix.
 * Only the bare "s" (entry 16) carries result code 2; every other entry
 * carries result code 1.  The third field of an entry appears to be the
 * index of a shorter entry that is a suffix of it (-1 if none) -- confirm
 * against struct among in header.h.
 */
static const symbol s_0_0[3] = { 'h', 'e', 'd' };
static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' };
static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' };
static const symbol s_0_3[1] = { 'e' };
static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' };
static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' };
static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' };
static const symbol s_0_7[3] = { 'e', 'n', 'e' };
static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' };
static const symbol s_0_9[3] = { 'e', 'r', 'e' };
static const symbol s_0_10[2] = { 'e', 'n' };
static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' };
static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' };
static const symbol s_0_13[2] = { 'e', 'r' };
static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' };
static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' };
static const symbol s_0_16[1] = { 's' };
static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' };
static const symbol s_0_18[2] = { 'e', 's' };
static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' };
static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' };
static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' };
static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' };
static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' };
static const symbol s_0_24[3] = { 'e', 'n', 's' };
static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' };
static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' };
static const symbol s_0_27[3] = { 'e', 'r', 's' };
static const symbol s_0_28[3] = { 'e', 't', 's' };
static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' };
static const symbol s_0_30[2] = { 'e', 't' };
static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' };
static const struct among a_0[32] =
{
/* 0 */ { 3, s_0_0, -1, 1, 0},
/* 1 */ { 5, s_0_1, 0, 1, 0},
/* 2 */ { 4, s_0_2, -1, 1, 0},
/* 3 */ { 1, s_0_3, -1, 1, 0},
/* 4 */ { 5, s_0_4, 3, 1, 0},
/* 5 */ { 4, s_0_5, 3, 1, 0},
/* 6 */ { 6, s_0_6, 5, 1, 0},
/* 7 */ { 3, s_0_7, 3, 1, 0},
/* 8 */ { 4, s_0_8, 3, 1, 0},
/* 9 */ { 3, s_0_9, 3, 1, 0},
/* 10 */ { 2, s_0_10, -1, 1, 0},
/* 11 */ { 5, s_0_11, 10, 1, 0},
/* 12 */ { 4, s_0_12, 10, 1, 0},
/* 13 */ { 2, s_0_13, -1, 1, 0},
/* 14 */ { 5, s_0_14, 13, 1, 0},
/* 15 */ { 4, s_0_15, 13, 1, 0},
/* 16 */ { 1, s_0_16, -1, 2, 0},
/* 17 */ { 4, s_0_17, 16, 1, 0},
/* 18 */ { 2, s_0_18, 16, 1, 0},
/* 19 */ { 5, s_0_19, 18, 1, 0},
/* 20 */ { 7, s_0_20, 19, 1, 0},
/* 21 */ { 4, s_0_21, 18, 1, 0},
/* 22 */ { 5, s_0_22, 18, 1, 0},
/* 23 */ { 4, s_0_23, 18, 1, 0},
/* 24 */ { 3, s_0_24, 16, 1, 0},
/* 25 */ { 6, s_0_25, 24, 1, 0},
/* 26 */ { 5, s_0_26, 24, 1, 0},
/* 27 */ { 3, s_0_27, 16, 1, 0},
/* 28 */ { 3, s_0_28, 16, 1, 0},
/* 29 */ { 5, s_0_29, 28, 1, 0},
/* 30 */ { 2, s_0_30, -1, 1, 0},
/* 31 */ { 4, s_0_31, 30, 1, 0}
};
/*
 * Table a_1: the four two-letter pairs ("gd", "dt", "gt", "kt") searched
 * by r_consonant_pair.  All entries carry result code -1; the caller only
 * tests whether the search result is non-zero.
 */
static const symbol s_1_0[2] = { 'g', 'd' };
static const symbol s_1_1[2] = { 'd', 't' };
static const symbol s_1_2[2] = { 'g', 't' };
static const symbol s_1_3[2] = { 'k', 't' };
static const struct among a_1[4] =
{
/* 0 */ { 2, s_1_0, -1, -1, 0},
/* 1 */ { 2, s_1_1, -1, -1, 0},
/* 2 */ { 2, s_1_2, -1, -1, 0},
/* 3 */ { 2, s_1_3, -1, -1, 0}
};
/*
 * Table a_2: five endings -- "ig", "lig", "elig", "els" (result code 1)
 * and "l\xC3\xB8st" (result code 2; 0xC3 0xB8 is the UTF-8 encoding of
 * U+00F8, o with stroke).
 */
static const symbol s_2_0[2] = { 'i', 'g' };
static const symbol s_2_1[3] = { 'l', 'i', 'g' };
static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' };
static const symbol s_2_3[3] = { 'e', 'l', 's' };
static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' };
static const struct among a_2[5] =
{
/* 0 */ { 2, s_2_0, -1, 1, 0},
/* 1 */ { 3, s_2_1, 0, 1, 0},
/* 2 */ { 4, s_2_2, 1, 1, 0},
/* 3 */ { 3, s_2_3, -1, 1, 0},
/* 4 */ { 5, s_2_4, -1, 2, 0}
};
/*
 * Character-class bitmaps handed to the *_grouping_*_U helpers:
 * g_v is used with the code-point range 97..248 (r_mark_regions) and
 * g_s_ending with 97..229 (r_main_suffix).  Presumably bit i of byte k
 * stands for code point 97 + 8*k + i -- confirm against the runtime.
 */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
/* Literal strings "st", "ig" and "l\xC3\xB8s" (UTF-8). */
static const symbol s_0[] = { 's', 't' };
static const symbol s_1[] = { 'i', 'g' };
static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' };
/*
 * Set up the two region marks used by the Danish suffix routines.
 *
 * I[1] (Snowball mark "x") receives the position skip_utf8 reports for a hop
 * of three characters from the current cursor; the cursor itself is not moved
 * by that probe.  I[0] (mark "p1") is first preset to z->l, then set to the
 * cursor position reached by the "goto v / gopast non-v" scan, and finally
 * raised to I[1] if it is smaller.
 *
 * Returns 1 when both marks were set, 0 as soon as the hop or one of the
 * grouping scans reports failure (negative result).
 */
static int r_mark_regions(struct SN_env * z) {
    int hop_end;
    int advance;

    z->I[0] = z->l;

    /* test ( hop 3 setmark x ), line 33 */
    hop_end = skip_utf8(z->p, z->c, 0, z->l, + 3);
    if (hop_end < 0) return 0;
    z->I[1] = hop_end;

    /* goto v, line 34 */
    if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0;

    /* gopast non-v, line 34 */
    advance = in_grouping_U(z, g_v, 97, 248, 1);
    if (advance < 0) return 0;
    z->c += advance;

    /* setmark p1 (line 34), then try ( $p1 < x  $p1 = x ), line 35 */
    z->I[0] = (z->c < z->I[1]) ? z->I[1] : z->c;
    return 1;
}
/*
 * Danish step 1: look up an ending from table a_0 (searching backwards from
 * the cursor, with the backward limit temporarily moved to mark p1 = I[0])
 * and delete it.
 *
 * Result code 1 deletes the ending unconditionally; result code 2 deletes it
 * only when in_grouping_b_U(z, g_s_ending, ...) returns 0 for the text before it.
 *
 * Returns 1 when the step completed, 0 when it does not apply, and a negative
 * value propagated from slice_del on failure.
 */
static int r_main_suffix(struct SN_env * z) {
    int among_var;
    int outer_lb;

    /* setlimit tomark p1, line 41: nothing to do if the cursor is before p1 */
    if (z->c < z->I[0]) return 0;
    outer_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;                               /* [, line 41 */

    /*
     * Cheap pre-filter before the table search: the byte before the cursor
     * must be in 0x60..0x7F and its low five bits must select a set bit of
     * 1851440 (0x1C4030), i.e. one of d, e, n, r, s, t.
     */
    if (z->c <= z->lb
        || z->p[z->c - 1] >> 5 != 3
        || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) {
        z->lb = outer_lb;
        return 0;
    }

    among_var = find_among_b(z, a_0, 32);        /* substring, line 41 */
    if (!among_var) {
        z->lb = outer_lb;
        return 0;
    }
    z->bra = z->c;                               /* ], line 41 */
    z->lb = outer_lb;

    if (among_var == 2 && in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0;

    if (among_var == 1 || among_var == 2) {
        int ret = slice_del(z);                  /* delete, lines 48 / 50 */
        if (ret < 0) return ret;
    }
    return 1;
}
/*
 * Danish step 2: when the text before the cursor (limited to the region after
 * mark p1 = I[0]) ends in one of the a_1 pairs "gd", "dt", "gt", "kt", delete
 * the final character of the word.
 *
 * The table lookup is only a test: the cursor is put back to where it started
 * before the single-character step back and the deletion.
 *
 * Returns 1 after a deletion, 0 when the step does not apply, and a negative
 * value propagated from slice_del on failure.
 */
static int r_consonant_pair(struct SN_env * z) {
    int from_end = z->l - z->c;                  /* test, line 55 */
    int outer_lb;
    int prev;
    int ret;

    /* setlimit tomark p1, line 56 */
    if (z->c < z->I[0]) return 0;
    outer_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;                               /* [, line 56 */

    /* Need at least two bytes ending in 'd' (100) or 't' (116), then a table hit. */
    if (z->c - 1 <= z->lb
        || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)
        || !find_among_b(z, a_1, 4)) {           /* substring, line 56 */
        z->lb = outer_lb;
        return 0;
    }
    z->bra = z->c;                               /* ], line 56 */
    z->lb = outer_lb;
    z->c = z->l - from_end;

    /* next, line 62: step back one character */
    prev = skip_utf8(z->p, z->c, z->lb, 0, -1);
    if (prev < 0) return 0;
    z->c = prev;
    z->bra = z->c;                               /* ], line 62 */

    ret = slice_del(z);                          /* delete, line 62 */
    if (ret < 0) return ret;
    return 1;
}
/*
 * Danish step 3.
 *
 * First, as an optional action: if "st" is found before the cursor and "ig"
 * is found before that, slice_del is applied to the marked "st" slice.
 * Then, limited to the region after mark p1 = I[0], an ending from table a_2
 * is looked up:
 *   code 1 -> delete it, then run r_consonant_pair as an optional follow-up;
 *   code 2 -> replace it with the four bytes of s_2.
 *
 * Returns 1 when the step completed, 0 when it does not apply, and a negative
 * value propagated from the slice helpers or r_consonant_pair on failure.
 */
static int r_other_suffix(struct SN_env * z) {
    int among_var;
    int outer_lb;
    int from_end;
    int ret;

    /* do ( ['st'] 'ig' delete ), line 66 */
    from_end = z->l - z->c;
    z->ket = z->c;
    if (eq_s_b(z, 2, s_0)) {
        z->bra = z->c;
        if (eq_s_b(z, 2, s_1)) {
            ret = slice_del(z);
            if (ret < 0) return ret;
        }
    }
    z->c = z->l - from_end;

    /* setlimit tomark p1, line 67 */
    if (z->c < z->I[0]) return 0;
    outer_lb = z->lb;
    z->lb = z->I[0];

    z->ket = z->c;                               /* [, line 67 */

    /*
     * Pre-filter: at least two bytes, the last one in 0x60..0x7F with its low
     * five bits selecting a set bit of 1572992 (0x180080), i.e. g, s or t.
     */
    if (z->c - 1 <= z->lb
        || z->p[z->c - 1] >> 5 != 3
        || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) {
        z->lb = outer_lb;
        return 0;
    }

    among_var = find_among_b(z, a_2, 5);         /* substring, line 67 */
    if (!among_var) {
        z->lb = outer_lb;
        return 0;
    }
    z->bra = z->c;                               /* ], line 67 */
    z->lb = outer_lb;

    if (among_var == 1) {
        ret = slice_del(z);                      /* delete, line 70 */
        if (ret < 0) return ret;

        /* do consonant_pair, line 70: its 0 result is ignored, errors are not */
        from_end = z->l - z->c;
        ret = r_consonant_pair(z);
        if (ret < 0) return ret;
        z->c = z->l - from_end;
    } else if (among_var == 2) {
        ret = slice_from_s(z, 4, s_2);           /* <-, line 72 */
        if (ret < 0) return ret;
    }
    return 1;
}
static int r_undouble(struct SN_env * z) {
{ int mlimit; /* setlimit, line 76 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 76 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 76 */
if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 76 */
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
if (z->S[0] == 0) return -1; /* -> ch, line 76 */
z->lb = mlimit;
}
if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
{ int ret = slice_del(z); /* delete, line 78 */
if (ret < 0) return ret;
}
return 1;
}
extern int danish_UTF_8_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 84 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab0; /* call mark_regions, line 84 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
z->lb = z->c; z->c = z->l; /* backwards, line 85 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 86 */
{ int ret = r_main_suffix(z);
if (ret == 0) goto lab1; /* call main_suffix, line 86 */
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 87 */
{ int ret = r_consonant_pair(z);
if (ret == 0) goto lab2; /* call consonant_pair, line 87 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
{ int m4 = z->l - z->c; (void)m4; /* do, line 88 */
{ int ret = r_other_suffix(z);
if (ret == 0) goto lab3; /* call other_suffix, line 88 */
if (ret < 0) return ret;
}
lab3:
z->c = z->l - m4;
}
{ int m5 = z->l - z->c; (void)m5; /* do, line 89 */
{ int ret = r_undouble(z);
if (ret == 0) goto lab4; /* call undouble, line 89 */
if (ret < 0) return ret;
}
lab4:
z->c = z->l - m5;
}
z->c = z->lb;
return 1;
}
/*
 * Allocate a stemmer environment for Danish via SN_create_env(1, 2, 0).
 * NOTE(review): the arguments presumably size the S, I and B arrays; this
 * matches the Danish routines, which use S[0], I[0] and I[1] and no B slot --
 * confirm against SN_create_env.
 */
extern struct SN_env * danish_UTF_8_create_env(void)
{
    return SN_create_env(1, 2, 0);
}
/*
 * Release an environment obtained from danish_UTF_8_create_env.
 * The second argument (1) equals the first argument given to SN_create_env there.
 */
extern void danish_UTF_8_close_env(struct SN_env * z)
{
    SN_close_env(z, 1);
}

View File

@ -0,0 +1,634 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int dutch_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_standard_suffix(struct SN_env * z);
static int r_undouble(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
static int r_en_ending(struct SN_env * z);
static int r_e_ending(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_prelude(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * dutch_UTF_8_create_env(void);
extern void dutch_UTF_8_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Among table a_0 (Dutch), searched forwards by r_prelude with
 * find_among(z, a_0, 11).  Each string is a two-byte UTF-8 sequence:
 *   0xC3 0xA1 (U+00E1) and 0xC3 0xA4 (U+00E4) -> code 1
 *   0xC3 0xA9 (U+00E9) and 0xC3 0xAB (U+00EB) -> code 2
 *   0xC3 0xAD (U+00ED) and 0xC3 0xAF (U+00EF) -> code 3
 *   0xC3 0xB3 (U+00F3) and 0xC3 0xB6 (U+00F6) -> code 4
 *   0xC3 0xBA (U+00FA) and 0xC3 0xBC (U+00FC) -> code 5
 * Entry 0 is the empty string with code 6; every other entry names it in the
 * third field.  r_prelude maps codes 1..5 to the plain letters a, e, i, o, u
 * and treats code 6 as "skip one character".
 * NOTE(review): field meanings are inferred from usage; confirm against
 * struct among in header.h.
 */
static const symbol s_0_1[2] = { 0xC3, 0xA1 };
static const symbol s_0_2[2] = { 0xC3, 0xA4 };
static const symbol s_0_3[2] = { 0xC3, 0xA9 };
static const symbol s_0_4[2] = { 0xC3, 0xAB };
static const symbol s_0_5[2] = { 0xC3, 0xAD };
static const symbol s_0_6[2] = { 0xC3, 0xAF };
static const symbol s_0_7[2] = { 0xC3, 0xB3 };
static const symbol s_0_8[2] = { 0xC3, 0xB6 };
static const symbol s_0_9[2] = { 0xC3, 0xBA };
static const symbol s_0_10[2] = { 0xC3, 0xBC };
static const struct among a_0[11] =
{
/* 0 */ { 0, 0, -1, 6, 0},
/* 1 */ { 2, s_0_1, 0, 1, 0},
/* 2 */ { 2, s_0_2, 0, 1, 0},
/* 3 */ { 2, s_0_3, 0, 2, 0},
/* 4 */ { 2, s_0_4, 0, 2, 0},
/* 5 */ { 2, s_0_5, 0, 3, 0},
/* 6 */ { 2, s_0_6, 0, 3, 0},
/* 7 */ { 2, s_0_7, 0, 4, 0},
/* 8 */ { 2, s_0_8, 0, 4, 0},
/* 9 */ { 2, s_0_9, 0, 5, 0},
/* 10 */ { 2, s_0_10, 0, 5, 0}
};
/*
 * Among table a_1 (Dutch), searched forwards by r_postlude with
 * find_among(z, a_1, 3): "I" -> code 2, "Y" -> code 1, empty string -> code 3.
 * r_postlude rewrites code 1 to "y", code 2 to "i" and skips a character on code 3.
 */
static const symbol s_1_1[1] = { 'I' };
static const symbol s_1_2[1] = { 'Y' };
static const struct among a_1[3] =
{
/* 0 */ { 0, 0, -1, 3, 0},
/* 1 */ { 1, s_1_1, 0, 2, 0},
/* 2 */ { 1, s_1_2, 0, 1, 0}
};
/*
 * Among table a_2 (Dutch): the doubled consonants "dd", "kk", "tt".
 * r_undouble searches it backwards with find_among_b(z, a_2, 3) and only
 * checks for a non-zero result.
 */
static const symbol s_2_0[2] = { 'd', 'd' };
static const symbol s_2_1[2] = { 'k', 'k' };
static const symbol s_2_2[2] = { 't', 't' };
static const struct among a_2[3] =
{
/* 0 */ { 2, s_2_0, -1, -1, 0},
/* 1 */ { 2, s_2_1, -1, -1, 0},
/* 2 */ { 2, s_2_2, -1, -1, 0}
};
/*
 * Among table a_3 (Dutch), searched backwards by r_standard_suffix with
 * find_among_b(z, a_3, 5):
 *   "heden"      -> code 1
 *   "ene", "en"  -> code 2
 *   "se", "s"    -> code 3
 * "heden" names entry 2 ("en") in its third field.
 */
static const symbol s_3_0[3] = { 'e', 'n', 'e' };
static const symbol s_3_1[2] = { 's', 'e' };
static const symbol s_3_2[2] = { 'e', 'n' };
static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' };
static const symbol s_3_4[1] = { 's' };
static const struct among a_3[5] =
{
/* 0 */ { 3, s_3_0, -1, 2, 0},
/* 1 */ { 2, s_3_1, -1, 3, 0},
/* 2 */ { 2, s_3_2, -1, 2, 0},
/* 3 */ { 5, s_3_3, 2, 1, 0},
/* 4 */ { 1, s_3_4, -1, 3, 0}
};
/*
 * Among table a_4 (Dutch), searched backwards by r_standard_suffix with
 * find_among_b(z, a_4, 6):
 *   "end", "ing" -> code 1
 *   "ig"         -> code 2
 *   "lijk"       -> code 3
 *   "baar"       -> code 4
 *   "bar"        -> code 5
 */
static const symbol s_4_0[3] = { 'e', 'n', 'd' };
static const symbol s_4_1[2] = { 'i', 'g' };
static const symbol s_4_2[3] = { 'i', 'n', 'g' };
static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' };
static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' };
static const symbol s_4_5[3] = { 'b', 'a', 'r' };
static const struct among a_4[6] =
{
/* 0 */ { 3, s_4_0, -1, 1, 0},
/* 1 */ { 2, s_4_1, -1, 2, 0},
/* 2 */ { 3, s_4_2, -1, 1, 0},
/* 3 */ { 4, s_4_3, -1, 3, 0},
/* 4 */ { 4, s_4_4, -1, 4, 0},
/* 5 */ { 3, s_4_5, -1, 5, 0}
};
/*
 * Among table a_5 (Dutch): the doubled vowels "aa", "ee", "oo", "uu".
 * The last step of r_standard_suffix searches it backwards with
 * find_among_b(z, a_5, 4) and only checks for a non-zero result.
 */
static const symbol s_5_0[2] = { 'a', 'a' };
static const symbol s_5_1[2] = { 'e', 'e' };
static const symbol s_5_2[2] = { 'o', 'o' };
static const symbol s_5_3[2] = { 'u', 'u' };
static const struct among a_5[4] =
{
/* 0 */ { 2, s_5_0, -1, -1, 0},
/* 1 */ { 2, s_5_1, -1, -1, 0},
/* 2 */ { 2, s_5_2, -1, -1, 0},
/* 3 */ { 2, s_5_3, -1, -1, 0}
};
/*
 * Character-class bitmaps for the Dutch grouping tests.  g_v and g_v_j are
 * passed with the code range 97..232, g_v_I with 73..232.
 * NOTE(review): assuming bit k stands for code point (range minimum + k),
 * g_v is { a e i o u y U+00E8 }, g_v_I is g_v plus 'I', and g_v_j is g_v
 * plus 'j' -- the decoding lives in the grouping helpers; confirm.
 */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
/*
 * String literals referenced by the Dutch routines.  The generator emits one
 * constant per use, so several share the same text.
 */
/* s_0..s_4: replacement letters written by r_prelude for among codes 1..5. */
static const symbol s_0[] = { 'a' };
static const symbol s_1[] = { 'e' };
static const symbol s_2[] = { 'i' };
static const symbol s_3[] = { 'o' };
static const symbol s_4[] = { 'u' };
/* s_5..s_10: matched (lower case) and written (upper case) by r_prelude. */
static const symbol s_5[] = { 'y' };
static const symbol s_6[] = { 'Y' };
static const symbol s_7[] = { 'i' };
static const symbol s_8[] = { 'I' };
static const symbol s_9[] = { 'y' };
static const symbol s_10[] = { 'Y' };
/* s_11, s_12: written by r_postlude. */
static const symbol s_11[] = { 'y' };
static const symbol s_12[] = { 'i' };
/* s_13: matched by r_e_ending; s_14: matched by r_en_ending. */
static const symbol s_13[] = { 'e' };
static const symbol s_14[] = { 'g', 'e', 'm' };
/* s_15..s_21: used by r_standard_suffix (s_15 is written, the others are matched). */
static const symbol s_15[] = { 'h', 'e', 'i', 'd' };
static const symbol s_16[] = { 'h', 'e', 'i', 'd' };
static const symbol s_17[] = { 'c' };
static const symbol s_18[] = { 'e', 'n' };
static const symbol s_19[] = { 'i', 'g' };
static const symbol s_20[] = { 'e' };
static const symbol s_21[] = { 'e' };
/*
 * Dutch prelude, run forwards before the regions are marked.  Three passes:
 *
 *  1. (test/repeat, line 42) Walk the whole input; wherever one of the
 *     two-byte sequences of table a_0 starts at the cursor, replace it with
 *     the plain letter a/e/i/o/u selected by the among code; otherwise step
 *     one character.  The cursor is restored to its starting point afterwards.
 *  2. (try, line 57) If the text at the cursor starts with "y", replace that
 *     "y" with "Y".
 *  3. (repeat/goto, line 58) Repeatedly search forwards for a position where
 *     the g_v grouping test succeeds and is followed either by "i" plus
 *     another successful g_v test (the "i" becomes "I") or by "y" (which
 *     becomes "Y").
 *
 * Returns 1 when finished, or a negative value propagated from slice_from_s.
 * NOTE(review): the grouping helpers are taken to return 0 on a match, as the
 * surrounding "if (...) goto" failure branches suggest -- confirm.
 */
static int r_prelude(struct SN_env * z) {
int among_var;
{ int c_test = z->c; /* test, line 42 */
while(1) { /* repeat, line 42 */
int c1 = z->c;
z->bra = z->c; /* [, line 43 */
/* Pre-filter on the second byte: it must be in 0xA0..0xBF and select a set
 * bit of 340306450 (0x1448AA12), i.e. one of the trailing bytes in a_0.
 * Anything else is treated as the empty-string entry (code 6). */
if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((340306450 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else
among_var = find_among(z, a_0, 11); /* substring, line 43 */
if (!(among_var)) goto lab0;
z->ket = z->c; /* ], line 43 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */
if (ret < 0) return ret;
}
break;
case 6:
/* No accented vowel here: advance one character; failure ends the repeat. */
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab0;
z->c = ret; /* next, line 54 */
}
break;
}
continue;
lab0:
/* The iteration failed: undo its cursor movement and leave the loop. */
z->c = c1;
break;
}
z->c = c_test;
}
{ int c_keep = z->c; /* try, line 57 */
z->bra = z->c; /* [, line 57 */
if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; }
z->ket = z->c; /* ], line 57 */
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */
if (ret < 0) return ret;
}
lab1:
;
}
while(1) { /* repeat, line 58 */
int c2 = z->c;
while(1) { /* goto, line 58 */
int c3 = z->c;
if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab3;
z->bra = z->c; /* [, line 59 */
{ int c4 = z->c; /* or, line 59 */
/* First alternative: "i" followed by a successful g_v test. */
if (!(eq_s(z, 1, s_7))) goto lab5;
z->ket = z->c; /* ], line 59 */
if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab5;
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */
if (ret < 0) return ret;
}
goto lab4;
lab5:
/* Second alternative: "y". */
z->c = c4;
if (!(eq_s(z, 1, s_9))) goto lab3;
z->ket = z->c; /* ], line 60 */
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */
if (ret < 0) return ret;
}
}
lab4:
/* Match handled: return to the position where this attempt started. */
z->c = c3;
break;
lab3:
/* No match at this position: move one character forward and try again;
 * running out of input ends the outer repeat via lab2. */
z->c = c3;
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab2;
z->c = ret; /* goto, line 58 */
}
}
continue;
lab2:
z->c = c2;
break;
}
return 1;
}
/*
 * Set the Dutch region marks p1 = I[0] and p2 = I[1].
 *
 * Both marks start out as z->l.  A "gopast v, gopast non-v" scan advances the
 * cursor and its end position becomes p1, raised to 3 when smaller.  The same
 * scan is repeated from there and its end position becomes p2.
 *
 * Returns 1 when both marks were set, 0 as soon as one of the grouping scans
 * reports failure (negative result); marks not yet reached keep the value z->l.
 */
static int r_mark_regions(struct SN_env * z) {
    int step;

    z->I[0] = z->l;
    z->I[1] = z->l;

    /* gopast v, gopast non-v, setmark p1, line 69 */
    step = out_grouping_U(z, g_v, 97, 232, 1);
    if (step < 0) return 0;
    z->c += step;

    step = in_grouping_U(z, g_v, 97, 232, 1);
    if (step < 0) return 0;
    z->c += step;

    /* try ( $p1 < 3  $p1 = 3 ), line 70 */
    z->I[0] = (z->c < 3) ? 3 : z->c;

    /* gopast v, gopast non-v, setmark p2, line 71 */
    step = out_grouping_U(z, g_v, 97, 232, 1);
    if (step < 0) return 0;
    z->c += step;

    step = in_grouping_U(z, g_v, 97, 232, 1);
    if (step < 0) return 0;
    z->c += step;

    z->I[1] = z->c;
    return 1;
}
/*
 * Dutch postlude: walk forwards through the word and turn the marker letters
 * back into lower case -- "Y" (among code 1) becomes "y", "I" (code 2)
 * becomes "i"; any other character (code 3) is stepped over.
 *
 * The loop ends when no further character can be skipped; the cursor is then
 * reset to where that last iteration began.
 *
 * Returns 1 when finished, or a negative value propagated from slice_from_s.
 */
static int r_postlude(struct SN_env * z) {
    for (;;) {                                   /* repeat, line 75 */
        int iter_start = z->c;
        int among_var;
        int ret;

        z->bra = z->c;                           /* [, line 77 */

        /* Only consult the table when the next byte is 'I' (73) or 'Y' (89). */
        if (z->c < z->l && (z->p[z->c] == 73 || z->p[z->c] == 89)) {
            among_var = find_among(z, a_1, 3);   /* substring, line 77 */
        } else {
            among_var = 3;
        }
        if (among_var == 0) {
            z->c = iter_start;
            break;
        }
        z->ket = z->c;                           /* ], line 77 */

        if (among_var == 1) {
            ret = slice_from_s(z, 1, s_11);      /* <-, line 78 */
            if (ret < 0) return ret;
        } else if (among_var == 2) {
            ret = slice_from_s(z, 1, s_12);      /* <-, line 79 */
            if (ret < 0) return ret;
        } else if (among_var == 3) {
            ret = skip_utf8(z->p, z->c, 0, z->l, 1);
            if (ret < 0) {
                z->c = iter_start;
                break;
            }
            z->c = ret;                          /* next, line 80 */
        }
    }
    return 1;
}
/* Region test R1: returns 1 when the cursor is at or after mark p1 (I[0]), else 0. */
static int r_R1(struct SN_env * z) {
    return (z->I[0] <= z->c) ? 1 : 0;
}
/* Region test R2: returns 1 when the cursor is at or after mark p2 (I[1]), else 0. */
static int r_R2(struct SN_env * z) {
    return (z->I[1] <= z->c) ? 1 : 0;
}
/*
 * Dutch undoubling: when the text before the cursor ends in "dd", "kk" or
 * "tt" (table a_2), delete its final character.
 *
 * The table lookup is a test only; the cursor is put back before the
 * one-character step back that marks the slice to delete.
 *
 * Returns 1 after a deletion, 0 when the rule does not apply, and a negative
 * value propagated from slice_del on failure.
 */
static int r_undouble(struct SN_env * z) {
    int from_end = z->l - z->c;                  /* test, line 91 */
    int prev;
    int ret;

    /*
     * Pre-filter: at least two bytes, the last one in 0x60..0x7F with its low
     * five bits selecting a set bit of 1050640 (0x100810), i.e. d, k or t.
     */
    if (z->c - 1 <= z->lb
        || z->p[z->c - 1] >> 5 != 3
        || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) {
        return 0;
    }
    if (!find_among_b(z, a_2, 3)) return 0;      /* among, line 91 */
    z->c = z->l - from_end;

    z->ket = z->c;                               /* [, line 91 */
    prev = skip_utf8(z->p, z->c, z->lb, 0, -1);
    if (prev < 0) return 0;
    z->c = prev;                                 /* next, line 91 */
    z->bra = z->c;                               /* ], line 91 */

    ret = slice_del(z);                          /* delete, line 91 */
    if (ret < 0) return ret;
    return 1;
}
/*
 * Dutch "e" ending: delete a final "e" that lies in R1 and for which the
 * out_grouping_b_U(z, g_v, ...) test on the preceding text succeeds, then
 * undouble the new ending.
 *
 * B[0] (flag e_found) is cleared on entry and set to 1 once the "e" has been
 * deleted; r_standard_suffix reads it later.
 *
 * Returns 1 when the "e" was removed and r_undouble also applied, 0 when any
 * condition fails (including r_undouble returning 0 after the deletion), and
 * a negative value propagated from a helper on failure.
 */
static int r_e_ending(struct SN_env * z) {
    int from_end;
    int ret;

    z->B[0] = 0;                                 /* unset e_found, line 95 */

    z->ket = z->c;                               /* [, line 96 */
    if (!eq_s_b(z, 1, s_13)) return 0;
    z->bra = z->c;                               /* ], line 96 */

    ret = r_R1(z);                               /* call R1, line 96 */
    if (ret <= 0) return ret;

    /* test, line 96: the grouping check must not move the cursor */
    from_end = z->l - z->c;
    if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0;
    z->c = z->l - from_end;

    ret = slice_del(z);                          /* delete, line 96 */
    if (ret < 0) return ret;
    z->B[0] = 1;                                 /* set e_found, line 97 */

    ret = r_undouble(z);                         /* call undouble, line 98 */
    if (ret <= 0) return ret;
    return 1;
}
/*
 * Dutch "en" ending: delete the slice already marked by the caller, provided
 * the cursor is in R1, the out_grouping_b_U(z, g_v, ...) test on the
 * preceding text succeeds, and that text does not end in "gem"; then undouble
 * the new ending.
 *
 * Returns 1 when the deletion and r_undouble both applied, 0 when a condition
 * fails (including r_undouble returning 0 after the deletion), and a negative
 * value propagated from a helper on failure.
 */
static int r_en_ending(struct SN_env * z) {
    int from_end;
    int ret;

    ret = r_R1(z);                               /* call R1, line 102 */
    if (ret <= 0) return ret;

    /* and ( non-v, not 'gem' ), line 102: both checks start from the same spot */
    from_end = z->l - z->c;
    if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0;
    z->c = z->l - from_end;

    from_end = z->l - z->c;
    if (eq_s_b(z, 3, s_14)) return 0;
    z->c = z->l - from_end;

    ret = slice_del(z);                          /* delete, line 102 */
    if (ret < 0) return ret;

    ret = r_undouble(z);                         /* call undouble, line 103 */
    if (ret <= 0) return ret;
    return 1;
}
/*
 * Dutch suffix removal, run in backward mode.  It is a sequence of five
 * optional ("do") steps; each one restores the cursor to its distance from
 * the end (z->l - mN) whether or not it changed anything.
 *
 *  1. (line 107) Ending from table a_3:
 *       code 1 ("heden")     -> in R1: replace with "heid";
 *       code 2 ("ene", "en") -> r_en_ending;
 *       code 3 ("se", "s")   -> in R1 and g_v_j test passes: delete.
 *  2. (line 120) r_e_ending.
 *  3. (line 122) "heid" in R2, not preceded by "c": delete; if "en" then
 *     precedes, r_en_ending.
 *  4. (line 126) Ending from table a_4, each case requiring R2:
 *       code 1 ("end", "ing") -> delete, then either delete a preceding
 *                                "ig" (in R2, not preceded by "e") or undouble;
 *       code 2 ("ig")         -> delete unless preceded by "e";
 *       code 3 ("lijk")       -> delete, then r_e_ending;
 *       code 4 ("baar")       -> delete;
 *       code 5 ("bar")        -> delete only if B[0] (e_found) is set.
 *  5. (line 146) After a successful g_v_I test, if one of the a_5 doubled
 *     vowels followed by a successful g_v test precedes, delete one character.
 *
 * Returns 1 when finished, or a negative value propagated from a helper.
 * NOTE(review): the grouping helpers are taken to return 0 on success, as the
 * "if (...) goto labN" failure branches suggest -- confirm.
 */
static int r_standard_suffix(struct SN_env * z) {
int among_var;
{ int m1 = z->l - z->c; (void)m1; /* do, line 107 */
z->ket = z->c; /* [, line 108 */
/* Pre-filter: last byte in 0x60..0x7F selecting a set bit of 540704
 * (0x84020), i.e. e, n or s -- the final letters of the a_3 endings. */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0;
among_var = find_among_b(z, a_3, 5); /* substring, line 108 */
if (!(among_var)) goto lab0;
z->bra = z->c; /* ], line 108 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = r_R1(z);
if (ret == 0) goto lab0; /* call R1, line 110 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = r_en_ending(z);
if (ret == 0) goto lab0; /* call en_ending, line 113 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = r_R1(z);
if (ret == 0) goto lab0; /* call R1, line 116 */
if (ret < 0) return ret;
}
if (out_grouping_b_U(z, g_v_j, 97, 232, 0)) goto lab0;
{ int ret = slice_del(z); /* delete, line 116 */
if (ret < 0) return ret;
}
break;
}
lab0:
z->c = z->l - m1;
}
{ int m2 = z->l - z->c; (void)m2; /* do, line 120 */
{ int ret = r_e_ending(z);
if (ret == 0) goto lab1; /* call e_ending, line 120 */
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 122 */
z->ket = z->c; /* [, line 122 */
if (!(eq_s_b(z, 4, s_16))) goto lab2;
z->bra = z->c; /* ], line 122 */
{ int ret = r_R2(z);
if (ret == 0) goto lab2; /* call R2, line 122 */
if (ret < 0) return ret;
}
{ int m4 = z->l - z->c; (void)m4; /* not, line 122 */
/* A preceding "c" vetoes the deletion; otherwise rewind and carry on. */
if (!(eq_s_b(z, 1, s_17))) goto lab3;
goto lab2;
lab3:
z->c = z->l - m4;
}
{ int ret = slice_del(z); /* delete, line 122 */
if (ret < 0) return ret;
}
z->ket = z->c; /* [, line 123 */
if (!(eq_s_b(z, 2, s_18))) goto lab2;
z->bra = z->c; /* ], line 123 */
{ int ret = r_en_ending(z);
if (ret == 0) goto lab2; /* call en_ending, line 123 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
{ int m5 = z->l - z->c; (void)m5; /* do, line 126 */
z->ket = z->c; /* [, line 127 */
/* Pre-filter: at least two bytes, the last one in 0x60..0x7F selecting a
 * set bit of 264336 (0x40890), i.e. d, g, k or r. */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4;
among_var = find_among_b(z, a_4, 6); /* substring, line 127 */
if (!(among_var)) goto lab4;
z->bra = z->c; /* ], line 127 */
switch(among_var) {
case 0: goto lab4;
case 1:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 129 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 129 */
if (ret < 0) return ret;
}
{ int m6 = z->l - z->c; (void)m6; /* or, line 130 */
/* First alternative: a preceding "ig" in R2 that is not preceded by "e". */
z->ket = z->c; /* [, line 130 */
if (!(eq_s_b(z, 2, s_19))) goto lab6;
z->bra = z->c; /* ], line 130 */
{ int ret = r_R2(z);
if (ret == 0) goto lab6; /* call R2, line 130 */
if (ret < 0) return ret;
}
{ int m7 = z->l - z->c; (void)m7; /* not, line 130 */
if (!(eq_s_b(z, 1, s_20))) goto lab7;
goto lab6;
lab7:
z->c = z->l - m7;
}
{ int ret = slice_del(z); /* delete, line 130 */
if (ret < 0) return ret;
}
goto lab5;
lab6:
/* Second alternative: rewind and try to undouble instead. */
z->c = z->l - m6;
{ int ret = r_undouble(z);
if (ret == 0) goto lab4; /* call undouble, line 130 */
if (ret < 0) return ret;
}
}
lab5:
break;
case 2:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 133 */
if (ret < 0) return ret;
}
{ int m8 = z->l - z->c; (void)m8; /* not, line 133 */
if (!(eq_s_b(z, 1, s_21))) goto lab8;
goto lab4;
lab8:
z->c = z->l - m8;
}
{ int ret = slice_del(z); /* delete, line 133 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 136 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 136 */
if (ret < 0) return ret;
}
{ int ret = r_e_ending(z);
if (ret == 0) goto lab4; /* call e_ending, line 136 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 139 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 139 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 142 */
if (ret < 0) return ret;
}
if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */
{ int ret = slice_del(z); /* delete, line 142 */
if (ret < 0) return ret;
}
break;
}
lab4:
z->c = z->l - m5;
}
{ int m9 = z->l - z->c; (void)m9; /* do, line 146 */
if (out_grouping_b_U(z, g_v_I, 73, 232, 0)) goto lab9;
{ int m_test = z->l - z->c; /* test, line 148 */
/* Pre-filter: at least two bytes, the last one in 0x60..0x7F selecting a
 * set bit of 2129954 (0x208022), i.e. a, e, o or u. */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9;
if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */
if (out_grouping_b_U(z, g_v, 97, 232, 0)) goto lab9;
z->c = z->l - m_test;
}
/* Mark exactly one character before the cursor and delete it. */
z->ket = z->c; /* [, line 152 */
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) goto lab9;
z->c = ret; /* next, line 152 */
}
z->bra = z->c; /* ], line 152 */
{ int ret = slice_del(z); /* delete, line 152 */
if (ret < 0) return ret;
}
lab9:
z->c = z->l - m9;
}
return 1;
}
/*
 * Entry point of the Dutch UTF-8 stemmer.
 *
 * Runs r_prelude and r_mark_regions forwards (each from the same starting
 * cursor), switches to backward mode for r_standard_suffix, then returns to
 * forward mode for r_postlude.  Every routine is an optional step: a 0 result
 * is ignored and the cursor is restored; a negative result aborts and is
 * returned unchanged.
 *
 * Returns 1 on completion, or the negative error code of a failing step.
 */
extern int dutch_UTF_8_stem(struct SN_env * z) {
    int saved;
    int from_end;
    int ret;

    /* do prelude, line 159 */
    saved = z->c;
    ret = r_prelude(z);
    if (ret < 0) return ret;
    z->c = saved;

    /* do mark_regions, line 160 */
    saved = z->c;
    ret = r_mark_regions(z);
    if (ret < 0) return ret;
    z->c = saved;

    /* backwards, line 161 */
    z->lb = z->c;
    z->c = z->l;

    /* do standard_suffix, line 162 */
    from_end = z->l - z->c;
    ret = r_standard_suffix(z);
    if (ret < 0) return ret;
    z->c = z->l - from_end;

    /* back to forward mode */
    z->c = z->lb;

    /* do postlude, line 163 */
    saved = z->c;
    ret = r_postlude(z);
    if (ret < 0) return ret;
    z->c = saved;

    return 1;
}
/*
 * Allocate a stemmer environment for Dutch via SN_create_env(0, 2, 1).
 * NOTE(review): the arguments presumably size the S, I and B arrays; this
 * matches the Dutch routines, which use I[0], I[1] and B[0] and no S slot --
 * confirm against SN_create_env.
 */
extern struct SN_env * dutch_UTF_8_create_env(void)
{
    return SN_create_env(0, 2, 1);
}
/*
 * Release an environment obtained from dutch_UTF_8_create_env.
 * The second argument (0) equals the first argument given to SN_create_env there.
 */
extern void dutch_UTF_8_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,768 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int finnish_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_tidy(struct SN_env * z);
static int r_other_endings(struct SN_env * z);
static int r_t_plural(struct SN_env * z);
static int r_i_plural(struct SN_env * z);
static int r_case_ending(struct SN_env * z);
static int r_VI(struct SN_env * z);
static int r_LONG(struct SN_env * z);
static int r_possessive(struct SN_env * z);
static int r_particle_etc(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * finnish_UTF_8_create_env(void);
extern void finnish_UTF_8_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Among table a_0 (Finnish), searched backwards by r_particle_etc with
 * find_among_b(z, a_0, 10).  "sti" carries result code 2; every other entry
 * ("pa", "kaan", "han", "kin", "ko" and the variants containing the bytes
 * 0xC3 0xA4 = U+00E4 or 0xC3 0xB6 = U+00F6) carries code 1.
 * NOTE(review): field meanings (length, string, linked entry, result code,
 * function pointer) are inferred from usage; confirm against struct among.
 */
static const symbol s_0_0[2] = { 'p', 'a' };
static const symbol s_0_1[3] = { 's', 't', 'i' };
static const symbol s_0_2[4] = { 'k', 'a', 'a', 'n' };
static const symbol s_0_3[3] = { 'h', 'a', 'n' };
static const symbol s_0_4[3] = { 'k', 'i', 'n' };
static const symbol s_0_5[4] = { 'h', 0xC3, 0xA4, 'n' };
static const symbol s_0_6[6] = { 'k', 0xC3, 0xA4, 0xC3, 0xA4, 'n' };
static const symbol s_0_7[2] = { 'k', 'o' };
static const symbol s_0_8[3] = { 'p', 0xC3, 0xA4 };
static const symbol s_0_9[3] = { 'k', 0xC3, 0xB6 };
static const struct among a_0[10] =
{
/* 0 */ { 2, s_0_0, -1, 1, 0},
/* 1 */ { 3, s_0_1, -1, 2, 0},
/* 2 */ { 4, s_0_2, -1, 1, 0},
/* 3 */ { 3, s_0_3, -1, 1, 0},
/* 4 */ { 3, s_0_4, -1, 1, 0},
/* 5 */ { 4, s_0_5, -1, 1, 0},
/* 6 */ { 6, s_0_6, -1, 1, 0},
/* 7 */ { 2, s_0_7, -1, 1, 0},
/* 8 */ { 3, s_0_8, -1, 1, 0},
/* 9 */ { 3, s_0_9, -1, 1, 0}
};
/*
 * Among table a_1 (Finnish): endings "lla", "na", "ssa", "ta", "lta", "sta",
 * all ending in 'a' and all with result code -1.  "lta" and "sta" name
 * entry 3 ("ta") in their third field.
 * r_possessive searches it with find_among_b(z, a_1, 6).
 */
static const symbol s_1_0[3] = { 'l', 'l', 'a' };
static const symbol s_1_1[2] = { 'n', 'a' };
static const symbol s_1_2[3] = { 's', 's', 'a' };
static const symbol s_1_3[2] = { 't', 'a' };
static const symbol s_1_4[3] = { 'l', 't', 'a' };
static const symbol s_1_5[3] = { 's', 't', 'a' };
static const struct among a_1[6] =
{
/* 0 */ { 3, s_1_0, -1, -1, 0},
/* 1 */ { 2, s_1_1, -1, -1, 0},
/* 2 */ { 3, s_1_2, -1, -1, 0},
/* 3 */ { 2, s_1_3, -1, -1, 0},
/* 4 */ { 3, s_1_4, 3, -1, 0},
/* 5 */ { 3, s_1_5, 3, -1, 0}
};
/*
 * Among table a_2 (Finnish): the same six endings as a_1 with the final 'a'
 * replaced by the bytes 0xC3 0xA4 (UTF-8 for U+00E4).  All result codes are -1.
 */
static const symbol s_2_0[4] = { 'l', 'l', 0xC3, 0xA4 };
static const symbol s_2_1[3] = { 'n', 0xC3, 0xA4 };
static const symbol s_2_2[4] = { 's', 's', 0xC3, 0xA4 };
static const symbol s_2_3[3] = { 't', 0xC3, 0xA4 };
static const symbol s_2_4[4] = { 'l', 't', 0xC3, 0xA4 };
static const symbol s_2_5[4] = { 's', 't', 0xC3, 0xA4 };
static const struct among a_2[6] =
{
/* 0 */ { 4, s_2_0, -1, -1, 0},
/* 1 */ { 3, s_2_1, -1, -1, 0},
/* 2 */ { 4, s_2_2, -1, -1, 0},
/* 3 */ { 3, s_2_3, -1, -1, 0},
/* 4 */ { 4, s_2_4, 3, -1, 0},
/* 5 */ { 4, s_2_5, 3, -1, 0}
};
/* Among table a_3 (Finnish): endings "lle" and "ine", both with result code -1. */
static const symbol s_3_0[3] = { 'l', 'l', 'e' };
static const symbol s_3_1[3] = { 'i', 'n', 'e' };
static const struct among a_3[2] =
{
/* 0 */ { 3, s_3_0, -1, -1, 0},
/* 1 */ { 3, s_3_1, -1, -1, 0}
};
/*
 * Among table a_4 (Finnish), searched backwards by r_possessive with
 * find_among_b(z, a_4, 9).  Result codes:
 *   "si" -> 1, "ni" -> 2,
 *   "nsa", "mme", "nne", "ns" + U+00E4 -> 3,
 *   "an" -> 4, U+00E4 + "n" -> 5, "en" -> 6.
 * (0xC3 0xA4 is the UTF-8 encoding of U+00E4.)
 */
static const symbol s_4_0[3] = { 'n', 's', 'a' };
static const symbol s_4_1[3] = { 'm', 'm', 'e' };
static const symbol s_4_2[3] = { 'n', 'n', 'e' };
static const symbol s_4_3[2] = { 'n', 'i' };
static const symbol s_4_4[2] = { 's', 'i' };
static const symbol s_4_5[2] = { 'a', 'n' };
static const symbol s_4_6[2] = { 'e', 'n' };
static const symbol s_4_7[3] = { 0xC3, 0xA4, 'n' };
static const symbol s_4_8[4] = { 'n', 's', 0xC3, 0xA4 };
static const struct among a_4[9] =
{
/* 0 */ { 3, s_4_0, -1, 3, 0},
/* 1 */ { 3, s_4_1, -1, 3, 0},
/* 2 */ { 3, s_4_2, -1, 3, 0},
/* 3 */ { 2, s_4_3, -1, 2, 0},
/* 4 */ { 2, s_4_4, -1, 1, 0},
/* 5 */ { 2, s_4_5, -1, 4, 0},
/* 6 */ { 2, s_4_6, -1, 6, 0},
/* 7 */ { 3, s_4_7, -1, 5, 0},
/* 8 */ { 4, s_4_8, -1, 3, 0}
};
/*
 * Among table a_5 (Finnish): doubled vowels "aa", "ee", "ii", "oo", "uu" and
 * the doubled two-byte sequences 0xC3 0xA4 (U+00E4) and 0xC3 0xB6 (U+00F6).
 * All result codes are -1.
 */
static const symbol s_5_0[2] = { 'a', 'a' };
static const symbol s_5_1[2] = { 'e', 'e' };
static const symbol s_5_2[2] = { 'i', 'i' };
static const symbol s_5_3[2] = { 'o', 'o' };
static const symbol s_5_4[2] = { 'u', 'u' };
static const symbol s_5_5[4] = { 0xC3, 0xA4, 0xC3, 0xA4 };
static const symbol s_5_6[4] = { 0xC3, 0xB6, 0xC3, 0xB6 };
static const struct among a_5[7] =
{
/* 0 */ { 2, s_5_0, -1, -1, 0},
/* 1 */ { 2, s_5_1, -1, -1, 0},
/* 2 */ { 2, s_5_2, -1, -1, 0},
/* 3 */ { 2, s_5_3, -1, -1, 0},
/* 4 */ { 2, s_5_4, -1, -1, 0},
/* 5 */ { 4, s_5_5, -1, -1, 0},
/* 6 */ { 4, s_5_6, -1, -1, 0}
};
/*
 * Among table a_6 (Finnish): thirty endings, grouped by their shortest tail
 * ("a", "n", or the bytes 0xC3 0xA4 = U+00E4), which the longer entries name
 * in their third field.  Unlike the other tables, five entries ("den",
 * "seen", "tten", "siin") carry a routine -- r_VI or r_LONG -- in the fifth
 * field instead of 0.
 * NOTE(review): the routine is presumably an extra condition evaluated by the
 * among search before the entry is accepted -- confirm against find_among_b.
 */
static const symbol s_6_0[1] = { 'a' };
static const symbol s_6_1[3] = { 'l', 'l', 'a' };
static const symbol s_6_2[2] = { 'n', 'a' };
static const symbol s_6_3[3] = { 's', 's', 'a' };
static const symbol s_6_4[2] = { 't', 'a' };
static const symbol s_6_5[3] = { 'l', 't', 'a' };
static const symbol s_6_6[3] = { 's', 't', 'a' };
static const symbol s_6_7[3] = { 't', 't', 'a' };
static const symbol s_6_8[3] = { 'l', 'l', 'e' };
static const symbol s_6_9[3] = { 'i', 'n', 'e' };
static const symbol s_6_10[3] = { 'k', 's', 'i' };
static const symbol s_6_11[1] = { 'n' };
static const symbol s_6_12[3] = { 'h', 'a', 'n' };
static const symbol s_6_13[3] = { 'd', 'e', 'n' };
static const symbol s_6_14[4] = { 's', 'e', 'e', 'n' };
static const symbol s_6_15[3] = { 'h', 'e', 'n' };
static const symbol s_6_16[4] = { 't', 't', 'e', 'n' };
static const symbol s_6_17[3] = { 'h', 'i', 'n' };
static const symbol s_6_18[4] = { 's', 'i', 'i', 'n' };
static const symbol s_6_19[3] = { 'h', 'o', 'n' };
static const symbol s_6_20[4] = { 'h', 0xC3, 0xA4, 'n' };
static const symbol s_6_21[4] = { 'h', 0xC3, 0xB6, 'n' };
static const symbol s_6_22[2] = { 0xC3, 0xA4 };
static const symbol s_6_23[4] = { 'l', 'l', 0xC3, 0xA4 };
static const symbol s_6_24[3] = { 'n', 0xC3, 0xA4 };
static const symbol s_6_25[4] = { 's', 's', 0xC3, 0xA4 };
static const symbol s_6_26[3] = { 't', 0xC3, 0xA4 };
static const symbol s_6_27[4] = { 'l', 't', 0xC3, 0xA4 };
static const symbol s_6_28[4] = { 's', 't', 0xC3, 0xA4 };
static const symbol s_6_29[4] = { 't', 't', 0xC3, 0xA4 };
static const struct among a_6[30] =
{
/* 0 */ { 1, s_6_0, -1, 8, 0},
/* 1 */ { 3, s_6_1, 0, -1, 0},
/* 2 */ { 2, s_6_2, 0, -1, 0},
/* 3 */ { 3, s_6_3, 0, -1, 0},
/* 4 */ { 2, s_6_4, 0, -1, 0},
/* 5 */ { 3, s_6_5, 4, -1, 0},
/* 6 */ { 3, s_6_6, 4, -1, 0},
/* 7 */ { 3, s_6_7, 4, 9, 0},
/* 8 */ { 3, s_6_8, -1, -1, 0},
/* 9 */ { 3, s_6_9, -1, -1, 0},
/* 10 */ { 3, s_6_10, -1, -1, 0},
/* 11 */ { 1, s_6_11, -1, 7, 0},
/* 12 */ { 3, s_6_12, 11, 1, 0},
/* 13 */ { 3, s_6_13, 11, -1, r_VI},
/* 14 */ { 4, s_6_14, 11, -1, r_LONG},
/* 15 */ { 3, s_6_15, 11, 2, 0},
/* 16 */ { 4, s_6_16, 11, -1, r_VI},
/* 17 */ { 3, s_6_17, 11, 3, 0},
/* 18 */ { 4, s_6_18, 11, -1, r_VI},
/* 19 */ { 3, s_6_19, 11, 4, 0},
/* 20 */ { 4, s_6_20, 11, 5, 0},
/* 21 */ { 4, s_6_21, 11, 6, 0},
/* 22 */ { 2, s_6_22, -1, 8, 0},
/* 23 */ { 4, s_6_23, 22, -1, 0},
/* 24 */ { 3, s_6_24, 22, -1, 0},
/* 25 */ { 4, s_6_25, 22, -1, 0},
/* 26 */ { 3, s_6_26, 22, -1, 0},
/* 27 */ { 4, s_6_27, 26, -1, 0},
/* 28 */ { 4, s_6_28, 26, -1, 0},
/* 29 */ { 4, s_6_29, 26, 9, 0}
};
/*
 * Among table a_7 (Finnish): endings "eja", "mma", "mpa", "mmi", "mpi", their
 * "i"-prefixed forms, and the variants whose final vowel is the two-byte
 * sequence 0xC3 0xA4 (U+00E4).  The un-prefixed "mm"/"mp" forms carry result
 * code 1; the "i"-prefixed forms and the two "ej" forms carry -1.  Each
 * "i"-prefixed entry names its un-prefixed form in the third field.
 */
static const symbol s_7_0[3] = { 'e', 'j', 'a' };
static const symbol s_7_1[3] = { 'm', 'm', 'a' };
static const symbol s_7_2[4] = { 'i', 'm', 'm', 'a' };
static const symbol s_7_3[3] = { 'm', 'p', 'a' };
static const symbol s_7_4[4] = { 'i', 'm', 'p', 'a' };
static const symbol s_7_5[3] = { 'm', 'm', 'i' };
static const symbol s_7_6[4] = { 'i', 'm', 'm', 'i' };
static const symbol s_7_7[3] = { 'm', 'p', 'i' };
static const symbol s_7_8[4] = { 'i', 'm', 'p', 'i' };
static const symbol s_7_9[4] = { 'e', 'j', 0xC3, 0xA4 };
static const symbol s_7_10[4] = { 'm', 'm', 0xC3, 0xA4 };
static const symbol s_7_11[5] = { 'i', 'm', 'm', 0xC3, 0xA4 };
static const symbol s_7_12[4] = { 'm', 'p', 0xC3, 0xA4 };
static const symbol s_7_13[5] = { 'i', 'm', 'p', 0xC3, 0xA4 };
static const struct among a_7[14] =
{
/* 0 */ { 3, s_7_0, -1, -1, 0},
/* 1 */ { 3, s_7_1, -1, 1, 0},
/* 2 */ { 4, s_7_2, 1, -1, 0},
/* 3 */ { 3, s_7_3, -1, 1, 0},
/* 4 */ { 4, s_7_4, 3, -1, 0},
/* 5 */ { 3, s_7_5, -1, 1, 0},
/* 6 */ { 4, s_7_6, 5, -1, 0},
/* 7 */ { 3, s_7_7, -1, 1, 0},
/* 8 */ { 4, s_7_8, 7, -1, 0},
/* 9 */ { 4, s_7_9, -1, -1, 0},
/* 10 */ { 4, s_7_10, -1, 1, 0},
/* 11 */ { 5, s_7_11, 10, -1, 0},
/* 12 */ { 4, s_7_12, -1, 1, 0},
/* 13 */ { 5, s_7_13, 12, -1, 0}
};
/* Among table a_8 (Finnish): the single letters "i" and "j", both with result code -1. */
static const symbol s_8_0[1] = { 'i' };
static const symbol s_8_1[1] = { 'j' };
static const struct among a_8[2] =
{
/* 0 */ { 1, s_8_0, -1, -1, 0},
/* 1 */ { 1, s_8_1, -1, -1, 0}
};
/*
 * Among table a_9 (Finnish): "mma" (result code 1) and "imma" (code -1, with
 * entry 0 named in its third field).
 */
static const symbol s_9_0[3] = { 'm', 'm', 'a' };
static const symbol s_9_1[4] = { 'i', 'm', 'm', 'a' };
static const struct among a_9[2] =
{
/* 0 */ { 3, s_9_0, -1, 1, 0},
/* 1 */ { 4, s_9_1, 0, -1, 0}
};
/*
 * Character-class bitmaps for the Finnish grouping tests.  g_V1 and
 * g_particle_end are passed with the code range 97..246 in the routines
 * visible here.
 * NOTE(review): assuming bit k stands for code point 97 + k, the sets are
 *   g_AEI          = { a e i U+00E4 }
 *   g_V1           = { a e i o u y U+00E4 U+00F6 }
 *   g_V2           = g_V1 without 'y'
 *   g_particle_end = g_V1 plus 'n' and 't'
 * -- the decoding lives in the grouping helpers; confirm.
 */
static const unsigned char g_AEI[] = { 17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 };
static const unsigned char g_V1[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
static const unsigned char g_V2[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
static const unsigned char g_particle_end[] = { 17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
/*
 * String literals referenced by the Finnish routines.  The generator emits
 * one constant per use, so several share the same text.
 * s_0 ("k"), s_1 ("kse") and s_2 ("ksi") are used by r_possessive; s_8 and
 * s_9 are the two-byte UTF-8 sequences for U+00E4 and U+00F6.
 */
static const symbol s_0[] = { 'k' };
static const symbol s_1[] = { 'k', 's', 'e' };
static const symbol s_2[] = { 'k', 's', 'i' };
static const symbol s_3[] = { 'i' };
static const symbol s_4[] = { 'a' };
static const symbol s_5[] = { 'e' };
static const symbol s_6[] = { 'i' };
static const symbol s_7[] = { 'o' };
static const symbol s_8[] = { 0xC3, 0xA4 };
static const symbol s_9[] = { 0xC3, 0xB6 };
static const symbol s_10[] = { 'i', 'e' };
static const symbol s_11[] = { 'e' };
static const symbol s_12[] = { 'p', 'o' };
static const symbol s_13[] = { 't' };
static const symbol s_14[] = { 'p', 'o' };
static const symbol s_15[] = { 'j' };
static const symbol s_16[] = { 'o' };
static const symbol s_17[] = { 'u' };
static const symbol s_18[] = { 'o' };
static const symbol s_19[] = { 'j' };
/*
 * Set the Finnish region marks p1 = I[0] and p2 = I[1].
 *
 * Both marks start out as z->l.  For each mark in turn a "goto V1, gopast
 * non-V1" scan is run from the current cursor and the position it reaches is
 * stored in the mark.
 *
 * Returns 1 when both marks were set, 0 as soon as a grouping scan reports
 * failure (negative result); marks not yet reached keep the value z->l.
 */
static int r_mark_regions(struct SN_env * z) {
    int mark;

    z->I[0] = z->l;
    z->I[1] = z->l;

    /* mark 0: setmark p1, line 46; mark 1: setmark p2, line 47 */
    for (mark = 0; mark < 2; mark++) {
        int past;

        /* goto V1 */
        if (out_grouping_U(z, g_V1, 97, 246, 1) < 0) return 0;

        /* gopast non-V1 */
        past = in_grouping_U(z, g_V1, 97, 246, 1);
        if (past < 0) return 0;
        z->c += past;

        z->I[mark] = z->c;
    }
    return 1;
}
/* Region test: 1 when the cursor is at or beyond mark I[1], otherwise 0. */
static int r_R2(struct SN_env * z) {
    return (z->c >= z->I[1]) ? 1 : 0;
}
/*
 * Delete a suffix found in among table a_0.
 *
 * The table lookup runs while mark I[0] is installed as the backward limit;
 * the previous limit is put back straight after the lookup.  Result 1 also
 * requires in_grouping_b_U on g_particle_end to return zero; result 2
 * requires r_R2 to succeed.
 *
 * Returns 1 after deleting the slice, 0 when the rule does not apply, or a
 * negative value handed up from a helper.
 */
static int r_particle_etc(struct SN_env * z) {
    int among_var;
    int outer_lb;
    int status;

    if (z->c < z->I[0])
        return 0;

    /* setlimit: the cursor itself ends up where it started */
    outer_lb = z->lb;
    z->lb = z->I[0];
    z->ket = z->c;
    among_var = find_among_b(z, a_0, 10);
    z->lb = outer_lb;
    if (among_var == 0)
        return 0;
    z->bra = z->c;

    if (among_var == 1) {
        if (in_grouping_b_U(z, g_particle_end, 97, 246, 0))
            return 0;
    } else if (among_var == 2) {
        status = r_R2(z);
        if (status <= 0)
            return status;
    }

    status = slice_del(z);
    return (status < 0) ? status : 1;
}
/*
 * Possessive-suffix step.
 *
 * A suffix from among table a_4 is looked up while mark I[0] serves as the
 * backward limit.  What happens next depends on the table result:
 *   1  delete, unless the suffix is preceded by "k"
 *   2  delete, then require a preceding "kse" and rewrite it to "ksi"
 *   3  delete
 *   4  delete if a further entry of a_1 matches (last byte must be 97)
 *   5  delete if a further entry of a_2 matches (last byte must be 164)
 *   6  delete if a further entry of a_3 matches (last byte must be 101)
 *
 * Returns 1 on success, 0 when the rule does not apply, or a negative value
 * handed up from a helper.
 */
static int r_possessive(struct SN_env * z) {
    int among_var;
    int outer_lb;
    int status;

    if (z->c < z->I[0])
        return 0;

    /* setlimit: the cursor itself ends up where it started */
    outer_lb = z->lb;
    z->lb = z->I[0];
    z->ket = z->c;
    among_var = find_among_b(z, a_4, 9);
    z->lb = outer_lb;
    if (among_var == 0)
        return 0;
    z->bra = z->c;

    switch (among_var) {
        case 1: {
            int back = z->l - z->c;

            if (eq_s_b(z, 1, s_0))
                return 0;
            z->c = z->l - back;

            status = slice_del(z);
            if (status < 0)
                return status;
            break;
        }

        case 2:
            status = slice_del(z);
            if (status < 0)
                return status;
            z->ket = z->c;
            if (!eq_s_b(z, 3, s_1))
                return 0;
            z->bra = z->c;
            status = slice_from_s(z, 3, s_2);
            if (status < 0)
                return status;
            break;

        case 3:
            status = slice_del(z);
            if (status < 0)
                return status;
            break;

        case 4:
            if (!(z->c - 1 > z->lb && z->p[z->c - 1] == 97))
                return 0;
            if (!find_among_b(z, a_1, 6))
                return 0;
            status = slice_del(z);
            if (status < 0)
                return status;
            break;

        case 5:
            if (!(z->c - 2 > z->lb && z->p[z->c - 1] == 164))
                return 0;
            if (!find_among_b(z, a_2, 6))
                return 0;
            status = slice_del(z);
            if (status < 0)
                return status;
            break;

        case 6:
            if (!(z->c - 2 > z->lb && z->p[z->c - 1] == 101))
                return 0;
            if (!find_among_b(z, a_3, 2))
                return 0;
            status = slice_del(z);
            if (status < 0)
                return status;
            break;

        default:
            break;
    }

    return 1;
}
/* 1 when an entry of among table a_5 matches backwards from the cursor,
 * otherwise 0. */
static int r_LONG(struct SN_env * z) {
    return find_among_b(z, a_5, 7) ? 1 : 0;
}
/* 1 when "i" matches backwards at the cursor and the following g_V2 grouping
 * check returns zero; otherwise 0.  The grouping check is only attempted
 * after "i" has matched. */
static int r_VI(struct SN_env * z) {
    return (eq_s_b(z, 1, s_3) && !in_grouping_b_U(z, g_V2, 97, 246, 0)) ? 1 : 0;
}
/*
 * Case-ending step.
 *
 * Looks up a suffix from among table a_6 ending at the cursor, with mark I[0]
 * temporarily installed as the backward limit.  Depending on the table
 * result an extra condition on the text before the suffix must hold; when it
 * does, the slice [bra, ket) is deleted and flag B[0] (ending_removed) is set.
 *
 * Returns 1 on success, 0 when the rule does not apply, or a negative value
 * propagated from a helper.
 */
static int r_case_ending(struct SN_env * z) {
int among_var;
/* The previous limit is kept in mlimit and restored on every way out of
 * this block. */
{ int mlimit; /* setlimit, line 96 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 96 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 96 */
among_var = find_among_b(z, a_6, 30); /* substring, line 96 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 96 */
z->lb = mlimit;
}
switch(among_var) {
case 0: return 0;
/* Results 1..6: the suffix must be preceded by one specific literal
 * ("a", "e", "i", "o", or the two-byte UTF-8 sequences in s_8 / s_9). */
case 1:
if (!(eq_s_b(z, 1, s_4))) return 0;
break;
case 2:
if (!(eq_s_b(z, 1, s_5))) return 0;
break;
case 3:
if (!(eq_s_b(z, 1, s_6))) return 0;
break;
case 4:
if (!(eq_s_b(z, 1, s_7))) return 0;
break;
case 5:
if (!(eq_s_b(z, 2, s_8))) return 0;
break;
case 6:
if (!(eq_s_b(z, 2, s_9))) return 0;
break;
/* Result 7: optional ("try") extension of the slice.  If r_LONG or the
 * literal "ie" matches before the suffix, the cursor returns to the
 * position saved in m2, skip_utf8 is asked for a step of -1 and bra is
 * moved to the resulting position.  On any failure the cursor goes back
 * to m_keep and bra is left untouched.  Either way the delete below runs. */
case 7:
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */
{ int m2 = z->l - z->c; (void)m2; /* and, line 113 */
{ int m3 = z->l - z->c; (void)m3; /* or, line 112 */
{ int ret = r_LONG(z);
if (ret == 0) goto lab2; /* call LONG, line 111 */
if (ret < 0) return ret;
}
goto lab1;
lab2:
z->c = z->l - m3;
if (!(eq_s_b(z, 2, s_10))) { z->c = z->l - m_keep; goto lab0; }
}
lab1:
z->c = z->l - m2;
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) { z->c = z->l - m_keep; goto lab0; }
z->c = ret; /* next, line 113 */
}
}
z->bra = z->c; /* ], line 113 */
lab0:
;
}
break;
/* Result 8: two grouping checks against g_V1, moving backwards.
 * NOTE(review): the call pattern suggests both helpers return 0 on a
 * match, i.e. a g_V1 member preceded by a non-member -- confirm. */
case 8:
if (in_grouping_b_U(z, g_V1, 97, 246, 0)) return 0;
if (out_grouping_b_U(z, g_V1, 97, 246, 0)) return 0;
break;
/* Result 9: the suffix must be preceded by "e". */
case 9:
if (!(eq_s_b(z, 1, s_11))) return 0;
break;
}
{ int ret = slice_del(z); /* delete, line 138 */
if (ret < 0) return ret;
}
z->B[0] = 1; /* set ending_removed, line 139 */
return 1;
}
/*
 * Delete a suffix found in among table a_7.
 *
 * The lookup runs with mark I[1] as the backward limit.  For table result 1
 * the deletion is refused when the suffix is preceded by "po".
 *
 * Returns 1 after deleting the slice, 0 when the rule does not apply, or a
 * negative value handed up from slice_del.
 */
static int r_other_endings(struct SN_env * z) {
    int among_var;
    int outer_lb;
    int status;

    if (z->c < z->I[1])
        return 0;

    /* setlimit: the cursor itself ends up where it started */
    outer_lb = z->lb;
    z->lb = z->I[1];
    z->ket = z->c;
    among_var = find_among_b(z, a_7, 14);
    z->lb = outer_lb;
    if (among_var == 0)
        return 0;
    z->bra = z->c;

    if (among_var == 1) {
        int back = z->l - z->c;

        if (eq_s_b(z, 2, s_12))
            return 0;
        z->c = z->l - back;
    }

    status = slice_del(z);
    return (status < 0) ? status : 1;
}
/*
 * Delete a suffix found in among table a_8.
 *
 * The lookup runs with mark I[0] as the backward limit and is attempted only
 * when the byte before the cursor is 105 ('i') or 106 ('j').
 *
 * Returns 1 after deleting the slice, 0 when the rule does not apply, or a
 * negative value handed up from slice_del.
 */
static int r_i_plural(struct SN_env * z) {
    int outer_lb;
    int matched;
    int status;

    if (z->c < z->I[0])
        return 0;

    /* setlimit: the cursor itself ends up where it started */
    outer_lb = z->lb;
    z->lb = z->I[0];
    z->ket = z->c;
    matched = z->c > z->lb
        && (z->p[z->c - 1] == 105 || z->p[z->c - 1] == 106)
        && find_among_b(z, a_8, 2);
    z->lb = outer_lb;
    if (!matched)
        return 0;
    z->bra = z->c;

    status = slice_del(z);
    return (status < 0) ? status : 1;
}
static int r_t_plural(struct SN_env * z) {
int among_var;
{ int mlimit; /* setlimit, line 161 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 161 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 162 */
if (!(eq_s_b(z, 1, s_13))) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 162 */
{ int m_test = z->l - z->c; /* test, line 162 */
if (in_grouping_b_U(z, g_V1, 97, 246, 0)) { z->lb = mlimit; return 0; }
z->c = z->l - m_test;
}
{ int ret = slice_del(z); /* delete, line 163 */
if (ret < 0) return ret;
}
z->lb = mlimit;
}
{ int mlimit; /* setlimit, line 165 */
int m2 = z->l - z->c; (void)m2;
if (z->c < z->I[1]) return 0;
z->c = z->I[1]; /* tomark, line 165 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m2;
z->ket = z->c; /* [, line 165 */
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 97) { z->lb = mlimit; return 0; }
among_var = find_among_b(z, a_9, 2); /* substring, line 165 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 165 */
z->lb = mlimit;
}
switch(among_var) {
case 0: return 0;
case 1:
{ int m3 = z->l - z->c; (void)m3; /* not, line 167 */
if (!(eq_s_b(z, 2, s_14))) goto lab0;
return 0;
lab0:
z->c = z->l - m3;
}
break;
}
{ int ret = slice_del(z); /* delete, line 170 */
if (ret < 0) return ret;
}
return 1;
}
/*
 * Tidy-up step run after the suffix-removal steps.
 *
 * Four independent "do" blocks run with mark I[0] installed as the backward
 * limit; each restores the cursor afterwards whether or not it changed the
 * text.  A final part, outside that limit, compares one character with the
 * text before it and deletes it when they are equal.
 *
 * Returns 1 when the final deletion happened, 0 when the final part does not
 * apply, -1 when slice_to returns a null pointer, or a negative value
 * propagated from slice_del.
 */
static int r_tidy(struct SN_env * z) {
{ int mlimit; /* setlimit, line 173 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 173 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
/* 1: if r_LONG matches, go back to the starting cursor, ask skip_utf8 for
 * a step of -1 and delete the slice between the two positions. */
{ int m2 = z->l - z->c; (void)m2; /* do, line 174 */
{ int m3 = z->l - z->c; (void)m3; /* and, line 174 */
{ int ret = r_LONG(z);
if (ret == 0) goto lab0; /* call LONG, line 174 */
if (ret < 0) return ret;
}
z->c = z->l - m3;
z->ket = z->c; /* [, line 174 */
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) goto lab0;
z->c = ret; /* next, line 174 */
}
z->bra = z->c; /* ], line 174 */
{ int ret = slice_del(z); /* delete, line 174 */
if (ret < 0) return ret;
}
}
lab0:
z->c = z->l - m2;
}
/* 2: a g_AEI grouping check marks the slice, a g_V1 out-grouping check on
 * the text before it must also pass, then the slice is deleted.
 * NOTE(review): assumes both helpers return 0 on a match -- confirm. */
{ int m4 = z->l - z->c; (void)m4; /* do, line 175 */
z->ket = z->c; /* [, line 175 */
if (in_grouping_b_U(z, g_AEI, 97, 228, 0)) goto lab1;
z->bra = z->c; /* ], line 175 */
if (out_grouping_b_U(z, g_V1, 97, 246, 0)) goto lab1;
{ int ret = slice_del(z); /* delete, line 175 */
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m4;
}
/* 3: delete a "j" that is preceded by "o" or "u". */
{ int m5 = z->l - z->c; (void)m5; /* do, line 176 */
z->ket = z->c; /* [, line 176 */
if (!(eq_s_b(z, 1, s_15))) goto lab2;
z->bra = z->c; /* ], line 176 */
{ int m6 = z->l - z->c; (void)m6; /* or, line 176 */
if (!(eq_s_b(z, 1, s_16))) goto lab4;
goto lab3;
lab4:
z->c = z->l - m6;
if (!(eq_s_b(z, 1, s_17))) goto lab2;
}
lab3:
{ int ret = slice_del(z); /* delete, line 176 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m5;
}
/* 4: delete an "o" that is preceded by "j". */
{ int m7 = z->l - z->c; (void)m7; /* do, line 177 */
z->ket = z->c; /* [, line 177 */
if (!(eq_s_b(z, 1, s_18))) goto lab5;
z->bra = z->c; /* ], line 177 */
if (!(eq_s_b(z, 1, s_19))) goto lab5;
{ int ret = slice_del(z); /* delete, line 177 */
if (ret < 0) return ret;
}
lab5:
z->c = z->l - m7;
}
z->lb = mlimit;
}
/* Final part: after the g_V1 scan, mark one character as the slice, copy it
 * into S[0] with slice_to, and delete it only if eq_v_b finds the same text
 * immediately before it. */
if (in_grouping_b_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* non V1, line 179 */
z->ket = z->c; /* [, line 179 */
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) return 0;
z->c = ret; /* next, line 179 */
}
z->bra = z->c; /* ], line 179 */
z->S[0] = slice_to(z, z->S[0]); /* -> x, line 179 */
if (z->S[0] == 0) return -1; /* -> x, line 179 */
if (!(eq_v_b(z, z->S[0]))) return 0; /* name x, line 179 */
{ int ret = slice_del(z); /* delete, line 179 */
if (ret < 0) return ret;
}
return 1;
}
/*
 * Entry point of the Finnish (UTF-8) stemmer.
 *
 * Runs r_mark_regions forwards, clears flag B[0] (ending_removed), switches
 * to backward processing (lb = cursor, cursor = end of input) and then runs,
 * in order: r_particle_etc, r_possessive, r_case_ending, r_other_endings,
 * either r_i_plural (B[0] set) or r_t_plural (B[0] clear), and r_tidy.  A
 * step returning 0 is simply skipped; the cursor is put back to the same
 * distance from the end after every step.
 *
 * Returns 1, or the negative value of the first helper that reports one.
 */
extern int finnish_UTF_8_stem(struct SN_env * z) {
    int start = z->c;
    int back;
    int status;

    status = r_mark_regions(z);
    if (status < 0)
        return status;
    z->c = start;

    z->B[0] = 0;

    /* backwards mode */
    z->lb = z->c;
    z->c = z->l;

    back = z->l - z->c;
    status = r_particle_etc(z);
    if (status < 0)
        return status;
    z->c = z->l - back;

    back = z->l - z->c;
    status = r_possessive(z);
    if (status < 0)
        return status;
    z->c = z->l - back;

    back = z->l - z->c;
    status = r_case_ending(z);
    if (status < 0)
        return status;
    z->c = z->l - back;

    back = z->l - z->c;
    status = r_other_endings(z);
    if (status < 0)
        return status;
    z->c = z->l - back;

    /* plural handling depends on whether a case ending was removed */
    back = z->l - z->c;
    if (z->B[0])
        status = r_i_plural(z);
    else
        status = r_t_plural(z);
    if (status < 0)
        return status;
    z->c = z->l - back;

    back = z->l - z->c;
    status = r_tidy(z);
    if (status < 0)
        return status;
    z->c = z->l - back;

    z->c = z->lb;
    return 1;
}
/*
 * Create the environment for the Finnish stemmer via SN_create_env(1, 2, 1).
 * NOTE(review): the counts line up with the slots this stemmer touches
 * (S[0], I[0..1], B[0]) -- confirm the argument order against SN_create_env.
 */
extern struct SN_env * finnish_UTF_8_create_env(void)
{
    return SN_create_env(1, 2, 1);
}
/*
 * Release an environment obtained from finnish_UTF_8_create_env.
 * NOTE(review): the second argument (1) presumably repeats the string-slot
 * count given to SN_create_env -- confirm.
 */
extern void finnish_UTF_8_close_env(struct SN_env * z)
{
    SN_close_env(z, 1);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,509 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
/* Public entry point; given C linkage when compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif
extern int german_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Forward declarations of the file-local routines of the German stemmer.
 * As written below they return 1 on success, 0 when they do not apply and
 * pass on negative values from the runtime helpers. */
static int r_standard_suffix(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_prelude(struct SN_env * z);
/* Environment constructor / destructor, also with C linkage under C++. */
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * german_UTF_8_create_env(void);
extern void german_UTF_8_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Strings for among table a_0, searched forwards by r_postlude with
 * find_among(z, a_0, 6).  The two-byte entries are the UTF-8 encodings of
 * U+00E4, U+00F6 and U+00FC (a, o, u with diaeresis). */
static const symbol s_0_1[1] = { 'U' };
static const symbol s_0_2[1] = { 'Y' };
static const symbol s_0_3[2] = { 0xC3, 0xA4 };
static const symbol s_0_4[2] = { 0xC3, 0xB6 };
static const symbol s_0_5[2] = { 0xC3, 0xBC };
/* Entry 0 is the empty string.  Each entry starts with the string length and
 * the string; the fourth field lines up with the case labels of the switch
 * in r_postlude.
 * NOTE(review): field meanings come from struct among in header.h (not
 * visible here); the third field looks like the index of a shorter entry
 * contained in this one, -1 if none -- confirm. */
static const struct among a_0[6] =
{
/* 0 */ { 0, 0, -1, 6, 0},
/* 1 */ { 1, s_0_1, 0, 2, 0},
/* 2 */ { 1, s_0_2, 0, 1, 0},
/* 3 */ { 2, s_0_3, 0, 3, 0},
/* 4 */ { 2, s_0_4, 0, 4, 0},
/* 5 */ { 2, s_0_5, 0, 5, 0}
};
/* Suffixes for among table a_1, searched backwards by the first block of
 * r_standard_suffix with find_among_b(z, a_1, 7). */
static const symbol s_1_0[1] = { 'e' };
static const symbol s_1_1[2] = { 'e', 'm' };
static const symbol s_1_2[2] = { 'e', 'n' };
static const symbol s_1_3[3] = { 'e', 'r', 'n' };
static const symbol s_1_4[2] = { 'e', 'r' };
static const symbol s_1_5[1] = { 's' };
static const symbol s_1_6[2] = { 'e', 's' };
/* Fourth field: 1 for the entries deleted outright, 2 for "s", which
 * r_standard_suffix deletes only after a g_s_ending check.
 * NOTE(review): the third field looks like the index of a shorter entry that
 * is a suffix of this one ("es" -> "s"), -1 if none -- confirm against
 * struct among in header.h. */
static const struct among a_1[7] =
{
/* 0 */ { 1, s_1_0, -1, 1, 0},
/* 1 */ { 2, s_1_1, -1, 1, 0},
/* 2 */ { 2, s_1_2, -1, 1, 0},
/* 3 */ { 3, s_1_3, -1, 1, 0},
/* 4 */ { 2, s_1_4, -1, 1, 0},
/* 5 */ { 1, s_1_5, -1, 2, 0},
/* 6 */ { 2, s_1_6, 5, 1, 0}
};
/* Suffixes for among table a_2, searched backwards by the second block of
 * r_standard_suffix with find_among_b(z, a_2, 4). */
static const symbol s_2_0[2] = { 'e', 'n' };
static const symbol s_2_1[2] = { 'e', 'r' };
static const symbol s_2_2[2] = { 's', 't' };
static const symbol s_2_3[3] = { 'e', 's', 't' };
/* Fourth field: 1 for the entries deleted outright, 2 for "st", which
 * r_standard_suffix deletes only after a g_st_ending check and a hop.
 * NOTE(review): the third field looks like the index of a shorter entry that
 * is a suffix of this one ("est" -> "st"), -1 if none -- confirm against
 * struct among in header.h. */
static const struct among a_2[4] =
{
/* 0 */ { 2, s_2_0, -1, 1, 0},
/* 1 */ { 2, s_2_1, -1, 1, 0},
/* 2 */ { 2, s_2_2, -1, 2, 0},
/* 3 */ { 3, s_2_3, 2, 1, 0}
};
/* Suffixes for among table a_3, searched backwards by case 4 of the third
 * block of r_standard_suffix with find_among_b(z, a_3, 2).  Both entries
 * carry 1 in the fourth field and are deleted on a match.
 * NOTE(review): field meanings are defined by struct among in header.h,
 * which is not visible here. */
static const symbol s_3_0[2] = { 'i', 'g' };
static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' };
static const struct among a_3[2] =
{
/* 0 */ { 2, s_3_0, -1, 1, 0},
/* 1 */ { 4, s_3_1, -1, 1, 0}
};
/* Suffixes for among table a_4, searched backwards by the third block of
 * r_standard_suffix with find_among_b(z, a_4, 8). */
static const symbol s_4_0[3] = { 'e', 'n', 'd' };
static const symbol s_4_1[2] = { 'i', 'g' };
static const symbol s_4_2[3] = { 'u', 'n', 'g' };
static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' };
static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' };
static const symbol s_4_5[2] = { 'i', 'k' };
static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' };
static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' };
/* The fourth field (1..4) lines up with the case labels of the switch in
 * the third block of r_standard_suffix.
 * NOTE(review): the other fields are defined by struct among in header.h,
 * which is not visible here. */
static const struct among a_4[8] =
{
/* 0 */ { 3, s_4_0, -1, 1, 0},
/* 1 */ { 2, s_4_1, -1, 2, 0},
/* 2 */ { 3, s_4_2, -1, 1, 0},
/* 3 */ { 4, s_4_3, -1, 3, 0},
/* 4 */ { 4, s_4_4, -1, 2, 0},
/* 5 */ { 2, s_4_5, -1, 2, 0},
/* 6 */ { 4, s_4_6, -1, 3, 0},
/* 7 */ { 4, s_4_7, -1, 4, 0}
};
/* Character groupings handed to the in_grouping / out_grouping helpers:
 * g_v with the code point range 97..252, g_s_ending and g_st_ending with
 * 98..116.
 * NOTE(review): these look like bitmaps indexed from the range minimum --
 * confirm against the grouping helpers, which are not visible here. */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 };
static const unsigned char g_s_ending[] = { 117, 30, 5 };
static const unsigned char g_st_ending[] = { 117, 30, 4 };
/* s_0..s_5: r_prelude.  s_0 is the UTF-8 encoding of U+00DF (sharp s),
 * which is rewritten to s_1; "u"/"y" are rewritten to "U"/"Y". */
static const symbol s_0[] = { 0xC3, 0x9F };
static const symbol s_1[] = { 's', 's' };
static const symbol s_2[] = { 'u' };
static const symbol s_3[] = { 'U' };
static const symbol s_4[] = { 'y' };
static const symbol s_5[] = { 'Y' };
/* s_6..s_10: replacement strings used by r_postlude. */
static const symbol s_6[] = { 'y' };
static const symbol s_7[] = { 'u' };
static const symbol s_8[] = { 'a' };
static const symbol s_9[] = { 'o' };
static const symbol s_10[] = { 'u' };
/* s_11..s_15: literals tested by the third block of r_standard_suffix. */
static const symbol s_11[] = { 'i', 'g' };
static const symbol s_12[] = { 'e' };
static const symbol s_13[] = { 'e' };
static const symbol s_14[] = { 'e', 'r' };
static const symbol s_15[] = { 'e', 'n' };
/*
 * Prelude: two forward passes over the input.
 *
 * Pass 1 replaces every occurrence of s_0 (UTF-8 for U+00DF) with "ss",
 * otherwise stepping one character at a time; the cursor is then put back
 * to where the pass started.
 * Pass 2 searches repeatedly for a "u" or "y" with a g_v grouping check
 * passing on both sides and rewrites it to "U" or "Y".
 *
 * Returns 1, or a negative value propagated from slice_from_s.
 */
static int r_prelude(struct SN_env * z) {
{ int c_test = z->c; /* test, line 30 */
while(1) { /* repeat, line 30 */
int c1 = z->c;
{ int c2 = z->c; /* or, line 33 */
z->bra = z->c; /* [, line 32 */
if (!(eq_s(z, 2, s_0))) goto lab2;
z->ket = z->c; /* ], line 32 */
{ int ret = slice_from_s(z, 2, s_1); /* <-, line 32 */
if (ret < 0) return ret;
}
goto lab1;
lab2:
/* no match here: step over one character instead */
z->c = c2;
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab0;
z->c = ret; /* next, line 33 */
}
}
lab1:
continue;
lab0:
/* could not advance any further: leave the repeat */
z->c = c1;
break;
}
z->c = c_test;
}
while(1) { /* repeat, line 36 */
int c3 = z->c;
/* inner loop: advance until the pattern matches at the cursor */
while(1) { /* goto, line 36 */
int c4 = z->c;
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4;
z->bra = z->c; /* [, line 37 */
{ int c5 = z->c; /* or, line 37 */
if (!(eq_s(z, 1, s_2))) goto lab6;
z->ket = z->c; /* ], line 37 */
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab6;
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 37 */
if (ret < 0) return ret;
}
goto lab5;
lab6:
z->c = c5;
if (!(eq_s(z, 1, s_4))) goto lab4;
z->ket = z->c; /* ], line 38 */
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4;
{ int ret = slice_from_s(z, 1, s_5); /* <-, line 38 */
if (ret < 0) return ret;
}
}
lab5:
/* matched: resume the outer repeat from the position where the match
 * began */
z->c = c4;
break;
lab4:
z->c = c4;
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab3;
z->c = ret; /* goto, line 36 */
}
}
continue;
lab3:
/* end of input reached without a further match */
z->c = c3;
break;
}
return 1;
}
/*
 * Set the region marks I[0] (p1) and I[1] (p2); I[2] (x) is a helper mark.
 *
 * Both region marks start at the end of the input.  I[2] receives the
 * position skip_utf8 reports for a hop of 3 from the cursor (the cursor
 * itself does not move).  Then "gopast grouping v, gopast non v" is run
 * twice: the position after the first run, raised to I[2] if it is smaller,
 * becomes I[0]; the position after the second run becomes I[1].
 *
 * Returns 1 on success, 0 as soon as the hop or one of the scans fails.
 */
static int r_mark_regions(struct SN_env * z) {
    int hop;
    int step;

    z->I[0] = z->l;
    z->I[1] = z->l;

    hop = skip_utf8(z->p, z->c, 0, z->l, 3);
    if (hop < 0)
        return 0;
    z->I[2] = hop;

    /* first run -> p1 */
    step = out_grouping_U(z, g_v, 97, 252, 1);
    if (step < 0)
        return 0;
    z->c += step;
    step = in_grouping_U(z, g_v, 97, 252, 1);
    if (step < 0)
        return 0;
    z->c += step;
    z->I[0] = (z->c < z->I[2]) ? z->I[2] : z->c;

    /* second run -> p2 */
    step = out_grouping_U(z, g_v, 97, 252, 1);
    if (step < 0)
        return 0;
    z->c += step;
    step = in_grouping_U(z, g_v, 97, 252, 1);
    if (step < 0)
        return 0;
    z->c += step;
    z->I[1] = z->c;

    return 1;
}
static int r_postlude(struct SN_env * z) {
int among_var;
while(1) { /* repeat, line 55 */
int c1 = z->c;
z->bra = z->c; /* [, line 57 */
among_var = find_among(z, a_0, 6); /* substring, line 57 */
if (!(among_var)) goto lab0;
z->ket = z->c; /* ], line 57 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 58 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 1, s_7); /* <-, line 59 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 60 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = slice_from_s(z, 1, s_9); /* <-, line 61 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 62 */
if (ret < 0) return ret;
}
break;
case 6:
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab0;
z->c = ret; /* next, line 63 */
}
break;
}
continue;
lab0:
z->c = c1;
break;
}
return 1;
}
/* Region test: 1 when the cursor is at or beyond mark I[0], otherwise 0. */
static int r_R1(struct SN_env * z) {
    return (z->c >= z->I[0]) ? 1 : 0;
}
/* Region test: 1 when the cursor is at or beyond mark I[1], otherwise 0. */
static int r_R2(struct SN_env * z) {
    return (z->c >= z->I[1]) ? 1 : 0;
}
/*
 * Suffix removal, run in backward mode.
 *
 * Three independent "do" blocks follow one another; each restores the cursor
 * to its distance from the end of the word when it finishes, whether or not
 * it deleted anything:
 *   block 1: suffixes of table a_1, must start at or after mark I[0] (r_R1)
 *   block 2: suffixes of table a_2, must start at or after mark I[0] (r_R1)
 *   block 3: suffixes of table a_4, must start at or after mark I[1] (r_R2),
 *            with optional removal of a further suffix in front
 *
 * Returns 1, or a negative value propagated from a helper.
 */
static int r_standard_suffix(struct SN_env * z) {
int among_var;
{ int m1 = z->l - z->c; (void)m1; /* do, line 74 */
z->ket = z->c; /* [, line 75 */
/* Quick reject: the byte before the cursor must be one of e, m, n, r, s
 * (the final letters of the entries of a_1). */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0;
among_var = find_among_b(z, a_1, 7); /* substring, line 75 */
if (!(among_var)) goto lab0;
z->bra = z->c; /* ], line 75 */
{ int ret = r_R1(z);
if (ret == 0) goto lab0; /* call R1, line 75 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_del(z); /* delete, line 77 */
if (ret < 0) return ret;
}
break;
/* result 2 is the bare "s": deleted only if the g_s_ending grouping
 * check on the text before it returns zero */
case 2:
if (in_grouping_b_U(z, g_s_ending, 98, 116, 0)) goto lab0;
{ int ret = slice_del(z); /* delete, line 80 */
if (ret < 0) return ret;
}
break;
}
lab0:
z->c = z->l - m1;
}
{ int m2 = z->l - z->c; (void)m2; /* do, line 84 */
z->ket = z->c; /* [, line 85 */
/* Quick reject: at least two bytes available and the last one is one of
 * n, r, t (the final letters of the entries of a_2). */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1;
among_var = find_among_b(z, a_2, 4); /* substring, line 85 */
if (!(among_var)) goto lab1;
z->bra = z->c; /* ], line 85 */
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 85 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab1;
case 1:
{ int ret = slice_del(z); /* delete, line 87 */
if (ret < 0) return ret;
}
break;
/* result 2 is "st": needs the g_st_ending grouping check to return zero
 * and a successful hop of 3 further back before the delete */
case 2:
if (in_grouping_b_U(z, g_st_ending, 98, 116, 0)) goto lab1;
{ int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 3);
if (ret < 0) goto lab1;
z->c = ret; /* hop, line 90 */
}
{ int ret = slice_del(z); /* delete, line 90 */
if (ret < 0) return ret;
}
break;
}
lab1:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 94 */
z->ket = z->c; /* [, line 95 */
/* Quick reject: at least two bytes available and the last one is one of
 * d, g, h, k, t (the final letters of the entries of a_4). */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2;
among_var = find_among_b(z, a_4, 8); /* substring, line 95 */
if (!(among_var)) goto lab2;
z->bra = z->c; /* ], line 95 */
{ int ret = r_R2(z);
if (ret == 0) goto lab2; /* call R2, line 95 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab2;
/* result 1 ("end", "ung"): delete, then optionally also delete a
 * preceding "ig" that is not itself preceded by "e" and lies in R2 */
case 1:
{ int ret = slice_del(z); /* delete, line 97 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 98 */
z->ket = z->c; /* [, line 98 */
if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab3; }
z->bra = z->c; /* ], line 98 */
{ int m4 = z->l - z->c; (void)m4; /* not, line 98 */
if (!(eq_s_b(z, 1, s_12))) goto lab4;
{ z->c = z->l - m_keep; goto lab3; }
lab4:
z->c = z->l - m4;
}
{ int ret = r_R2(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 98 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 98 */
if (ret < 0) return ret;
}
lab3:
;
}
break;
/* result 2 ("ig", "isch", "ik"): delete unless preceded by "e" */
case 2:
{ int m5 = z->l - z->c; (void)m5; /* not, line 101 */
if (!(eq_s_b(z, 1, s_13))) goto lab5;
goto lab2;
lab5:
z->c = z->l - m5;
}
{ int ret = slice_del(z); /* delete, line 101 */
if (ret < 0) return ret;
}
break;
/* result 3 ("lich", "heit"): delete, then optionally also delete a
 * preceding "er" or "en" that lies in R1 */
case 3:
{ int ret = slice_del(z); /* delete, line 104 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 105 */
z->ket = z->c; /* [, line 106 */
{ int m6 = z->l - z->c; (void)m6; /* or, line 106 */
if (!(eq_s_b(z, 2, s_14))) goto lab8;
goto lab7;
lab8:
z->c = z->l - m6;
if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab6; }
}
lab7:
z->bra = z->c; /* ], line 106 */
{ int ret = r_R1(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R1, line 106 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 106 */
if (ret < 0) return ret;
}
lab6:
;
}
break;
/* result 4 ("keit"): delete, then optionally also delete a preceding
 * entry of a_3 ("ig", "lich") that lies in R2 */
case 4:
{ int ret = slice_del(z); /* delete, line 110 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */
z->ket = z->c; /* [, line 112 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab9; }
among_var = find_among_b(z, a_3, 2); /* substring, line 112 */
if (!(among_var)) { z->c = z->l - m_keep; goto lab9; }
z->bra = z->c; /* ], line 112 */
{ int ret = r_R2(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab9; } /* call R2, line 112 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: { z->c = z->l - m_keep; goto lab9; }
case 1:
{ int ret = slice_del(z); /* delete, line 114 */
if (ret < 0) return ret;
}
break;
}
lab9:
;
}
break;
}
lab2:
z->c = z->l - m3;
}
return 1;
}
/*
 * Entry point of the German (UTF-8) stemmer.
 *
 * Runs r_prelude and r_mark_regions forwards (cursor restored after each),
 * switches to backward processing for r_standard_suffix, then returns to the
 * start of the word for r_postlude.  A step returning 0 is simply skipped.
 *
 * Returns 1, or the negative value of the first helper that reports one.
 */
extern int german_UTF_8_stem(struct SN_env * z) {
    int saved;
    int status;

    saved = z->c;
    status = r_prelude(z);
    if (status < 0)
        return status;
    z->c = saved;

    saved = z->c;
    status = r_mark_regions(z);
    if (status < 0)
        return status;
    z->c = saved;

    /* backwards mode */
    z->lb = z->c;
    z->c = z->l;

    saved = z->l - z->c;
    status = r_standard_suffix(z);
    if (status < 0)
        return status;
    z->c = z->l - saved;

    /* forwards again */
    z->c = z->lb;

    saved = z->c;
    status = r_postlude(z);
    if (status < 0)
        return status;
    z->c = saved;

    return 1;
}
/*
 * Create the environment for the German stemmer via SN_create_env(0, 3, 0).
 * NOTE(review): this stemmer uses integer slots I[0..2] and no string or
 * boolean slots, which lines up with the counts -- confirm the argument
 * order against SN_create_env.
 */
extern struct SN_env * german_UTF_8_create_env(void)
{
    return SN_create_env(0, 3, 0);
}
/*
 * Release an environment obtained from german_UTF_8_create_env.
 * NOTE(review): the second argument (0) presumably repeats the string-slot
 * count given to SN_create_env -- confirm.
 */
extern void german_UTF_8_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,299 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
/* Public entry point; given C linkage when compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif
extern int norwegian_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Forward declarations of the file-local routines of the Norwegian stemmer.
 * As written below they return 1 on success, 0 when they do not apply and
 * pass on negative values from the runtime helpers. */
static int r_other_suffix(struct SN_env * z);
static int r_consonant_pair(struct SN_env * z);
static int r_main_suffix(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
/* Environment constructor / destructor, also with C linkage under C++. */
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * norwegian_UTF_8_create_env(void);
extern void norwegian_UTF_8_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Suffixes for among table a_0, searched backwards by r_main_suffix with
 * find_among_b(z, a_0, 29). */
static const symbol s_0_0[1] = { 'a' };
static const symbol s_0_1[1] = { 'e' };
static const symbol s_0_2[3] = { 'e', 'd', 'e' };
static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' };
static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' };
static const symbol s_0_5[3] = { 'a', 'n', 'e' };
static const symbol s_0_6[3] = { 'e', 'n', 'e' };
static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' };
static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' };
static const symbol s_0_9[2] = { 'e', 'n' };
static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' };
static const symbol s_0_11[2] = { 'a', 'r' };
static const symbol s_0_12[2] = { 'e', 'r' };
static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' };
static const symbol s_0_14[1] = { 's' };
static const symbol s_0_15[2] = { 'a', 's' };
static const symbol s_0_16[2] = { 'e', 's' };
static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' };
static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' };
static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' };
static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' };
static const symbol s_0_21[3] = { 'e', 'n', 's' };
static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' };
static const symbol s_0_23[3] = { 'e', 'r', 's' };
static const symbol s_0_24[3] = { 'e', 't', 's' };
static const symbol s_0_25[2] = { 'e', 't' };
static const symbol s_0_26[3] = { 'h', 'e', 't' };
static const symbol s_0_27[3] = { 'e', 'r', 't' };
static const symbol s_0_28[3] = { 'a', 's', 't' };
/* The fourth field lines up with the case labels in r_main_suffix: 1 for
 * suffixes deleted outright, 2 for the bare "s" (deleted conditionally) and
 * 3 for "erte"/"ert" (rewritten to "er").
 * NOTE(review): the third field looks like the index of a shorter entry that
 * is a suffix of this one, -1 if none -- confirm against struct among in
 * header.h, which is not visible here. */
static const struct among a_0[29] =
{
/* 0 */ { 1, s_0_0, -1, 1, 0},
/* 1 */ { 1, s_0_1, -1, 1, 0},
/* 2 */ { 3, s_0_2, 1, 1, 0},
/* 3 */ { 4, s_0_3, 1, 1, 0},
/* 4 */ { 4, s_0_4, 1, 1, 0},
/* 5 */ { 3, s_0_5, 1, 1, 0},
/* 6 */ { 3, s_0_6, 1, 1, 0},
/* 7 */ { 6, s_0_7, 6, 1, 0},
/* 8 */ { 4, s_0_8, 1, 3, 0},
/* 9 */ { 2, s_0_9, -1, 1, 0},
/* 10 */ { 5, s_0_10, 9, 1, 0},
/* 11 */ { 2, s_0_11, -1, 1, 0},
/* 12 */ { 2, s_0_12, -1, 1, 0},
/* 13 */ { 5, s_0_13, 12, 1, 0},
/* 14 */ { 1, s_0_14, -1, 2, 0},
/* 15 */ { 2, s_0_15, 14, 1, 0},
/* 16 */ { 2, s_0_16, 14, 1, 0},
/* 17 */ { 4, s_0_17, 16, 1, 0},
/* 18 */ { 5, s_0_18, 16, 1, 0},
/* 19 */ { 4, s_0_19, 16, 1, 0},
/* 20 */ { 7, s_0_20, 19, 1, 0},
/* 21 */ { 3, s_0_21, 14, 1, 0},
/* 22 */ { 6, s_0_22, 21, 1, 0},
/* 23 */ { 3, s_0_23, 14, 1, 0},
/* 24 */ { 3, s_0_24, 14, 1, 0},
/* 25 */ { 2, s_0_25, -1, 1, 0},
/* 26 */ { 3, s_0_26, 25, 1, 0},
/* 27 */ { 3, s_0_27, -1, 3, 0},
/* 28 */ { 3, s_0_28, -1, 1, 0}
};
/* Consonant pairs for among table a_1, searched backwards by
 * r_consonant_pair with find_among_b(z, a_1, 2).  The caller only checks
 * that the result is nonzero; both entries carry -1 in the fourth field.
 * NOTE(review): field meanings are defined by struct among in header.h,
 * which is not visible here. */
static const symbol s_1_0[2] = { 'd', 't' };
static const symbol s_1_1[2] = { 'v', 't' };
static const struct among a_1[2] =
{
/* 0 */ { 2, s_1_0, -1, -1, 0},
/* 1 */ { 2, s_1_1, -1, -1, 0}
};
/* Suffixes for among table a_2, searched backwards by r_other_suffix with
 * find_among_b(z, a_2, 11). */
static const symbol s_2_0[3] = { 'l', 'e', 'g' };
static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' };
static const symbol s_2_2[2] = { 'i', 'g' };
static const symbol s_2_3[3] = { 'e', 'i', 'g' };
static const symbol s_2_4[3] = { 'l', 'i', 'g' };
static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' };
static const symbol s_2_6[3] = { 'e', 'l', 's' };
static const symbol s_2_7[3] = { 'l', 'o', 'v' };
static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' };
static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' };
static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' };
/* Every entry carries 1 in the fourth field, the only result for which
 * r_other_suffix deletes the match.
 * NOTE(review): the third field looks like the index of a shorter entry that
 * is a suffix of this one, -1 if none -- confirm against struct among in
 * header.h, which is not visible here. */
static const struct among a_2[11] =
{
/* 0 */ { 3, s_2_0, -1, 1, 0},
/* 1 */ { 4, s_2_1, 0, 1, 0},
/* 2 */ { 2, s_2_2, -1, 1, 0},
/* 3 */ { 3, s_2_3, 2, 1, 0},
/* 4 */ { 3, s_2_4, 2, 1, 0},
/* 5 */ { 4, s_2_5, 4, 1, 0},
/* 6 */ { 3, s_2_6, -1, 1, 0},
/* 7 */ { 3, s_2_7, -1, 1, 0},
/* 8 */ { 4, s_2_8, 7, 1, 0},
/* 9 */ { 4, s_2_9, 7, 1, 0},
/* 10 */ { 7, s_2_10, 9, 1, 0}
};
/* Character groupings handed to the in_grouping / out_grouping helpers:
 * g_v with the code point range 97..248, g_s_ending with 98..122.
 * NOTE(review): these look like bitmaps indexed from the range minimum --
 * confirm against the grouping helpers, which are not visible here. */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
static const unsigned char g_s_ending[] = { 119, 125, 149, 1 };
/* Literals used by r_main_suffix: s_0 is tested before a bare "s" suffix,
 * s_1 is the replacement written for table result 3. */
static const symbol s_0[] = { 'k' };
static const symbol s_1[] = { 'e', 'r' };
/*
 * Set the region mark I[0] (p1); I[1] (x) is a helper mark.
 *
 * I[0] starts at the end of the input.  I[1] receives the position
 * skip_utf8 reports for a hop of 3 from the cursor (the cursor itself does
 * not move).  After "goto grouping v, gopast non v" the cursor position,
 * raised to I[1] if it is smaller, becomes I[0].
 *
 * Returns 1 on success, 0 as soon as the hop or one of the scans fails.
 */
static int r_mark_regions(struct SN_env * z) {
    int hop;
    int step;

    z->I[0] = z->l;

    hop = skip_utf8(z->p, z->c, 0, z->l, 3);
    if (hop < 0)
        return 0;
    z->I[1] = hop;

    if (out_grouping_U(z, g_v, 97, 248, 1) < 0)
        return 0;
    step = in_grouping_U(z, g_v, 97, 248, 1);
    if (step < 0)
        return 0;
    z->c += step;

    z->I[0] = (z->c < z->I[1]) ? z->I[1] : z->c;
    return 1;
}
/*
 * Main suffix step.
 *
 * A suffix from among table a_0 is looked up with mark I[0] as the backward
 * limit.  The lookup is attempted only when the byte before the cursor is
 * one of a, e, n, r, s, t (the bitmask test).  Table results:
 *   1  delete the suffix
 *   2  delete it if the g_s_ending grouping check returns zero, or else if
 *      it is preceded by "k" and the g_v out-grouping check before that
 *      returns zero
 *   3  replace the suffix with "er"
 *
 * Returns 1 on success, 0 when the rule does not apply, or a negative value
 * handed up from a helper.
 */
static int r_main_suffix(struct SN_env * z) {
    int among_var;
    int outer_lb;
    int status;

    if (z->c < z->I[0])
        return 0;

    /* setlimit: the cursor itself ends up where it started */
    outer_lb = z->lb;
    z->lb = z->I[0];
    z->ket = z->c;
    if (z->c > z->lb
        && z->p[z->c - 1] >> 5 == 3
        && ((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1))
        among_var = find_among_b(z, a_0, 29);
    else
        among_var = 0;
    z->lb = outer_lb;
    if (among_var == 0)
        return 0;
    z->bra = z->c;

    switch (among_var) {
        case 1:
            status = slice_del(z);
            break;

        case 2: {
            int back = z->l - z->c;

            if (in_grouping_b_U(z, g_s_ending, 98, 122, 0)) {
                /* first alternative failed: try "k" after a non-vowel */
                z->c = z->l - back;
                if (!eq_s_b(z, 1, s_0))
                    return 0;
                if (out_grouping_b_U(z, g_v, 97, 248, 0))
                    return 0;
            }
            status = slice_del(z);
            break;
        }

        case 3:
            status = slice_from_s(z, 2, s_1);
            break;

        default:
            status = 0;
            break;
    }

    return (status < 0) ? status : 1;
}
/*
 * Consonant-pair step.
 *
 * Tests, with mark I[0] as the backward limit, whether an entry of among
 * table a_1 ends at the cursor (attempted only when the byte before the
 * cursor is 116, 't').  The cursor is then put back, skip_utf8 is asked for
 * a step of -1, and the slice from the new position up to the old cursor is
 * deleted.
 *
 * Returns 1 after the deletion, 0 when the rule does not apply, or a
 * negative value handed up from slice_del.
 */
static int r_consonant_pair(struct SN_env * z) {
    int back = z->l - z->c;
    int outer_lb;
    int matched;
    int prev;
    int status;

    if (z->c < z->I[0])
        return 0;

    /* setlimit: the cursor itself ends up where it started */
    outer_lb = z->lb;
    z->lb = z->I[0];
    z->ket = z->c;
    matched = z->c - 1 > z->lb
        && z->p[z->c - 1] == 116
        && find_among_b(z, a_1, 2);
    z->lb = outer_lb;
    if (!matched)
        return 0;
    z->bra = z->c;

    /* the lookup was only a test: go back to where we started */
    z->c = z->l - back;

    prev = skip_utf8(z->p, z->c, z->lb, 0, -1);
    if (prev < 0)
        return 0;
    z->c = prev;
    z->bra = z->c;

    status = slice_del(z);
    return (status < 0) ? status : 1;
}
/*
 * Other-suffix step.
 *
 * A suffix from among table a_2 is looked up with mark I[0] as the backward
 * limit; the lookup is attempted only when at least two bytes are available
 * and the last one is g, s or v (the bitmask test).  Table result 1 deletes
 * the suffix; any other nonzero result leaves the text alone.
 *
 * Returns 1 on a match, 0 when the rule does not apply, or a negative value
 * handed up from slice_del.
 */
static int r_other_suffix(struct SN_env * z) {
    int among_var;
    int outer_lb;

    if (z->c < z->I[0])
        return 0;

    /* setlimit: the cursor itself ends up where it started */
    outer_lb = z->lb;
    z->lb = z->I[0];
    z->ket = z->c;
    if (z->c - 1 > z->lb
        && z->p[z->c - 1] >> 5 == 3
        && ((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1))
        among_var = find_among_b(z, a_2, 11);
    else
        among_var = 0;
    z->lb = outer_lb;
    if (among_var == 0)
        return 0;
    z->bra = z->c;

    if (among_var == 1) {
        int status = slice_del(z);

        if (status < 0)
            return status;
    }

    return 1;
}
/*
 * Entry point of the Norwegian UTF-8 stemmer.
 *
 * Runs mark_regions going forward, then switches to backward mode and
 * applies main_suffix, consonant_pair and other_suffix in that order.
 * Each step is optional: a 0 result is ignored and the cursor is reset
 * before the next step.  A negative result from any step aborts the run
 * and is returned as is; otherwise the function returns 1.
 */
extern int norwegian_UTF_8_stem(struct SN_env * z) {
    int status;
    int dist;
    int start = z->c;

    /* do, line 74 */
    status = r_mark_regions(z);      /* call mark_regions, line 74 */
    if (status < 0)
        return status;
    z->c = start;

    /* backwards, line 75 */
    z->lb = z->c;
    z->c = z->l;

    /* do, line 76 -- offset is taken first because the step may change z->l */
    dist = z->l - z->c;
    status = r_main_suffix(z);       /* call main_suffix, line 76 */
    if (status < 0)
        return status;
    z->c = z->l - dist;

    /* do, line 77 */
    dist = z->l - z->c;
    status = r_consonant_pair(z);    /* call consonant_pair, line 77 */
    if (status < 0)
        return status;
    z->c = z->l - dist;

    /* do, line 78 */
    dist = z->l - z->c;
    status = r_other_suffix(z);      /* call other_suffix, line 78 */
    if (status < 0)
        return status;
    z->c = z->l - dist;

    /* leave backward mode with the cursor at the left limit */
    z->c = z->lb;
    return 1;
}
/*
 * Create the environment used by norwegian_UTF_8_stem().
 * The (0, 2, 0) arguments are handed to SN_create_env() unchanged;
 * presumably they size the string / integer / boolean slots -- confirm
 * against SN_create_env().
 */
extern struct SN_env * norwegian_UTF_8_create_env(void) {
    struct SN_env * env = SN_create_env(0, 2, 0);
    return env;
}
/* Release an environment obtained from norwegian_UTF_8_create_env(). */
extern void norwegian_UTF_8_close_env(struct SN_env * z) {
    SN_close_env(z, 0);
}

View File

@ -0,0 +1,755 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int porter_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_Step_5b(struct SN_env * z);
static int r_Step_5a(struct SN_env * z);
static int r_Step_4(struct SN_env * z);
static int r_Step_3(struct SN_env * z);
static int r_Step_2(struct SN_env * z);
static int r_Step_1c(struct SN_env * z);
static int r_Step_1b(struct SN_env * z);
static int r_Step_1a(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_shortv(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * porter_UTF_8_create_env(void);
extern void porter_UTF_8_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Ending table searched by r_Step_1a: "s", "ies", "sses", "ss".
 *
 * Row layout as used throughout this file: string length, string, an index
 * column, a result code, and a final column that is 0 in every row.  In
 * the rows below the index column is either -1 or names an earlier row
 * whose string is a suffix of this one (here: all longer endings point at
 * "s").  The result code is presumably what find_among_b() hands back to
 * the caller's switch -- confirm against find_among_b().
 */
static const symbol s_0_0[1] = { 's' };
static const symbol s_0_1[3] = { 'i', 'e', 's' };
static const symbol s_0_2[4] = { 's', 's', 'e', 's' };
static const symbol s_0_3[2] = { 's', 's' };
static const struct among a_0[4] =
{
/* 0 */ { 1, s_0_0, -1, 3, 0},
/* 1 */ { 3, s_0_1, 0, 2, 0},
/* 2 */ { 4, s_0_2, 0, 1, 0},
/* 3 */ { 2, s_0_3, 0, -1, 0}
};
/*
 * Table used by the second lookup in r_Step_1b (after "ed"/"ing" has been
 * removed).  Row 0 is the empty string (length 0, null string pointer) and
 * carries result code 3; "bl", "at" and "iz" carry code 1; the doubled
 * consonants carry code 2.  Every non-empty row points back at row 0 in
 * its index column.
 */
static const symbol s_1_1[2] = { 'b', 'b' };
static const symbol s_1_2[2] = { 'd', 'd' };
static const symbol s_1_3[2] = { 'f', 'f' };
static const symbol s_1_4[2] = { 'g', 'g' };
static const symbol s_1_5[2] = { 'b', 'l' };
static const symbol s_1_6[2] = { 'm', 'm' };
static const symbol s_1_7[2] = { 'n', 'n' };
static const symbol s_1_8[2] = { 'p', 'p' };
static const symbol s_1_9[2] = { 'r', 'r' };
static const symbol s_1_10[2] = { 'a', 't' };
static const symbol s_1_11[2] = { 't', 't' };
static const symbol s_1_12[2] = { 'i', 'z' };
static const struct among a_1[13] =
{
/* 0 */ { 0, 0, -1, 3, 0},
/* 1 */ { 2, s_1_1, 0, 2, 0},
/* 2 */ { 2, s_1_2, 0, 2, 0},
/* 3 */ { 2, s_1_3, 0, 2, 0},
/* 4 */ { 2, s_1_4, 0, 2, 0},
/* 5 */ { 2, s_1_5, 0, 1, 0},
/* 6 */ { 2, s_1_6, 0, 2, 0},
/* 7 */ { 2, s_1_7, 0, 2, 0},
/* 8 */ { 2, s_1_8, 0, 2, 0},
/* 9 */ { 2, s_1_9, 0, 2, 0},
/* 10 */ { 2, s_1_10, 0, 1, 0},
/* 11 */ { 2, s_1_11, 0, 2, 0},
/* 12 */ { 2, s_1_12, 0, 1, 0}
};
/*
 * Table used by the first lookup in r_Step_1b: "ed" and "ing" carry result
 * code 2, "eed" carries code 1 and points back at "ed" (row 0), which is a
 * suffix of it.
 */
static const symbol s_2_0[2] = { 'e', 'd' };
static const symbol s_2_1[3] = { 'e', 'e', 'd' };
static const symbol s_2_2[3] = { 'i', 'n', 'g' };
static const struct among a_2[3] =
{
/* 0 */ { 2, s_2_0, -1, 2, 0},
/* 1 */ { 3, s_2_1, 0, 1, 0},
/* 2 */ { 3, s_2_2, -1, 2, 0}
};
/*
 * Ending table searched by r_Step_2 (20 rows).  The result codes 1..14
 * correspond to the cases of the switch in r_Step_2, each of which writes
 * a fixed replacement over the matched slice.  "ational" points back at
 * "tional" and "ization" at "ation", the shorter rows that are suffixes of
 * them; all other rows have -1 in the index column.
 */
static const symbol s_3_0[4] = { 'a', 'n', 'c', 'i' };
static const symbol s_3_1[4] = { 'e', 'n', 'c', 'i' };
static const symbol s_3_2[4] = { 'a', 'b', 'l', 'i' };
static const symbol s_3_3[3] = { 'e', 'l', 'i' };
static const symbol s_3_4[4] = { 'a', 'l', 'l', 'i' };
static const symbol s_3_5[5] = { 'o', 'u', 's', 'l', 'i' };
static const symbol s_3_6[5] = { 'e', 'n', 't', 'l', 'i' };
static const symbol s_3_7[5] = { 'a', 'l', 'i', 't', 'i' };
static const symbol s_3_8[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
static const symbol s_3_9[5] = { 'i', 'v', 'i', 't', 'i' };
static const symbol s_3_10[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
static const symbol s_3_11[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
static const symbol s_3_12[5] = { 'a', 'l', 'i', 's', 'm' };
static const symbol s_3_13[5] = { 'a', 't', 'i', 'o', 'n' };
static const symbol s_3_14[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
static const symbol s_3_15[4] = { 'i', 'z', 'e', 'r' };
static const symbol s_3_16[4] = { 'a', 't', 'o', 'r' };
static const symbol s_3_17[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
static const symbol s_3_18[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
static const symbol s_3_19[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
static const struct among a_3[20] =
{
/* 0 */ { 4, s_3_0, -1, 3, 0},
/* 1 */ { 4, s_3_1, -1, 2, 0},
/* 2 */ { 4, s_3_2, -1, 4, 0},
/* 3 */ { 3, s_3_3, -1, 6, 0},
/* 4 */ { 4, s_3_4, -1, 9, 0},
/* 5 */ { 5, s_3_5, -1, 12, 0},
/* 6 */ { 5, s_3_6, -1, 5, 0},
/* 7 */ { 5, s_3_7, -1, 10, 0},
/* 8 */ { 6, s_3_8, -1, 14, 0},
/* 9 */ { 5, s_3_9, -1, 13, 0},
/* 10 */ { 6, s_3_10, -1, 1, 0},
/* 11 */ { 7, s_3_11, 10, 8, 0},
/* 12 */ { 5, s_3_12, -1, 10, 0},
/* 13 */ { 5, s_3_13, -1, 8, 0},
/* 14 */ { 7, s_3_14, 13, 7, 0},
/* 15 */ { 4, s_3_15, -1, 7, 0},
/* 16 */ { 4, s_3_16, -1, 8, 0},
/* 17 */ { 7, s_3_17, -1, 13, 0},
/* 18 */ { 7, s_3_18, -1, 11, 0},
/* 19 */ { 7, s_3_19, -1, 12, 0}
};
/*
 * Ending table searched by r_Step_3 (7 rows).  Result codes match the
 * switch in r_Step_3: 1 writes s_22 ("al"), 2 writes s_23 ("ic"), 3
 * deletes the slice.  No row refers to another (index column is -1
 * throughout).
 */
static const symbol s_4_0[5] = { 'i', 'c', 'a', 't', 'e' };
static const symbol s_4_1[5] = { 'a', 't', 'i', 'v', 'e' };
static const symbol s_4_2[5] = { 'a', 'l', 'i', 'z', 'e' };
static const symbol s_4_3[5] = { 'i', 'c', 'i', 't', 'i' };
static const symbol s_4_4[4] = { 'i', 'c', 'a', 'l' };
static const symbol s_4_5[3] = { 'f', 'u', 'l' };
static const symbol s_4_6[4] = { 'n', 'e', 's', 's' };
static const struct among a_4[7] =
{
/* 0 */ { 5, s_4_0, -1, 2, 0},
/* 1 */ { 5, s_4_1, -1, 3, 0},
/* 2 */ { 5, s_4_2, -1, 1, 0},
/* 3 */ { 5, s_4_3, -1, 2, 0},
/* 4 */ { 4, s_4_4, -1, 2, 0},
/* 5 */ { 3, s_4_5, -1, 3, 0},
/* 6 */ { 4, s_4_6, -1, 3, 0}
};
/*
 * Ending table searched by r_Step_4 (19 rows).  Every row has result code
 * 1 (plain delete in r_Step_4) except "ion", which has code 2 (delete only
 * when preceded by 's' or 't').  "ment" points back at "ent" and "ement"
 * at "ment", the shorter rows that are suffixes of them.
 */
static const symbol s_5_0[2] = { 'i', 'c' };
static const symbol s_5_1[4] = { 'a', 'n', 'c', 'e' };
static const symbol s_5_2[4] = { 'e', 'n', 'c', 'e' };
static const symbol s_5_3[4] = { 'a', 'b', 'l', 'e' };
static const symbol s_5_4[4] = { 'i', 'b', 'l', 'e' };
static const symbol s_5_5[3] = { 'a', 't', 'e' };
static const symbol s_5_6[3] = { 'i', 'v', 'e' };
static const symbol s_5_7[3] = { 'i', 'z', 'e' };
static const symbol s_5_8[3] = { 'i', 't', 'i' };
static const symbol s_5_9[2] = { 'a', 'l' };
static const symbol s_5_10[3] = { 'i', 's', 'm' };
static const symbol s_5_11[3] = { 'i', 'o', 'n' };
static const symbol s_5_12[2] = { 'e', 'r' };
static const symbol s_5_13[3] = { 'o', 'u', 's' };
static const symbol s_5_14[3] = { 'a', 'n', 't' };
static const symbol s_5_15[3] = { 'e', 'n', 't' };
static const symbol s_5_16[4] = { 'm', 'e', 'n', 't' };
static const symbol s_5_17[5] = { 'e', 'm', 'e', 'n', 't' };
static const symbol s_5_18[2] = { 'o', 'u' };
static const struct among a_5[19] =
{
/* 0 */ { 2, s_5_0, -1, 1, 0},
/* 1 */ { 4, s_5_1, -1, 1, 0},
/* 2 */ { 4, s_5_2, -1, 1, 0},
/* 3 */ { 4, s_5_3, -1, 1, 0},
/* 4 */ { 4, s_5_4, -1, 1, 0},
/* 5 */ { 3, s_5_5, -1, 1, 0},
/* 6 */ { 3, s_5_6, -1, 1, 0},
/* 7 */ { 3, s_5_7, -1, 1, 0},
/* 8 */ { 3, s_5_8, -1, 1, 0},
/* 9 */ { 2, s_5_9, -1, 1, 0},
/* 10 */ { 3, s_5_10, -1, 1, 0},
/* 11 */ { 3, s_5_11, -1, 2, 0},
/* 12 */ { 2, s_5_12, -1, 1, 0},
/* 13 */ { 3, s_5_13, -1, 1, 0},
/* 14 */ { 3, s_5_14, -1, 1, 0},
/* 15 */ { 3, s_5_15, -1, 1, 0},
/* 16 */ { 4, s_5_16, 15, 1, 0},
/* 17 */ { 5, s_5_17, 16, 1, 0},
/* 18 */ { 2, s_5_18, -1, 1, 0}
};
/*
 * Character groupings passed to the in_grouping / out_grouping helpers
 * together with a min/max code pair (97..121 for g_v, 89..121 for g_v_WXY).
 * NOTE(review): these look like bitmaps indexed from the min code, least
 * significant bit first; read that way g_v is {a, e, i, o, u, y} and
 * g_v_WXY adds {Y, w, x} -- confirm against the grouping helpers.
 */
static const unsigned char g_v[] = { 17, 65, 16, 1 };
static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
/*
 * Literal strings used by the routines below, numbered in order of use:
 * s_0..s_1 by Step_1a, s_2..s_4 by Step_1b, s_5..s_7 by Step_1c,
 * s_8..s_21 by Step_2, s_22..s_23 by Step_3, s_24..s_25 by Step_4,
 * s_26 by Step_5a, s_27..s_28 by Step_5b and s_29..s_34 by
 * porter_UTF_8_stem.  Several hold identical text; each use site has its
 * own copy.
 */
static const symbol s_0[] = { 's', 's' };
static const symbol s_1[] = { 'i' };
static const symbol s_2[] = { 'e', 'e' };
static const symbol s_3[] = { 'e' };
static const symbol s_4[] = { 'e' };
static const symbol s_5[] = { 'y' };
static const symbol s_6[] = { 'Y' };
static const symbol s_7[] = { 'i' };
static const symbol s_8[] = { 't', 'i', 'o', 'n' };
static const symbol s_9[] = { 'e', 'n', 'c', 'e' };
static const symbol s_10[] = { 'a', 'n', 'c', 'e' };
static const symbol s_11[] = { 'a', 'b', 'l', 'e' };
static const symbol s_12[] = { 'e', 'n', 't' };
static const symbol s_13[] = { 'e' };
static const symbol s_14[] = { 'i', 'z', 'e' };
static const symbol s_15[] = { 'a', 't', 'e' };
static const symbol s_16[] = { 'a', 'l' };
static const symbol s_17[] = { 'a', 'l' };
static const symbol s_18[] = { 'f', 'u', 'l' };
static const symbol s_19[] = { 'o', 'u', 's' };
static const symbol s_20[] = { 'i', 'v', 'e' };
static const symbol s_21[] = { 'b', 'l', 'e' };
static const symbol s_22[] = { 'a', 'l' };
static const symbol s_23[] = { 'i', 'c' };
static const symbol s_24[] = { 's' };
static const symbol s_25[] = { 't' };
static const symbol s_26[] = { 'e' };
static const symbol s_27[] = { 'l' };
static const symbol s_28[] = { 'l' };
static const symbol s_29[] = { 'y' };
static const symbol s_30[] = { 'Y' };
static const symbol s_31[] = { 'y' };
static const symbol s_32[] = { 'Y' };
static const symbol s_33[] = { 'Y' };
static const symbol s_34[] = { 'y' };
/*
 * shortv test of the Porter stemmer (backward mode).
 *
 * Runs three grouping checks in sequence -- out of g_v_WXY, in g_v, out of
 * g_v -- and succeeds only if each helper returns 0.  The checks are
 * evaluated left to right and stop at the first non-zero result.
 *
 * Returns 1 when all three checks pass, 0 otherwise.
 */
static int r_shortv(struct SN_env * z) {
    if (out_grouping_b_U(z, g_v_WXY, 89, 121, 0)
        || in_grouping_b_U(z, g_v, 97, 121, 0)
        || out_grouping_b_U(z, g_v, 97, 121, 0))
        return 0;
    return 1;
}
/* R1 test: returns 1 when the cursor is at or to the right of mark I[0], else 0. */
static int r_R1(struct SN_env * z) {
    return (z->I[0] <= z->c) ? 1 : 0;
}
/* R2 test: returns 1 when the cursor is at or to the right of mark I[1], else 0. */
static int r_R2(struct SN_env * z) {
    return (z->I[1] <= z->c) ? 1 : 0;
}
/*
 * Step_1a of the Porter stemmer (backward mode).
 *
 * Looks up the ending at the cursor in a_0 and rewrites it:
 *   code 1 -> replace the slice with s_0 ("ss")
 *   code 2 -> replace the slice with s_1 ("i")
 *   code 3 -> delete the slice
 * Any other non-zero code leaves the word untouched.
 *
 * Returns 1 on a match, 0 when nothing matched, or the negative value
 * reported by the slice operation.
 */
static int r_Step_1a(struct SN_env * z) {
    int among_var;
    int ret = 0;

    z->ket = z->c;                   /* [, line 25 */

    /* pre-filter: there must be a byte before the cursor and it must be 115 ('s') */
    if (z->c <= z->lb || z->p[z->c - 1] != 115)
        return 0;
    among_var = find_among_b(z, a_0, 4);        /* substring, line 25 */
    if (among_var == 0)
        return 0;
    z->bra = z->c;                   /* ], line 25 */

    if (among_var == 1)
        ret = slice_from_s(z, 2, s_0);          /* <-, line 26 */
    else if (among_var == 2)
        ret = slice_from_s(z, 1, s_1);          /* <-, line 27 */
    else if (among_var == 3)
        ret = slice_del(z);                     /* delete, line 29 */

    if (ret < 0)
        return ret;
    return 1;
}
/*
 * Step_1b of the Porter stemmer (backward mode).
 *
 * First lookup (a_2):
 *   code 1 ("eed")       -> if the R1 test passes, replace with s_2 ("ee")
 *   code 2 ("ed", "ing") -> require that the gopast-vowel scan succeeds,
 *                           delete the ending, then run a second lookup
 *                           (a_1) on what is left to patch up the stem.
 *
 * Returns 1 when the step applied, 0 when it did not, and any negative
 * value reported by a slice/insert helper.
 */
static int r_Step_1b(struct SN_env * z) {
int among_var;
z->ket = z->c; /* [, line 34 */
/* pre-filter: at least two bytes before the cursor, the last being 100 ('d') or 103 ('g') */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0;
among_var = find_among_b(z, a_2, 3); /* substring, line 34 */
if (!(among_var)) return 0;
z->bra = z->c; /* ], line 34 */
switch(among_var) {
case 0: return 0;
case 1:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 35 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 2, s_2); /* <-, line 35 */
if (ret < 0) return ret;
}
break;
case 2:
/* the scan below is only a test: the cursor is put back afterwards */
{ int m_test = z->l - z->c; /* test, line 38 */
{ /* gopast */ /* grouping v, line 38 */
int ret = out_grouping_b_U(z, g_v, 97, 121, 1);
if (ret < 0) return 0;
z->c -= ret;
}
z->c = z->l - m_test;
}
{ int ret = slice_del(z); /* delete, line 38 */
if (ret < 0) return ret;
}
/*
 * Second lookup, again as a test.  If the pre-filter on the last byte
 * fails, the table search is skipped and code 3 (the code of a_1's
 * empty-string row) is used directly.
 */
{ int m_test = z->l - z->c; /* test, line 39 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else
among_var = find_among_b(z, a_1, 13); /* substring, line 39 */
if (!(among_var)) return 0;
z->c = z->l - m_test;
}
switch(among_var) {
case 0: return 0;
case 1:
/* "bl", "at", "iz": insert s_3 ("e") at the cursor, keeping the cursor where it was */
{ int c_keep = z->c;
int ret = insert_s(z, z->c, z->c, 1, s_3); /* <+, line 41 */
z->c = c_keep;
if (ret < 0) return ret;
}
break;
case 2:
/* doubled consonant: delete the single character left of the cursor */
z->ket = z->c; /* [, line 44 */
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) return 0;
z->c = ret; /* next, line 44 */
}
z->bra = z->c; /* ], line 44 */
{ int ret = slice_del(z); /* delete, line 44 */
if (ret < 0) return ret;
}
break;
case 3:
/* anything else: only when the cursor sits exactly on mark I[0] and shortv holds, insert s_4 ("e") */
if (z->c != z->I[0]) return 0; /* atmark, line 45 */
{ int m_test = z->l - z->c; /* test, line 45 */
{ int ret = r_shortv(z);
if (ret == 0) return 0; /* call shortv, line 45 */
if (ret < 0) return ret;
}
z->c = z->l - m_test;
}
{ int c_keep = z->c;
int ret = insert_s(z, z->c, z->c, 1, s_4); /* <+, line 45 */
z->c = c_keep;
if (ret < 0) return ret;
}
break;
}
break;
}
return 1;
}
/*
 * Step_1c of the Porter stemmer (backward mode).
 *
 * If the word ends in s_5 ("y") or s_6 ("Y") and the gopast scan over g_v
 * to the left of it succeeds, the ending is replaced with s_7 ("i").
 *
 * Returns 1 when the replacement was made, 0 when a condition failed, or
 * the negative value reported by slice_from_s().
 */
static int r_Step_1c(struct SN_env * z) {
    int dist;
    int ret;

    z->ket = z->c;                   /* [, line 52 */

    /* or, line 52: try "y" first, then rewind and try "Y" */
    dist = z->l - z->c;
    if (!eq_s_b(z, 1, s_5)) {
        z->c = z->l - dist;
        if (!eq_s_b(z, 1, s_6))
            return 0;
    }
    z->bra = z->c;                   /* ], line 52 */

    /* gopast grouping v, line 53 */
    ret = out_grouping_b_U(z, g_v, 97, 121, 1);
    if (ret < 0)
        return 0;
    z->c -= ret;

    ret = slice_from_s(z, 1, s_7);   /* <-, line 54 */
    if (ret < 0)
        return ret;
    return 1;
}
static int r_Step_2(struct SN_env * z) {
int among_var;
z->ket = z->c; /* [, line 58 */
if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
among_var = find_among_b(z, a_3, 20); /* substring, line 58 */
if (!(among_var)) return 0;
z->bra = z->c; /* ], line 58 */
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 58 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: return 0;
case 1:
{ int ret = slice_from_s(z, 4, s_8); /* <-, line 59 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 4, s_9); /* <-, line 60 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 4, s_10); /* <-, line 61 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = slice_from_s(z, 4, s_11); /* <-, line 62 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = slice_from_s(z, 3, s_12); /* <-, line 63 */
if (ret < 0) return ret;
}
break;
case 6:
{ int ret = slice_from_s(z, 1, s_13); /* <-, line 64 */
if (ret < 0) return ret;
}
break;
case 7:
{ int ret = slice_from_s(z, 3, s_14); /* <-, line 66 */
if (ret < 0) return ret;
}
break;
case 8:
{ int ret = slice_from_s(z, 3, s_15); /* <-, line 68 */
if (ret < 0) return ret;
}
break;
case 9:
{ int ret = slice_from_s(z, 2, s_16); /* <-, line 69 */
if (ret < 0) return ret;
}
break;
case 10:
{ int ret = slice_from_s(z, 2, s_17); /* <-, line 71 */
if (ret < 0) return ret;
}
break;
case 11:
{ int ret = slice_from_s(z, 3, s_18); /* <-, line 72 */
if (ret < 0) return ret;
}
break;
case 12:
{ int ret = slice_from_s(z, 3, s_19); /* <-, line 74 */
if (ret < 0) return ret;
}
break;
case 13:
{ int ret = slice_from_s(z, 3, s_20); /* <-, line 76 */
if (ret < 0) return ret;
}
break;
case 14:
{ int ret = slice_from_s(z, 3, s_21); /* <-, line 77 */
if (ret < 0) return ret;
}
break;
}
return 1;
}
/*
 * Step_3 of the Porter stemmer (backward mode).
 *
 * Looks up the ending at the cursor in a_4, requires the R1 test to pass,
 * and then acts on the result code:
 *   1 -> replace the slice with s_22 ("al")
 *   2 -> replace the slice with s_23 ("ic")
 *   3 -> delete the slice
 *
 * Returns 1 when the step applied, 0 when the lookup or the R1 test
 * failed, or the negative value reported by the slice operation.
 */
static int r_Step_3(struct SN_env * z) {
    int among_var;
    int ret;

    z->ket = z->c;                   /* [, line 82 */

    /*
     * Pre-filter: at least three bytes before the cursor, last byte in
     * 96..127 with its low five bits selecting a set bit of 528928.
     */
    if (z->c - 2 <= z->lb)
        return 0;
    if ((z->p[z->c - 1] >> 5) != 3)
        return 0;
    if (!((528928 >> (z->p[z->c - 1] & 0x1f)) & 1))
        return 0;

    among_var = find_among_b(z, a_4, 7);        /* substring, line 82 */
    if (among_var == 0)
        return 0;
    z->bra = z->c;                   /* ], line 82 */

    ret = r_R1(z);                   /* call R1, line 82 */
    if (ret <= 0)
        return ret;                  /* 0 = not in R1, negative = error */

    ret = 0;
    if (among_var == 1)
        ret = slice_from_s(z, 2, s_22);         /* <-, line 83 */
    else if (among_var == 2)
        ret = slice_from_s(z, 2, s_23);         /* <-, line 85 */
    else if (among_var == 3)
        ret = slice_del(z);                     /* delete, line 87 */

    if (ret < 0)
        return ret;
    return 1;
}
/*
 * Step_4 of the Porter stemmer (backward mode).
 *
 * Looks up the ending at the cursor in a_5, requires the R2 test to pass,
 * and deletes the matched slice.  For result code 2 the deletion happens
 * only if the ending is preceded by s_24 ("s") or s_25 ("t").
 *
 * Returns 1 when the step applied, 0 when a condition failed, or the
 * negative value reported by slice_del().
 */
static int r_Step_4(struct SN_env * z) {
    int among_var;
    int ret;

    z->ket = z->c;                   /* [, line 92 */

    /*
     * Pre-filter: at least two bytes before the cursor, last byte in
     * 96..127 with its low five bits selecting a set bit of 3961384.
     */
    if (z->c - 1 <= z->lb)
        return 0;
    if ((z->p[z->c - 1] >> 5) != 3)
        return 0;
    if (!((3961384 >> (z->p[z->c - 1] & 0x1f)) & 1))
        return 0;

    among_var = find_among_b(z, a_5, 19);       /* substring, line 92 */
    if (among_var == 0)
        return 0;
    z->bra = z->c;                   /* ], line 92 */

    ret = r_R2(z);                   /* call R2, line 92 */
    if (ret <= 0)
        return ret;                  /* 0 = not in R2, negative = error */

    if (among_var == 2) {
        /* or, line 96: "s" first, then rewind and try "t" */
        int dist = z->l - z->c;

        if (!eq_s_b(z, 1, s_24)) {
            z->c = z->l - dist;
            if (!eq_s_b(z, 1, s_25))
                return 0;
        }
    }
    if (among_var == 1 || among_var == 2) {
        ret = slice_del(z);          /* delete, lines 95 / 96 */
        if (ret < 0)
            return ret;
    }
    return 1;
}
/*
 * Step_5a of the Porter stemmer (backward mode).
 *
 * Deletes a final s_26 ("e") when either the R2 test passes, or the R1
 * test passes and the shortv test does NOT.
 *
 * Returns 1 when the "e" was removed, 0 when a condition failed, or the
 * negative value reported by a helper.
 */
static int r_Step_5a(struct SN_env * z) {
    int or_dist;
    int ret;

    z->ket = z->c;                   /* [, line 101 */
    if (!eq_s_b(z, 1, s_26))
        return 0;
    z->bra = z->c;                   /* ], line 101 */

    /* or, line 102 */
    or_dist = z->l - z->c;
    ret = r_R2(z);                   /* call R2, line 102 */
    if (ret < 0)
        return ret;
    if (ret == 0) {
        int not_dist;

        /* second alternative: R1 and not shortv */
        z->c = z->l - or_dist;
        ret = r_R1(z);               /* call R1, line 102 */
        if (ret <= 0)
            return ret;              /* 0 = not in R1, negative = error */

        not_dist = z->l - z->c;      /* not, line 102 */
        ret = r_shortv(z);           /* call shortv, line 102 */
        if (ret < 0)
            return ret;
        if (ret != 0)
            return 0;                /* shortv held, so the "not" fails */
        z->c = z->l - not_dist;
    }

    ret = slice_del(z);              /* delete, line 103 */
    if (ret < 0)
        return ret;
    return 1;
}
/*
 * Step_5b of the Porter stemmer (backward mode).
 *
 * Deletes a final s_27 ("l") when the R2 test passes and the ending is
 * itself preceded by s_28 ("l").
 *
 * Returns 1 when the "l" was removed, 0 when a condition failed, or the
 * negative value reported by a helper.
 */
static int r_Step_5b(struct SN_env * z) {
    int ret;

    z->ket = z->c;                   /* [, line 107 */
    if (!eq_s_b(z, 1, s_27))
        return 0;
    z->bra = z->c;                   /* ], line 107 */

    ret = r_R2(z);                   /* call R2, line 108 */
    if (ret <= 0)
        return ret;                  /* 0 = not in R2, negative = error */

    if (!eq_s_b(z, 1, s_28))
        return 0;

    ret = slice_del(z);              /* delete, line 109 */
    if (ret < 0)
        return ret;
    return 1;
}
/*
 * Entry point of the Porter UTF-8 stemmer.
 *
 * Phases, in order:
 *   1. Forward: rewrite certain 'y' as 'Y' (an initial 'y', and every 'y'
 *      found right after a successful in_grouping_U() check against g_v),
 *      recording in B[0] whether any rewrite happened.
 *   2. Forward: compute the marks I[0] (p1) and I[1] (p2); both default to
 *      the limit z->l when the scans fail.
 *   3. Backward: run Step_1a .. Step_5b.  Every step is optional; a 0
 *      result is ignored and the cursor is reset before the next step.
 *   4. Forward: if B[0] is set, rewrite every 'Y' back to 'y'.
 *
 * Returns 1 on completion, or the negative value reported by a slice
 * operation or step routine.
 */
extern int porter_UTF_8_stem(struct SN_env * z) {
z->B[0] = 0; /* unset Y_found, line 115 */
/* phase 1a: a 'y' at the very start becomes 'Y' */
{ int c1 = z->c; /* do, line 116 */
z->bra = z->c; /* [, line 116 */
if (!(eq_s(z, 1, s_29))) goto lab0;
z->ket = z->c; /* ], line 116 */
{ int ret = slice_from_s(z, 1, s_30); /* <-, line 116 */
if (ret < 0) return ret;
}
z->B[0] = 1; /* set Y_found, line 116 */
lab0:
z->c = c1;
}
/*
 * phase 1b: repeatedly search forward for a position where the g_v check
 * succeeds and is followed by 'y', and rewrite that 'y' as 'Y'.  The inner
 * loop advances one character at a time; when skip_utf8() fails the outer
 * loop ends.
 */
{ int c2 = z->c; /* do, line 117 */
while(1) { /* repeat, line 117 */
int c3 = z->c;
while(1) { /* goto, line 117 */
int c4 = z->c;
if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab3;
z->bra = z->c; /* [, line 117 */
if (!(eq_s(z, 1, s_31))) goto lab3;
z->ket = z->c; /* ], line 117 */
z->c = c4;
break;
lab3:
z->c = c4;
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab2;
z->c = ret; /* goto, line 117 */
}
}
{ int ret = slice_from_s(z, 1, s_32); /* <-, line 117 */
if (ret < 0) return ret;
}
z->B[0] = 1; /* set Y_found, line 117 */
continue;
lab2:
z->c = c3;
break;
}
z->c = c2;
}
/* phase 2: both marks start at the limit and are only moved if the scans below succeed */
z->I[0] = z->l;
z->I[1] = z->l;
{ int c5 = z->c; /* do, line 121 */
{ /* gopast */ /* grouping v, line 122 */
int ret = out_grouping_U(z, g_v, 97, 121, 1);
if (ret < 0) goto lab4;
z->c += ret;
}
{ /* gopast */ /* non v, line 122 */
int ret = in_grouping_U(z, g_v, 97, 121, 1);
if (ret < 0) goto lab4;
z->c += ret;
}
z->I[0] = z->c; /* setmark p1, line 122 */
{ /* gopast */ /* grouping v, line 123 */
int ret = out_grouping_U(z, g_v, 97, 121, 1);
if (ret < 0) goto lab4;
z->c += ret;
}
{ /* gopast */ /* non v, line 123 */
int ret = in_grouping_U(z, g_v, 97, 121, 1);
if (ret < 0) goto lab4;
z->c += ret;
}
z->I[1] = z->c; /* setmark p2, line 123 */
lab4:
z->c = c5;
}
/*
 * phase 3: backward mode.  Before each step the cursor is saved as an
 * offset from z->l (the steps may change z->l) and restored afterwards.
 */
z->lb = z->c; z->c = z->l; /* backwards, line 126 */
{ int m6 = z->l - z->c; (void)m6; /* do, line 127 */
{ int ret = r_Step_1a(z);
if (ret == 0) goto lab5; /* call Step_1a, line 127 */
if (ret < 0) return ret;
}
lab5:
z->c = z->l - m6;
}
{ int m7 = z->l - z->c; (void)m7; /* do, line 128 */
{ int ret = r_Step_1b(z);
if (ret == 0) goto lab6; /* call Step_1b, line 128 */
if (ret < 0) return ret;
}
lab6:
z->c = z->l - m7;
}
{ int m8 = z->l - z->c; (void)m8; /* do, line 129 */
{ int ret = r_Step_1c(z);
if (ret == 0) goto lab7; /* call Step_1c, line 129 */
if (ret < 0) return ret;
}
lab7:
z->c = z->l - m8;
}
{ int m9 = z->l - z->c; (void)m9; /* do, line 130 */
{ int ret = r_Step_2(z);
if (ret == 0) goto lab8; /* call Step_2, line 130 */
if (ret < 0) return ret;
}
lab8:
z->c = z->l - m9;
}
{ int m10 = z->l - z->c; (void)m10; /* do, line 131 */
{ int ret = r_Step_3(z);
if (ret == 0) goto lab9; /* call Step_3, line 131 */
if (ret < 0) return ret;
}
lab9:
z->c = z->l - m10;
}
{ int m11 = z->l - z->c; (void)m11; /* do, line 132 */
{ int ret = r_Step_4(z);
if (ret == 0) goto lab10; /* call Step_4, line 132 */
if (ret < 0) return ret;
}
lab10:
z->c = z->l - m11;
}
{ int m12 = z->l - z->c; (void)m12; /* do, line 133 */
{ int ret = r_Step_5a(z);
if (ret == 0) goto lab11; /* call Step_5a, line 133 */
if (ret < 0) return ret;
}
lab11:
z->c = z->l - m12;
}
{ int m13 = z->l - z->c; (void)m13; /* do, line 134 */
{ int ret = r_Step_5b(z);
if (ret == 0) goto lab12; /* call Step_5b, line 134 */
if (ret < 0) return ret;
}
lab12:
z->c = z->l - m13;
}
/* leave backward mode: cursor returns to the left limit */
z->c = z->lb;
/* phase 4: undo the 'y' -> 'Y' marking, but only if phase 1 changed anything */
{ int c14 = z->c; /* do, line 137 */
if (!(z->B[0])) goto lab13; /* Boolean test Y_found, line 137 */
while(1) { /* repeat, line 137 */
int c15 = z->c;
while(1) { /* goto, line 137 */
int c16 = z->c;
z->bra = z->c; /* [, line 137 */
if (!(eq_s(z, 1, s_33))) goto lab15;
z->ket = z->c; /* ], line 137 */
z->c = c16;
break;
lab15:
z->c = c16;
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab14;
z->c = ret; /* goto, line 137 */
}
}
{ int ret = slice_from_s(z, 1, s_34); /* <-, line 137 */
if (ret < 0) return ret;
}
continue;
lab14:
z->c = c15;
break;
}
lab13:
z->c = c14;
}
return 1;
}
/*
 * Create the environment used by porter_UTF_8_stem().
 * The (0, 2, 1) arguments are handed to SN_create_env() unchanged; the
 * stemmer itself uses I[0], I[1] and B[0], so presumably they size the
 * string / integer / boolean slots -- confirm against SN_create_env().
 */
extern struct SN_env * porter_UTF_8_create_env(void) {
    struct SN_env * env = SN_create_env(0, 2, 1);
    return env;
}
/* Release an environment obtained from porter_UTF_8_create_env(). */
extern void porter_UTF_8_close_env(struct SN_env * z) {
    SN_close_env(z, 0);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,694 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int russian_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_tidy_up(struct SN_env * z);
static int r_derivational(struct SN_env * z);
static int r_noun(struct SN_env * z);
static int r_verb(struct SN_env * z);
static int r_reflexive(struct SN_env * z);
static int r_adjectival(struct SN_env * z);
static int r_adjective(struct SN_env * z);
static int r_perfective_gerund(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * russian_UTF_8_create_env(void);
extern void russian_UTF_8_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/*
 * Russian ending table a_0 (9 rows).  The strings are UTF-8 byte
 * sequences, two bytes per Cyrillic letter.  Transliterated, in row order:
 *   vshis', yvshis', ivshis', v, yv, iv, vshi, yvshi, ivshi
 * Row layout: byte length, string, index of an earlier row whose string is
 * a suffix of this one (or -1), result code, and a final column that is 0
 * in every row.
 */
static const symbol s_0_0[10] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C };
static const symbol s_0_1[12] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C };
static const symbol s_0_2[12] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C };
static const symbol s_0_3[2] = { 0xD0, 0xB2 };
static const symbol s_0_4[4] = { 0xD1, 0x8B, 0xD0, 0xB2 };
static const symbol s_0_5[4] = { 0xD0, 0xB8, 0xD0, 0xB2 };
static const symbol s_0_6[6] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 };
static const symbol s_0_7[8] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 };
static const symbol s_0_8[8] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 };
static const struct among a_0[9] =
{
/* 0 */ { 10, s_0_0, -1, 1, 0},
/* 1 */ { 12, s_0_1, 0, 2, 0},
/* 2 */ { 12, s_0_2, 0, 2, 0},
/* 3 */ { 2, s_0_3, -1, 1, 0},
/* 4 */ { 4, s_0_4, 3, 2, 0},
/* 5 */ { 4, s_0_5, 3, 2, 0},
/* 6 */ { 6, s_0_6, -1, 1, 0},
/* 7 */ { 8, s_0_7, 6, 2, 0},
/* 8 */ { 8, s_0_8, 6, 2, 0}
};
/*
 * Russian ending table a_1 (26 rows, UTF-8, two bytes per Cyrillic
 * letter).  Transliterated, in row order:
 *   emu, omu, ykh, ikh, uyu, yuyu, eyu, oyu, yaya, aya, ye, ee, ie, oe,
 *   ymi, imi, yj, ej, ij, oj, ym, em, im, om, ego, ogo
 * Every row has result code 1 and no back-reference (-1).
 */
static const symbol s_1_0[6] = { 0xD0, 0xB5, 0xD0, 0xBC, 0xD1, 0x83 };
static const symbol s_1_1[6] = { 0xD0, 0xBE, 0xD0, 0xBC, 0xD1, 0x83 };
static const symbol s_1_2[4] = { 0xD1, 0x8B, 0xD1, 0x85 };
static const symbol s_1_3[4] = { 0xD0, 0xB8, 0xD1, 0x85 };
static const symbol s_1_4[4] = { 0xD1, 0x83, 0xD1, 0x8E };
static const symbol s_1_5[4] = { 0xD1, 0x8E, 0xD1, 0x8E };
static const symbol s_1_6[4] = { 0xD0, 0xB5, 0xD1, 0x8E };
static const symbol s_1_7[4] = { 0xD0, 0xBE, 0xD1, 0x8E };
static const symbol s_1_8[4] = { 0xD1, 0x8F, 0xD1, 0x8F };
static const symbol s_1_9[4] = { 0xD0, 0xB0, 0xD1, 0x8F };
static const symbol s_1_10[4] = { 0xD1, 0x8B, 0xD0, 0xB5 };
static const symbol s_1_11[4] = { 0xD0, 0xB5, 0xD0, 0xB5 };
static const symbol s_1_12[4] = { 0xD0, 0xB8, 0xD0, 0xB5 };
static const symbol s_1_13[4] = { 0xD0, 0xBE, 0xD0, 0xB5 };
static const symbol s_1_14[6] = { 0xD1, 0x8B, 0xD0, 0xBC, 0xD0, 0xB8 };
static const symbol s_1_15[6] = { 0xD0, 0xB8, 0xD0, 0xBC, 0xD0, 0xB8 };
static const symbol s_1_16[4] = { 0xD1, 0x8B, 0xD0, 0xB9 };
static const symbol s_1_17[4] = { 0xD0, 0xB5, 0xD0, 0xB9 };
static const symbol s_1_18[4] = { 0xD0, 0xB8, 0xD0, 0xB9 };
static const symbol s_1_19[4] = { 0xD0, 0xBE, 0xD0, 0xB9 };
static const symbol s_1_20[4] = { 0xD1, 0x8B, 0xD0, 0xBC };
static const symbol s_1_21[4] = { 0xD0, 0xB5, 0xD0, 0xBC };
static const symbol s_1_22[4] = { 0xD0, 0xB8, 0xD0, 0xBC };
static const symbol s_1_23[4] = { 0xD0, 0xBE, 0xD0, 0xBC };
static const symbol s_1_24[6] = { 0xD0, 0xB5, 0xD0, 0xB3, 0xD0, 0xBE };
static const symbol s_1_25[6] = { 0xD0, 0xBE, 0xD0, 0xB3, 0xD0, 0xBE };
static const struct among a_1[26] =
{
/* 0 */ { 6, s_1_0, -1, 1, 0},
/* 1 */ { 6, s_1_1, -1, 1, 0},
/* 2 */ { 4, s_1_2, -1, 1, 0},
/* 3 */ { 4, s_1_3, -1, 1, 0},
/* 4 */ { 4, s_1_4, -1, 1, 0},
/* 5 */ { 4, s_1_5, -1, 1, 0},
/* 6 */ { 4, s_1_6, -1, 1, 0},
/* 7 */ { 4, s_1_7, -1, 1, 0},
/* 8 */ { 4, s_1_8, -1, 1, 0},
/* 9 */ { 4, s_1_9, -1, 1, 0},
/* 10 */ { 4, s_1_10, -1, 1, 0},
/* 11 */ { 4, s_1_11, -1, 1, 0},
/* 12 */ { 4, s_1_12, -1, 1, 0},
/* 13 */ { 4, s_1_13, -1, 1, 0},
/* 14 */ { 6, s_1_14, -1, 1, 0},
/* 15 */ { 6, s_1_15, -1, 1, 0},
/* 16 */ { 4, s_1_16, -1, 1, 0},
/* 17 */ { 4, s_1_17, -1, 1, 0},
/* 18 */ { 4, s_1_18, -1, 1, 0},
/* 19 */ { 4, s_1_19, -1, 1, 0},
/* 20 */ { 4, s_1_20, -1, 1, 0},
/* 21 */ { 4, s_1_21, -1, 1, 0},
/* 22 */ { 4, s_1_22, -1, 1, 0},
/* 23 */ { 4, s_1_23, -1, 1, 0},
/* 24 */ { 6, s_1_24, -1, 1, 0},
/* 25 */ { 6, s_1_25, -1, 1, 0}
};
/*
 * Russian ending table a_2 (8 rows, UTF-8, two bytes per Cyrillic letter).
 * Transliterated, in row order:
 *   vsh, yvsh, ivsh, shch, yushch, uyushch, em, nn
 * Longer rows point back at the shorter row that is a suffix of them
 * (e.g. row 5 -> row 4 -> row 3).
 */
static const symbol s_2_0[4] = { 0xD0, 0xB2, 0xD1, 0x88 };
static const symbol s_2_1[6] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88 };
static const symbol s_2_2[6] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88 };
static const symbol s_2_3[2] = { 0xD1, 0x89 };
static const symbol s_2_4[4] = { 0xD1, 0x8E, 0xD1, 0x89 };
static const symbol s_2_5[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x89 };
static const symbol s_2_6[4] = { 0xD0, 0xB5, 0xD0, 0xBC };
static const symbol s_2_7[4] = { 0xD0, 0xBD, 0xD0, 0xBD };
static const struct among a_2[8] =
{
/* 0 */ { 4, s_2_0, -1, 1, 0},
/* 1 */ { 6, s_2_1, 0, 2, 0},
/* 2 */ { 6, s_2_2, 0, 2, 0},
/* 3 */ { 2, s_2_3, -1, 1, 0},
/* 4 */ { 4, s_2_4, 3, 1, 0},
/* 5 */ { 6, s_2_5, 4, 2, 0},
/* 6 */ { 4, s_2_6, -1, 1, 0},
/* 7 */ { 4, s_2_7, -1, 1, 0}
};
/*
 * Russian ending table a_3 (2 rows, UTF-8, two bytes per Cyrillic letter).
 * Transliterated: s', sya.  Both rows have result code 1.
 */
static const symbol s_3_0[4] = { 0xD1, 0x81, 0xD1, 0x8C };
static const symbol s_3_1[4] = { 0xD1, 0x81, 0xD1, 0x8F };
static const struct among a_3[2] =
{
/* 0 */ { 4, s_3_0, -1, 1, 0},
/* 1 */ { 4, s_3_1, -1, 1, 0}
};
/*
 * Russian ending table a_4 (46 rows, UTF-8, two bytes per Cyrillic
 * letter).  Transliterated, in row order:
 *   yt, yut, uyut, yat, et, uet, it, ny, eny, t', yt', it', esh', ish',
 *   yu, uyu, la, yla, ila, na, ena, ete, ite, jte, ujte, ejte, li, yli,
 *   ili, j, uj, ej, l, yl, il, ym, em, im, n, en, lo, ylo, ilo, no, eno,
 *   nno
 * Result codes are 1 or 2; longer rows point back at the shorter row that
 * is a suffix of them, -1 where there is none.
 */
static const symbol s_4_0[4] = { 0xD1, 0x8B, 0xD1, 0x82 };
static const symbol s_4_1[4] = { 0xD1, 0x8E, 0xD1, 0x82 };
static const symbol s_4_2[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x82 };
static const symbol s_4_3[4] = { 0xD1, 0x8F, 0xD1, 0x82 };
static const symbol s_4_4[4] = { 0xD0, 0xB5, 0xD1, 0x82 };
static const symbol s_4_5[6] = { 0xD1, 0x83, 0xD0, 0xB5, 0xD1, 0x82 };
static const symbol s_4_6[4] = { 0xD0, 0xB8, 0xD1, 0x82 };
static const symbol s_4_7[4] = { 0xD0, 0xBD, 0xD1, 0x8B };
static const symbol s_4_8[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD1, 0x8B };
static const symbol s_4_9[4] = { 0xD1, 0x82, 0xD1, 0x8C };
static const symbol s_4_10[6] = { 0xD1, 0x8B, 0xD1, 0x82, 0xD1, 0x8C };
static const symbol s_4_11[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD1, 0x8C };
static const symbol s_4_12[6] = { 0xD0, 0xB5, 0xD1, 0x88, 0xD1, 0x8C };
static const symbol s_4_13[6] = { 0xD0, 0xB8, 0xD1, 0x88, 0xD1, 0x8C };
static const symbol s_4_14[2] = { 0xD1, 0x8E };
static const symbol s_4_15[4] = { 0xD1, 0x83, 0xD1, 0x8E };
static const symbol s_4_16[4] = { 0xD0, 0xBB, 0xD0, 0xB0 };
static const symbol s_4_17[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB0 };
static const symbol s_4_18[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB0 };
static const symbol s_4_19[4] = { 0xD0, 0xBD, 0xD0, 0xB0 };
static const symbol s_4_20[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xB0 };
static const symbol s_4_21[6] = { 0xD0, 0xB5, 0xD1, 0x82, 0xD0, 0xB5 };
static const symbol s_4_22[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD0, 0xB5 };
static const symbol s_4_23[6] = { 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 };
static const symbol s_4_24[8] = { 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 };
static const symbol s_4_25[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 };
static const symbol s_4_26[4] = { 0xD0, 0xBB, 0xD0, 0xB8 };
static const symbol s_4_27[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB8 };
static const symbol s_4_28[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB8 };
static const symbol s_4_29[2] = { 0xD0, 0xB9 };
static const symbol s_4_30[4] = { 0xD1, 0x83, 0xD0, 0xB9 };
static const symbol s_4_31[4] = { 0xD0, 0xB5, 0xD0, 0xB9 };
static const symbol s_4_32[2] = { 0xD0, 0xBB };
static const symbol s_4_33[4] = { 0xD1, 0x8B, 0xD0, 0xBB };
static const symbol s_4_34[4] = { 0xD0, 0xB8, 0xD0, 0xBB };
static const symbol s_4_35[4] = { 0xD1, 0x8B, 0xD0, 0xBC };
static const symbol s_4_36[4] = { 0xD0, 0xB5, 0xD0, 0xBC };
static const symbol s_4_37[4] = { 0xD0, 0xB8, 0xD0, 0xBC };
static const symbol s_4_38[2] = { 0xD0, 0xBD };
static const symbol s_4_39[4] = { 0xD0, 0xB5, 0xD0, 0xBD };
static const symbol s_4_40[4] = { 0xD0, 0xBB, 0xD0, 0xBE };
static const symbol s_4_41[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xBE };
static const symbol s_4_42[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xBE };
static const symbol s_4_43[4] = { 0xD0, 0xBD, 0xD0, 0xBE };
static const symbol s_4_44[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xBE };
static const symbol s_4_45[6] = { 0xD0, 0xBD, 0xD0, 0xBD, 0xD0, 0xBE };
static const struct among a_4[46] =
{
/* 0 */ { 4, s_4_0, -1, 2, 0},
/* 1 */ { 4, s_4_1, -1, 1, 0},
/* 2 */ { 6, s_4_2, 1, 2, 0},
/* 3 */ { 4, s_4_3, -1, 2, 0},
/* 4 */ { 4, s_4_4, -1, 1, 0},
/* 5 */ { 6, s_4_5, 4, 2, 0},
/* 6 */ { 4, s_4_6, -1, 2, 0},
/* 7 */ { 4, s_4_7, -1, 1, 0},
/* 8 */ { 6, s_4_8, 7, 2, 0},
/* 9 */ { 4, s_4_9, -1, 1, 0},
/* 10 */ { 6, s_4_10, 9, 2, 0},
/* 11 */ { 6, s_4_11, 9, 2, 0},
/* 12 */ { 6, s_4_12, -1, 1, 0},
/* 13 */ { 6, s_4_13, -1, 2, 0},
/* 14 */ { 2, s_4_14, -1, 2, 0},
/* 15 */ { 4, s_4_15, 14, 2, 0},
/* 16 */ { 4, s_4_16, -1, 1, 0},
/* 17 */ { 6, s_4_17, 16, 2, 0},
/* 18 */ { 6, s_4_18, 16, 2, 0},
/* 19 */ { 4, s_4_19, -1, 1, 0},
/* 20 */ { 6, s_4_20, 19, 2, 0},
/* 21 */ { 6, s_4_21, -1, 1, 0},
/* 22 */ { 6, s_4_22, -1, 2, 0},
/* 23 */ { 6, s_4_23, -1, 1, 0},
/* 24 */ { 8, s_4_24, 23, 2, 0},
/* 25 */ { 8, s_4_25, 23, 2, 0},
/* 26 */ { 4, s_4_26, -1, 1, 0},
/* 27 */ { 6, s_4_27, 26, 2, 0},
/* 28 */ { 6, s_4_28, 26, 2, 0},
/* 29 */ { 2, s_4_29, -1, 1, 0},
/* 30 */ { 4, s_4_30, 29, 2, 0},
/* 31 */ { 4, s_4_31, 29, 2, 0},
/* 32 */ { 2, s_4_32, -1, 1, 0},
/* 33 */ { 4, s_4_33, 32, 2, 0},
/* 34 */ { 4, s_4_34, 32, 2, 0},
/* 35 */ { 4, s_4_35, -1, 2, 0},
/* 36 */ { 4, s_4_36, -1, 1, 0},
/* 37 */ { 4, s_4_37, -1, 2, 0},
/* 38 */ { 2, s_4_38, -1, 1, 0},
/* 39 */ { 4, s_4_39, 38, 2, 0},
/* 40 */ { 4, s_4_40, -1, 1, 0},
/* 41 */ { 6, s_4_41, 40, 2, 0},
/* 42 */ { 6, s_4_42, 40, 2, 0},
/* 43 */ { 4, s_4_43, -1, 1, 0},
/* 44 */ { 6, s_4_44, 43, 2, 0},
/* 45 */ { 6, s_4_45, 43, 1, 0}
};
/* Suffix strings and lookup table used by r_noun (find_among_b(z, a_5, 36)).
 * The strings are raw UTF-8 bytes; every letter here occupies two bytes with
 * a 0xD0 or 0xD1 lead byte. Every table entry carries result 1, which r_noun
 * handles by deleting the matched suffix. */
static const symbol s_5_0[2] = { 0xD1, 0x83 };
static const symbol s_5_1[4] = { 0xD1, 0x8F, 0xD1, 0x85 };
static const symbol s_5_2[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD1, 0x85 };
static const symbol s_5_3[4] = { 0xD0, 0xB0, 0xD1, 0x85 };
static const symbol s_5_4[2] = { 0xD1, 0x8B };
static const symbol s_5_5[2] = { 0xD1, 0x8C };
static const symbol s_5_6[2] = { 0xD1, 0x8E };
static const symbol s_5_7[4] = { 0xD1, 0x8C, 0xD1, 0x8E };
static const symbol s_5_8[4] = { 0xD0, 0xB8, 0xD1, 0x8E };
static const symbol s_5_9[2] = { 0xD1, 0x8F };
static const symbol s_5_10[4] = { 0xD1, 0x8C, 0xD1, 0x8F };
static const symbol s_5_11[4] = { 0xD0, 0xB8, 0xD1, 0x8F };
static const symbol s_5_12[2] = { 0xD0, 0xB0 };
static const symbol s_5_13[4] = { 0xD0, 0xB5, 0xD0, 0xB2 };
static const symbol s_5_14[4] = { 0xD0, 0xBE, 0xD0, 0xB2 };
static const symbol s_5_15[2] = { 0xD0, 0xB5 };
static const symbol s_5_16[4] = { 0xD1, 0x8C, 0xD0, 0xB5 };
static const symbol s_5_17[4] = { 0xD0, 0xB8, 0xD0, 0xB5 };
static const symbol s_5_18[2] = { 0xD0, 0xB8 };
static const symbol s_5_19[4] = { 0xD0, 0xB5, 0xD0, 0xB8 };
static const symbol s_5_20[4] = { 0xD0, 0xB8, 0xD0, 0xB8 };
static const symbol s_5_21[6] = { 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 };
static const symbol s_5_22[8] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 };
static const symbol s_5_23[6] = { 0xD0, 0xB0, 0xD0, 0xBC, 0xD0, 0xB8 };
static const symbol s_5_24[2] = { 0xD0, 0xB9 };
static const symbol s_5_25[4] = { 0xD0, 0xB5, 0xD0, 0xB9 };
static const symbol s_5_26[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xB9 };
static const symbol s_5_27[4] = { 0xD0, 0xB8, 0xD0, 0xB9 };
static const symbol s_5_28[4] = { 0xD0, 0xBE, 0xD0, 0xB9 };
static const symbol s_5_29[4] = { 0xD1, 0x8F, 0xD0, 0xBC };
static const symbol s_5_30[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC };
static const symbol s_5_31[4] = { 0xD0, 0xB0, 0xD0, 0xBC };
static const symbol s_5_32[4] = { 0xD0, 0xB5, 0xD0, 0xBC };
static const symbol s_5_33[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xBC };
static const symbol s_5_34[4] = { 0xD0, 0xBE, 0xD0, 0xBC };
static const symbol s_5_35[2] = { 0xD0, 0xBE };
static const struct among a_5[36] =
{
/* 0 */ { 2, s_5_0, -1, 1, 0},
/* 1 */ { 4, s_5_1, -1, 1, 0},
/* 2 */ { 6, s_5_2, 1, 1, 0},
/* 3 */ { 4, s_5_3, -1, 1, 0},
/* 4 */ { 2, s_5_4, -1, 1, 0},
/* 5 */ { 2, s_5_5, -1, 1, 0},
/* 6 */ { 2, s_5_6, -1, 1, 0},
/* 7 */ { 4, s_5_7, 6, 1, 0},
/* 8 */ { 4, s_5_8, 6, 1, 0},
/* 9 */ { 2, s_5_9, -1, 1, 0},
/* 10 */ { 4, s_5_10, 9, 1, 0},
/* 11 */ { 4, s_5_11, 9, 1, 0},
/* 12 */ { 2, s_5_12, -1, 1, 0},
/* 13 */ { 4, s_5_13, -1, 1, 0},
/* 14 */ { 4, s_5_14, -1, 1, 0},
/* 15 */ { 2, s_5_15, -1, 1, 0},
/* 16 */ { 4, s_5_16, 15, 1, 0},
/* 17 */ { 4, s_5_17, 15, 1, 0},
/* 18 */ { 2, s_5_18, -1, 1, 0},
/* 19 */ { 4, s_5_19, 18, 1, 0},
/* 20 */ { 4, s_5_20, 18, 1, 0},
/* 21 */ { 6, s_5_21, 18, 1, 0},
/* 22 */ { 8, s_5_22, 21, 1, 0},
/* 23 */ { 6, s_5_23, 18, 1, 0},
/* 24 */ { 2, s_5_24, -1, 1, 0},
/* 25 */ { 4, s_5_25, 24, 1, 0},
/* 26 */ { 6, s_5_26, 25, 1, 0},
/* 27 */ { 4, s_5_27, 24, 1, 0},
/* 28 */ { 4, s_5_28, 24, 1, 0},
/* 29 */ { 4, s_5_29, -1, 1, 0},
/* 30 */ { 6, s_5_30, 29, 1, 0},
/* 31 */ { 4, s_5_31, -1, 1, 0},
/* 32 */ { 4, s_5_32, -1, 1, 0},
/* 33 */ { 6, s_5_33, 32, 1, 0},
/* 34 */ { 4, s_5_34, -1, 1, 0},
/* 35 */ { 2, s_5_35, -1, 1, 0}
};
/* Suffix strings and table used by r_derivational (find_among_b(z, a_6, 2)).
 * s_6_1 is s_6_0 followed by the two bytes 0xD1 0x8C. Both entries have
 * result 1, which r_derivational handles by deleting the suffix once the
 * r_R2 check has passed. */
static const symbol s_6_0[6] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82 };
static const symbol s_6_1[8] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82, 0xD1, 0x8C };
static const struct among a_6[2] =
{
/* 0 */ { 6, s_6_0, -1, 1, 0},
/* 1 */ { 8, s_6_1, -1, 1, 0}
};
/* Suffix strings and table used by r_tidy_up (find_among_b(z, a_7, 4)).
 * The result codes select the r_tidy_up branch: 1 for s_7_0 and s_7_2,
 * 2 for s_7_3, 3 for s_7_1. */
static const symbol s_7_0[6] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88 };
static const symbol s_7_1[2] = { 0xD1, 0x8C };
static const symbol s_7_2[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88, 0xD0, 0xB5 };
static const symbol s_7_3[2] = { 0xD0, 0xBD };
static const struct among a_7[4] =
{
/* 0 */ { 6, s_7_0, -1, 1, 0},
/* 1 */ { 2, s_7_1, -1, 3, 0},
/* 2 */ { 8, s_7_2, -1, 1, 0},
/* 3 */ { 2, s_7_3, -1, 2, 0}
};
/* Bitmap for the character grouping the generated comments call "v".
 * r_mark_regions passes it to in_grouping_U/out_grouping_U with the code
 * point range 1072..1103; bit (ch - 1072) is set when ch is in the group. */
static const unsigned char g_v[] = { 33, 65, 8, 232 };
/* Two-byte literals compared against the text with eq_s_b(z, 2, ...).
 * Several are byte-identical; the generator emits one per use site. */
static const symbol s_0[] = { 0xD0, 0xB0 };
static const symbol s_1[] = { 0xD1, 0x8F };
static const symbol s_2[] = { 0xD0, 0xB0 };
static const symbol s_3[] = { 0xD1, 0x8F };
static const symbol s_4[] = { 0xD0, 0xB0 };
static const symbol s_5[] = { 0xD1, 0x8F };
static const symbol s_6[] = { 0xD0, 0xBD };
static const symbol s_7[] = { 0xD0, 0xBD };
static const symbol s_8[] = { 0xD0, 0xBD };
static const symbol s_9[] = { 0xD0, 0xB8 };
/* Compute the two region marks of the Russian stemmer: z->I[0] (called pV in
 * the generated comments) and z->I[1] (called p2). Both start out as z->l;
 * if a scan below runs off the end of the input, the marks not yet assigned
 * keep that value. The cursor z->c is restored before returning and the
 * function always returns 1. */
static int r_mark_regions(struct SN_env * z) {
z->I[0] = z->l;
z->I[1] = z->l;
{ int c1 = z->c; /* do, line 61 */
/* skip characters outside g_v, then step over the first one inside it */
{ /* gopast */ /* grouping v, line 62 */
int ret = out_grouping_U(z, g_v, 1072, 1103, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
z->I[0] = z->c; /* setmark pV, line 62 */
/* skip characters inside g_v, then step over the first one outside it */
{ /* gopast */ /* non v, line 62 */
int ret = in_grouping_U(z, g_v, 1072, 1103, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
{ /* gopast */ /* grouping v, line 63 */
int ret = out_grouping_U(z, g_v, 1072, 1103, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
{ /* gopast */ /* non v, line 63 */
int ret = in_grouping_U(z, g_v, 1072, 1103, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
z->I[1] = z->c; /* setmark p2, line 63 */
lab0:
/* reached on success and on early exit alike: put the cursor back */
z->c = c1;
}
return 1;
}
/* Region test: returns 1 when the cursor is at or beyond the mark stored in
 * z->I[1], otherwise 0. Does not modify z. */
static int r_R2(struct SN_env * z) {
    return (z->I[1] <= z->c) ? 1 : 0;
}
/* Try to strip one suffix from table a_0 that ends at the cursor (backward
 * mode).
 *   result 1: the suffix is deleted only if it is preceded by the two bytes
 *             of s_0 or, failing that, of s_1;
 *   result 2: the suffix is deleted unconditionally.
 * Returns 1 when a suffix was removed, 0 when nothing applied, and a
 * negative value if slice_del reports an error. */
static int r_perfective_gerund(struct SN_env * z) {
    int among_var;
    int status;

    z->ket = z->c;
    among_var = find_among_b(z, a_0, 9);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        /* offset from the end, so the cursor can be put back between tries */
        int from_end = z->l - z->c;
        if (!eq_s_b(z, 2, s_0)) {
            z->c = z->l - from_end;
            if (!eq_s_b(z, 2, s_1)) return 0;
        }
        status = slice_del(z);
        if (status < 0) return status;
    } else if (among_var == 2) {
        status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/* Delete a suffix from table a_1 ending at the cursor (backward mode).
 * Returns 1 when a suffix matched, 0 when none did, and a negative value if
 * slice_del fails. */
static int r_adjective(struct SN_env * z) {
    int among_var;

    z->ket = z->c;
    among_var = find_among_b(z, a_1, 26);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        int status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/* Remove an adjective ending (r_adjective must succeed) and then optionally
 * one further suffix from table a_2.
 * The a_2 step is a "try": if it does not apply, the cursor is restored from
 * m_keep and the function still returns 1. For a_2 result 1 the suffix is
 * removed only when preceded by the bytes of s_2 or s_3; for result 2 it is
 * removed unconditionally.
 * Returns 0 if r_adjective did not match, a negative value on a slice error,
 * otherwise 1. */
static int r_adjectival(struct SN_env * z) {
int among_var;
{ int ret = r_adjective(z);
if (ret == 0) return 0; /* call adjective, line 102 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 109 */
z->ket = z->c; /* [, line 110 */
among_var = find_among_b(z, a_2, 8); /* substring, line 110 */
if (!(among_var)) { z->c = z->l - m_keep; goto lab0; }
z->bra = z->c; /* ], line 110 */
switch(among_var) {
case 0: { z->c = z->l - m_keep; goto lab0; }
case 1:
{ int m1 = z->l - z->c; (void)m1; /* or, line 115 */
if (!(eq_s_b(z, 2, s_2))) goto lab2;
goto lab1;
lab2:
/* first alternative failed: rewind and try the second one */
z->c = z->l - m1;
if (!(eq_s_b(z, 2, s_3))) { z->c = z->l - m_keep; goto lab0; }
}
lab1:
{ int ret = slice_del(z); /* delete, line 115 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_del(z); /* delete, line 122 */
if (ret < 0) return ret;
}
break;
}
lab0:
;
}
return 1;
}
/* Delete a suffix from table a_3 ending at the cursor (backward mode).
 * Before the table search, a cheap filter rejects input that has fewer than
 * four bytes above the lower limit or whose last byte is neither 140 nor 143.
 * Returns 1 when a suffix matched, 0 otherwise, negative on a slice error. */
static int r_reflexive(struct SN_env * z) {
    int among_var;
    int last;

    z->ket = z->c;
    if (z->c - 3 <= z->lb) return 0;
    last = z->p[z->c - 1];
    if (last != 140 && last != 143) return 0;

    among_var = find_among_b(z, a_3, 2);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        int status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/* Try to strip one verb suffix from table a_4 that ends at the cursor
 * (backward mode).
 *   result 1: the suffix is deleted only if it is preceded by the two bytes
 *             of s_4 or, failing that, of s_5;
 *   result 2: the suffix is deleted unconditionally.
 * Returns 1 when a suffix was removed, 0 when nothing applied, and a
 * negative value if slice_del reports an error. */
static int r_verb(struct SN_env * z) {
    int among_var;
    int status;

    z->ket = z->c;
    among_var = find_among_b(z, a_4, 46);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        /* offset from the end, so the cursor can be put back between tries */
        int from_end = z->l - z->c;
        if (!eq_s_b(z, 2, s_4)) {
            z->c = z->l - from_end;
            if (!eq_s_b(z, 2, s_5)) return 0;
        }
        status = slice_del(z);
        if (status < 0) return status;
    } else if (among_var == 2) {
        status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/* Delete a noun suffix from table a_5 ending at the cursor (backward mode).
 * Returns 1 when a suffix matched, 0 when none did, and a negative value if
 * slice_del fails. */
static int r_noun(struct SN_env * z) {
    int among_var;

    z->ket = z->c;
    among_var = find_among_b(z, a_5, 36);
    if (among_var == 0) return 0;
    z->bra = z->c;

    if (among_var == 1) {
        int status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/* Delete a suffix from table a_6 ending at the cursor, provided the start of
 * the suffix passes the r_R2 region test.
 * A cheap filter first rejects input with fewer than six bytes above the
 * lower limit or whose last byte is neither 130 nor 140.
 * Returns 1 when the suffix was removed, 0 when nothing applied, negative on
 * error. */
static int r_derivational(struct SN_env * z) {
    int among_var;
    int status;
    int last;

    z->ket = z->c;
    if (z->c - 5 <= z->lb) return 0;
    last = z->p[z->c - 1];
    if (last != 130 && last != 140) return 0;

    among_var = find_among_b(z, a_6, 2);
    if (among_var == 0) return 0;
    z->bra = z->c;

    /* 0 means "not in R2", negative means error; both are passed through */
    status = r_R2(z);
    if (status <= 0) return status;

    if (among_var == 1) {
        status = slice_del(z);
        if (status < 0) return status;
    }
    return 1;
}
/* Final clean-up step driven by table a_7 (backward mode).
 *   result 1: delete the matched suffix, then require the bytes of s_6
 *             followed (further left) by the bytes of s_7 and delete the
 *             s_7 occurrence;
 *   result 2: require the bytes of s_8 to the left of the match, then delete
 *             the matched suffix;
 *   result 3: delete the matched suffix.
 * Returns 1 on success, 0 when a requirement is not met or nothing matched,
 * negative if slice_del fails. */
static int r_tidy_up(struct SN_env * z) {
    int among_var;
    int status;

    z->ket = z->c;
    among_var = find_among_b(z, a_7, 4);
    if (among_var == 0) return 0;
    z->bra = z->c;

    switch (among_var) {
    case 1:
        status = slice_del(z);
        if (status < 0) return status;
        z->ket = z->c;
        if (!eq_s_b(z, 2, s_6)) return 0;
        z->bra = z->c;
        if (!eq_s_b(z, 2, s_7)) return 0;
        break;
    case 2:
        if (!eq_s_b(z, 2, s_8)) return 0;
        break;
    case 3:
        break;
    default:
        /* no action is defined for any other result code */
        return 1;
    }

    /* every handled case ends with deleting the current slice */
    status = slice_del(z);
    if (status < 0) return status;
    return 1;
}
/* Entry point of the Russian (UTF-8) stemmer: stems the word held in z in
 * place.
 * Steps, as visible below:
 *   1. r_mark_regions computes z->I[0] and z->I[1]; the cursor is restored.
 *   2. Processing switches to backward mode (cursor at z->l) and the lower
 *      limit z->lb is temporarily raised to z->I[0].
 *   3. Either r_perfective_gerund succeeds, or an optional r_reflexive is
 *      followed by the first of r_adjectival / r_verb / r_noun that succeeds.
 *   4. A trailing occurrence of the two bytes in s_9 is removed if present.
 *   5. r_derivational and r_tidy_up are each attempted once.
 * Returns 1 normally, 0 if the cursor is already below z->I[0] when the limit
 * is set, and a negative value if any step reports an error (in that case
 * z->lb is not restored). */
extern int russian_UTF_8_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 201 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab0; /* call mark_regions, line 201 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
z->lb = z->c; z->c = z->l; /* backwards, line 202 */
{ int mlimit; /* setlimit, line 202 */
int m2 = z->l - z->c; (void)m2;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 202 */
/* remember the old lower limit and raise it to the mark */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m2;
{ int m3 = z->l - z->c; (void)m3; /* do, line 203 */
{ int m4 = z->l - z->c; (void)m4; /* or, line 204 */
{ int ret = r_perfective_gerund(z);
if (ret == 0) goto lab3; /* call perfective_gerund, line 204 */
if (ret < 0) return ret;
}
goto lab2;
lab3:
/* perfective gerund did not apply: rewind and try the alternative */
z->c = z->l - m4;
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 205 */
{ int ret = r_reflexive(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call reflexive, line 205 */
if (ret < 0) return ret;
}
lab4:
;
}
{ int m5 = z->l - z->c; (void)m5; /* or, line 206 */
{ int ret = r_adjectival(z);
if (ret == 0) goto lab6; /* call adjectival, line 206 */
if (ret < 0) return ret;
}
goto lab5;
lab6:
z->c = z->l - m5;
{ int ret = r_verb(z);
if (ret == 0) goto lab7; /* call verb, line 206 */
if (ret < 0) return ret;
}
goto lab5;
lab7:
z->c = z->l - m5;
{ int ret = r_noun(z);
if (ret == 0) goto lab1; /* call noun, line 206 */
if (ret < 0) return ret;
}
}
lab5:
;
}
lab2:
lab1:
/* end of the "do": the cursor goes back whatever the outcome */
z->c = z->l - m3;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 209 */
z->ket = z->c; /* [, line 209 */
if (!(eq_s_b(z, 2, s_9))) { z->c = z->l - m_keep; goto lab8; }
z->bra = z->c; /* ], line 209 */
{ int ret = slice_del(z); /* delete, line 209 */
if (ret < 0) return ret;
}
lab8:
;
}
{ int m6 = z->l - z->c; (void)m6; /* do, line 212 */
{ int ret = r_derivational(z);
if (ret == 0) goto lab9; /* call derivational, line 212 */
if (ret < 0) return ret;
}
lab9:
z->c = z->l - m6;
}
{ int m7 = z->l - z->c; (void)m7; /* do, line 213 */
{ int ret = r_tidy_up(z);
if (ret == 0) goto lab10; /* call tidy_up, line 213 */
if (ret < 0) return ret;
}
lab10:
z->c = z->l - m7;
}
/* put the original lower limit back */
z->lb = mlimit;
}
z->c = z->lb;
return 1;
}
/* Create the stemming environment for the Russian UTF-8 stemmer by
 * forwarding to SN_create_env(0, 2, 0). The 2 presumably requests the two
 * integer slots z->I[0] and z->I[1] that the stemmer uses -- confirm against
 * SN_create_env. */
extern struct SN_env * russian_UTF_8_create_env(void)
{
    struct SN_env * env = SN_create_env(0, 2, 0);
    return env;
}
/* Release an environment obtained from russian_UTF_8_create_env by
 * forwarding to SN_close_env(z, 0). */
extern void russian_UTF_8_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,309 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
#ifdef __cplusplus
extern "C" {
#endif
extern int swedish_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_other_suffix(struct SN_env * z);
static int r_consonant_pair(struct SN_env * z);
static int r_main_suffix(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
#endif
extern struct SN_env * swedish_UTF_8_create_env(void);
extern void swedish_UTF_8_close_env(struct SN_env * z);
#ifdef __cplusplus
}
#endif
/* Suffix strings and lookup table used by r_main_suffix
 * (find_among_b(z, a_0, 37)). Each table initializer looks like
 * { length, string, index of a shorter entry to fall back to (-1 for none),
 * result code, optional condition function (0 here) } -- field order is
 * inferred from how find_among_b reads the entries; confirm against the
 * struct among declaration in header.h.
 * In r_main_suffix, result 1 deletes the suffix; result 2 (the bare "s")
 * deletes it only when the preceding character is in g_s_ending. */
static const symbol s_0_0[1] = { 'a' };
static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' };
static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' };
static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' };
static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' };
static const symbol s_0_5[2] = { 'a', 'd' };
static const symbol s_0_6[1] = { 'e' };
static const symbol s_0_7[3] = { 'a', 'd', 'e' };
static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' };
static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' };
static const symbol s_0_10[3] = { 'a', 'r', 'e' };
static const symbol s_0_11[4] = { 'a', 's', 't', 'e' };
static const symbol s_0_12[2] = { 'e', 'n' };
static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' };
static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' };
static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' };
static const symbol s_0_16[3] = { 'e', 'r', 'n' };
static const symbol s_0_17[2] = { 'a', 'r' };
static const symbol s_0_18[2] = { 'e', 'r' };
static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' };
static const symbol s_0_20[2] = { 'o', 'r' };
static const symbol s_0_21[1] = { 's' };
static const symbol s_0_22[2] = { 'a', 's' };
static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' };
static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' };
static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' };
static const symbol s_0_26[2] = { 'e', 's' };
static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' };
static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' };
static const symbol s_0_29[3] = { 'e', 'n', 's' };
static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' };
static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' };
static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' };
static const symbol s_0_33[2] = { 'a', 't' };
static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' };
static const symbol s_0_35[3] = { 'h', 'e', 't' };
static const symbol s_0_36[3] = { 'a', 's', 't' };
static const struct among a_0[37] =
{
/* 0 */ { 1, s_0_0, -1, 1, 0},
/* 1 */ { 4, s_0_1, 0, 1, 0},
/* 2 */ { 4, s_0_2, 0, 1, 0},
/* 3 */ { 7, s_0_3, 2, 1, 0},
/* 4 */ { 4, s_0_4, 0, 1, 0},
/* 5 */ { 2, s_0_5, -1, 1, 0},
/* 6 */ { 1, s_0_6, -1, 1, 0},
/* 7 */ { 3, s_0_7, 6, 1, 0},
/* 8 */ { 4, s_0_8, 6, 1, 0},
/* 9 */ { 4, s_0_9, 6, 1, 0},
/* 10 */ { 3, s_0_10, 6, 1, 0},
/* 11 */ { 4, s_0_11, 6, 1, 0},
/* 12 */ { 2, s_0_12, -1, 1, 0},
/* 13 */ { 5, s_0_13, 12, 1, 0},
/* 14 */ { 4, s_0_14, 12, 1, 0},
/* 15 */ { 5, s_0_15, 12, 1, 0},
/* 16 */ { 3, s_0_16, -1, 1, 0},
/* 17 */ { 2, s_0_17, -1, 1, 0},
/* 18 */ { 2, s_0_18, -1, 1, 0},
/* 19 */ { 5, s_0_19, 18, 1, 0},
/* 20 */ { 2, s_0_20, -1, 1, 0},
/* 21 */ { 1, s_0_21, -1, 2, 0},
/* 22 */ { 2, s_0_22, 21, 1, 0},
/* 23 */ { 5, s_0_23, 22, 1, 0},
/* 24 */ { 5, s_0_24, 22, 1, 0},
/* 25 */ { 5, s_0_25, 22, 1, 0},
/* 26 */ { 2, s_0_26, 21, 1, 0},
/* 27 */ { 4, s_0_27, 26, 1, 0},
/* 28 */ { 5, s_0_28, 26, 1, 0},
/* 29 */ { 3, s_0_29, 21, 1, 0},
/* 30 */ { 5, s_0_30, 29, 1, 0},
/* 31 */ { 6, s_0_31, 29, 1, 0},
/* 32 */ { 4, s_0_32, 21, 1, 0},
/* 33 */ { 2, s_0_33, -1, 1, 0},
/* 34 */ { 5, s_0_34, -1, 1, 0},
/* 35 */ { 3, s_0_35, -1, 1, 0},
/* 36 */ { 3, s_0_36, -1, 1, 0}
};
/* Two-letter endings tested by r_consonant_pair (find_among_b(z, a_1, 7)).
 * All entries carry result -1; r_consonant_pair only checks that the lookup
 * returned a non-zero value and never switches on it. */
static const symbol s_1_0[2] = { 'd', 'd' };
static const symbol s_1_1[2] = { 'g', 'd' };
static const symbol s_1_2[2] = { 'n', 'n' };
static const symbol s_1_3[2] = { 'd', 't' };
static const symbol s_1_4[2] = { 'g', 't' };
static const symbol s_1_5[2] = { 'k', 't' };
static const symbol s_1_6[2] = { 't', 't' };
static const struct among a_1[7] =
{
/* 0 */ { 2, s_1_0, -1, -1, 0},
/* 1 */ { 2, s_1_1, -1, -1, 0},
/* 2 */ { 2, s_1_2, -1, -1, 0},
/* 3 */ { 2, s_1_3, -1, -1, 0},
/* 4 */ { 2, s_1_4, -1, -1, 0},
/* 5 */ { 2, s_1_5, -1, -1, 0},
/* 6 */ { 2, s_1_6, -1, -1, 0}
};
/* Suffix strings and table used by r_other_suffix
 * (find_among_b(z, a_2, 5)). In r_other_suffix result 1 deletes the suffix,
 * result 2 replaces it with s_0 and result 3 replaces it with s_1.
 * s_2_4 contains the two-byte UTF-8 sequence 0xC3 0xB6, so its length is 5
 * bytes for four characters. */
static const symbol s_2_0[2] = { 'i', 'g' };
static const symbol s_2_1[3] = { 'l', 'i', 'g' };
static const symbol s_2_2[3] = { 'e', 'l', 's' };
static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' };
static const symbol s_2_4[5] = { 'l', 0xC3, 0xB6, 's', 't' };
static const struct among a_2[5] =
{
/* 0 */ { 2, s_2_0, -1, 1, 0},
/* 1 */ { 3, s_2_1, 0, 1, 0},
/* 2 */ { 3, s_2_2, -1, 1, 0},
/* 3 */ { 5, s_2_3, -1, 3, 0},
/* 4 */ { 5, s_2_4, -1, 2, 0}
};
/* Bitmap for the grouping the generated comments call "v"; r_mark_regions
 * uses it with the code point range 97..246 (bit ch - 97 set = in group). */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
/* Bitmap of characters that may precede a removable final "s";
 * r_main_suffix uses it with the range 98..121. */
static const unsigned char g_s_ending[] = { 119, 127, 149 };
/* Replacement strings written by r_other_suffix via slice_from_s(z, 4, ...);
 * s_0 is four bytes because it contains the sequence 0xC3 0xB6. */
static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' };
static const symbol s_1[] = { 'f', 'u', 'l', 'l' };
/* Compute the region mark z->I[0] (p1 in the generated comments) for the
 * Swedish stemmer.
 * z->I[1] is set to the position three characters after the cursor. The
 * cursor is then advanced to the first character in g_v and on past the
 * first following character not in g_v; that position becomes z->I[0],
 * raised to z->I[1] if it would otherwise be smaller.
 * Returns 1 on success and 0 if the input ends before any of these
 * positions can be found (z->I[0] is then left at z->l). The cursor is not
 * restored here; the caller does that. */
static int r_mark_regions(struct SN_env * z) {
    int hop;
    int width;

    z->I[0] = z->l;

    /* test hop 3: look ahead without moving the cursor */
    hop = skip_utf8(z->p, z->c, 0, z->l, 3);
    if (hop < 0) return 0;
    z->I[1] = hop;

    /* goto v: stop in front of the first character in the grouping */
    if (out_grouping_U(z, g_v, 97, 246, 1) < 0) return 0;

    /* gopast non-v: run through the grouping and step over what follows */
    width = in_grouping_U(z, g_v, 97, 246, 1);
    if (width < 0) return 0;
    z->c += width;

    z->I[0] = z->c;
    if (z->I[0] < z->I[1]) {
        z->I[0] = z->I[1];
    }
    return 1;
}
/* Remove one ending listed in a_0 from the end of the word (backward mode).
 * The table search is confined to the text at or after z->I[0] by raising
 * z->lb for its duration; z->lb is put back from mlimit before the result is
 * acted on.
 * Result 1 deletes the suffix. Result 2 deletes it only when the character
 * before it belongs to g_s_ending.
 * Returns 1 when a suffix was removed, 0 when nothing applied, negative on a
 * slice error. */
static int r_main_suffix(struct SN_env * z) {
int among_var;
{ int mlimit; /* setlimit, line 37 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 37 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 37 */
/* quick reject: the last byte must be in 0x60..0x7F and one of the letters
 * a, d, e, n, r, s, t (the bits set in 1851442), which are the only final
 * letters occurring in a_0 */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
among_var = find_among_b(z, a_0, 37); /* substring, line 37 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 37 */
z->lb = mlimit;
}
switch(among_var) {
case 0: return 0;
case 1:
{ int ret = slice_del(z); /* delete, line 44 */
if (ret < 0) return ret;
}
break;
case 2:
/* in_grouping_b_U returns 0 only when the previous character is in the
 * grouping; any other value means the condition failed */
if (in_grouping_b_U(z, g_s_ending, 98, 121, 0)) return 0;
{ int ret = slice_del(z); /* delete, line 46 */
if (ret < 0) return ret;
}
break;
}
return 1;
}
/* If the word (limited to the text at or after z->I[0]) ends in one of the
 * two-letter pairs in a_1, delete its final character.
 * Returns 1 when a character was removed, 0 when nothing applied, and a
 * negative value if slice_del fails.
 * NOTE: on the slice_del error path z->lb is left at the raised limit rather
 * than restored from mlimit. */
static int r_consonant_pair(struct SN_env * z) {
{ int mlimit; /* setlimit, line 50 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 50 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
{ int m2 = z->l - z->c; (void)m2; /* and, line 52 */
/* quick reject: at least two bytes are needed and the last byte must be one
 * of d, n, t (the bits set in 1064976) */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */
/* the pair was only a test: go back to the end of the word */
z->c = z->l - m2;
z->ket = z->c; /* [, line 52 */
/* step back exactly one character so the slice covers the last one */
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) { z->lb = mlimit; return 0; }
z->c = ret; /* next, line 52 */
}
z->bra = z->c; /* ], line 52 */
{ int ret = slice_del(z); /* delete, line 52 */
if (ret < 0) return ret;
}
}
z->lb = mlimit;
}
return 1;
}
/* Handle the endings listed in a_2 (limited to the text at or after
 * z->I[0]): result 1 deletes the suffix, result 2 replaces it with the four
 * bytes of s_0, result 3 replaces it with the four bytes of s_1.
 * Returns 1 when a suffix was handled, 0 when nothing applied, and a
 * negative value on a slice error.
 * NOTE: on the slice error paths z->lb is left at the raised limit rather
 * than restored from mlimit. */
static int r_other_suffix(struct SN_env * z) {
int among_var;
{ int mlimit; /* setlimit, line 55 */
int m1 = z->l - z->c; (void)m1;
if (z->c < z->I[0]) return 0;
z->c = z->I[0]; /* tomark, line 55 */
mlimit = z->lb; z->lb = z->c;
z->c = z->l - m1;
z->ket = z->c; /* [, line 56 */
/* quick reject: at least two bytes are needed and the last byte must be one
 * of g, s, t (the bits set in 1572992) */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
among_var = find_among_b(z, a_2, 5); /* substring, line 56 */
if (!(among_var)) { z->lb = mlimit; return 0; }
z->bra = z->c; /* ], line 56 */
switch(among_var) {
case 0: { z->lb = mlimit; return 0; }
case 1:
{ int ret = slice_del(z); /* delete, line 57 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 4, s_0); /* <-, line 58 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */
if (ret < 0) return ret;
}
break;
}
z->lb = mlimit;
}
return 1;
}
extern int swedish_UTF_8_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 66 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab0; /* call mark_regions, line 66 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
z->lb = z->c; z->c = z->l; /* backwards, line 67 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 68 */
{ int ret = r_main_suffix(z);
if (ret == 0) goto lab1; /* call main_suffix, line 68 */
if (ret < 0) return ret;
}
lab1:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 69 */
{ int ret = r_consonant_pair(z);
if (ret == 0) goto lab2; /* call consonant_pair, line 69 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
{ int m4 = z->l - z->c; (void)m4; /* do, line 70 */
{ int ret = r_other_suffix(z);
if (ret == 0) goto lab3; /* call other_suffix, line 70 */
if (ret < 0) return ret;
}
lab3:
z->c = z->l - m4;
}
z->c = z->lb;
return 1;
}
/* Create the stemming environment for the Swedish UTF-8 stemmer by
 * forwarding to SN_create_env(0, 2, 0). The 2 presumably requests the two
 * integer slots z->I[0] and z->I[1] that the stemmer uses -- confirm against
 * SN_create_env. */
extern struct SN_env * swedish_UTF_8_create_env(void)
{
    struct SN_env * env = SN_create_env(0, 2, 0);
    return env;
}
/* Release an environment obtained from swedish_UTF_8_create_env by
 * forwarding to SN_close_env(z, 0). */
extern void swedish_UTF_8_close_env(struct SN_env * z)
{
    SN_close_env(z, 0);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,473 @@
#include "header.h"
#define unless(C) if(!(C))
#define CREATE_SIZE 1
/* Allocate a new symbol buffer.
 * The allocation holds HEAD bytes of bookkeeping followed by room for
 * CREATE_SIZE + 1 symbols; the returned pointer addresses the first symbol,
 * i.e. HEAD bytes into the allocation. Capacity and size are both set to
 * CREATE_SIZE.
 * Returns NULL if malloc fails. Release the buffer with lose_s. */
extern symbol * create_s(void) {
    symbol * buf;
    char * raw = (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));

    if (raw == NULL) {
        return NULL;
    }
    buf = (symbol *) (raw + HEAD);
    CAPACITY(buf) = CREATE_SIZE;
    SET_SIZE(buf, CREATE_SIZE);
    return buf;
}
/* Free a symbol buffer. p addresses the first symbol, so the allocation
 * itself starts HEAD bytes earlier. A NULL p is ignored. */
extern void lose_s(symbol * p) {
    if (p != NULL) {
        free((char *) p - HEAD);
    }
}
/*
new_c = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
if n is positive, or -n characters backwards from p + c - 1 if n is negative.
Forward movement never goes beyond index l, backward movement never goes
below index lb. Returns the new position, or -1 if there are not enough
characters in that direction.
-- used to implement hop and next in the utf8 case.
*/
extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
    if (n >= 0) {
        while (n > 0) {
            if (c >= l) return -1;
            if (p[c++] >= 0xC0) {   /* 11xx xxxx: lead byte of a sequence */
                /* swallow the continuation bytes (10xx xxxx) that follow */
                while (c < l && p[c] >= 0x80 && p[c] < 0xC0) {
                    c++;
                }
            }
            n--;
        }
    } else {
        while (n < 0) {
            if (c <= lb) return -1;
            if (p[--c] >= 0x80) {   /* inside a multi-byte sequence */
                /* walk back until a lead byte (>= 0xC0) or the limit */
                while (c > lb && p[c] < 0xC0) {
                    c--;
                }
            }
            n++;
        }
    }
    return c;
}
/* Code for character groupings: utf8 cases */
/* Decode the UTF-8 character starting at p[c], looking no further than
 * index l (exclusive). The code point is stored in *slot and the number of
 * bytes consumed (1, 2 or 3) is returned; 0 is returned, and *slot is left
 * untouched, when c is already at l.
 * A sequence cut short by l is decoded from the bytes that are available.
 * Sequences longer than three bytes are not handled. */
static int get_utf8(const symbol * p, int c, int l, int * slot) {
    int lead;
    int second;

    if (c >= l) return 0;

    lead = p[c];
    if (lead < 0xC0 || c + 1 == l) {   /* below 1100 0000: single byte */
        *slot = lead;
        return 1;
    }

    second = p[c + 1];
    if (lead < 0xE0 || c + 2 == l) {   /* below 1110 0000: two bytes */
        *slot = ((lead & 0x1F) << 6) | (second & 0x3F);
        return 2;
    }

    *slot = ((lead & 0xF) << 12) | ((second & 0x3F) << 6) | (p[c + 2] & 0x3F);
    return 3;
}
/* Decode the UTF-8 character that ends just before p[c], looking no further
 * back than index lb (inclusive). The code point is stored in *slot and the
 * number of bytes it occupies (1, 2 or 3) is returned; 0 is returned, and
 * *slot is left untouched, when c is already at lb.
 * A sequence cut short by lb is decoded from the bytes that are available.
 * Sequences longer than three bytes are not handled. */
static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
    int b0, b1;
    if (c <= lb) return 0;
    b0 = p[--c];
    if (b0 < 0x80 || c == lb) {   /* 1000 0000 */
        * slot = b0; return 1;
    }
    b1 = p[--c];
    if (b1 >= 0xC0 || c == lb) {   /* 1100 0000 */
        * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
    }
    /* c still indexes b1 here, so the lead byte of the three-byte sequence
     * is one position further back. Reading p[c] would reuse the low nibble
     * of the continuation byte b1 as the top four bits of the code point.
     * c > lb is guaranteed by the check above, so p[c - 1] is within range. */
    * slot = (p[c - 1] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
}
/* Forward grouping test on UTF-8 text.
 * s is a bitmap covering the code points min..max (bit ch - min set = ch is
 * in the grouping). While the character at the cursor is in the grouping the
 * cursor moves past it; with repeat == 0 only one character is examined.
 * Returns -1 at the end of the input, the byte width of the first character
 * that is NOT in the grouping (the cursor stays in front of it), or 0 when
 * repeat == 0 and the character examined was in the grouping. */
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        int bit;
        const int width = get_utf8(z->p, z->c, z->l, &ch);

        if (width == 0) return -1;
        if (ch > max || ch < min) return width;
        bit = ch - min;
        if ((s[bit >> 3] & (1 << (bit & 7))) == 0) return width;

        z->c += width;
        if (!repeat) break;
    }
    return 0;
}
/* Backward grouping test on UTF-8 text.
 * s is a bitmap covering the code points min..max (bit ch - min set = ch is
 * in the grouping). While the character before the cursor is in the grouping
 * the cursor moves back over it; with repeat == 0 only one character is
 * examined.
 * Returns -1 when the lower limit z->lb is reached, the byte width of the
 * first character that is NOT in the grouping (the cursor stays behind it),
 * or 0 when repeat == 0 and the character examined was in the grouping. */
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        int bit;
        const int width = get_b_utf8(z->p, z->c, z->lb, &ch);

        if (width == 0) return -1;
        if (ch > max || ch < min) return width;
        bit = ch - min;
        if ((s[bit >> 3] & (1 << (bit & 7))) == 0) return width;

        z->c -= width;
        if (!repeat) break;
    }
    return 0;
}
/* Forward "not in grouping" test on UTF-8 text.
 * s is a bitmap covering the code points min..max (bit ch - min set = ch is
 * in the grouping). While the character at the cursor is outside the
 * grouping the cursor moves past it; with repeat == 0 only one character is
 * examined.
 * Returns -1 at the end of the input, the byte width of the first character
 * that IS in the grouping (the cursor stays in front of it), or 0 when
 * repeat == 0 and the character examined was outside the grouping. */
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        const int width = get_utf8(z->p, z->c, z->l, &ch);

        if (width == 0) return -1;
        if (ch >= min && ch <= max) {
            const int bit = ch - min;
            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) return width;
        }

        z->c += width;
        if (!repeat) break;
    }
    return 0;
}
/* Backward "not in grouping" test on UTF-8 text.
 * s is a bitmap covering the code points min..max (bit ch - min set = ch is
 * in the grouping). While the character before the cursor is outside the
 * grouping the cursor moves back over it; with repeat == 0 only one
 * character is examined.
 * Returns -1 when the lower limit z->lb is reached, the byte width of the
 * first character that IS in the grouping (the cursor stays behind it), or 0
 * when repeat == 0 and the character examined was outside the grouping. */
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        const int width = get_b_utf8(z->p, z->c, z->lb, &ch);

        if (width == 0) return -1;
        if (ch >= min && ch <= max) {
            const int bit = ch - min;
            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) return width;
        }

        z->c -= width;
        if (!repeat) break;
    }
    return 0;
}
/* Code for character groupings: non-utf8 cases */
/* Forward grouping test on single-byte text.
 * s is a bitmap covering the values min..max (bit ch - min set = ch is in
 * the grouping). While the symbol at the cursor is in the grouping the
 * cursor moves past it; with repeat == 0 only one symbol is examined.
 * Returns -1 at the end of the input, 1 when a symbol NOT in the grouping is
 * found (the cursor stays in front of it), or 0 when repeat == 0 and the
 * symbol examined was in the grouping. */
extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        int bit;

        if (z->c >= z->l) return -1;
        ch = z->p[z->c];
        if (ch > max || ch < min) return 1;
        bit = ch - min;
        if ((s[bit >> 3] & (1 << (bit & 7))) == 0) return 1;

        z->c++;
        if (!repeat) break;
    }
    return 0;
}
/* Backward grouping test on single-byte text.
 * s is a bitmap covering the values min..max (bit ch - min set = ch is in
 * the grouping). While the symbol before the cursor is in the grouping the
 * cursor moves back over it; with repeat == 0 only one symbol is examined.
 * Returns -1 when the lower limit z->lb is reached, 1 when a symbol NOT in
 * the grouping is found (the cursor stays behind it), or 0 when repeat == 0
 * and the symbol examined was in the grouping. */
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;
        int bit;

        if (z->c <= z->lb) return -1;
        ch = z->p[z->c - 1];
        if (ch > max || ch < min) return 1;
        bit = ch - min;
        if ((s[bit >> 3] & (1 << (bit & 7))) == 0) return 1;

        z->c--;
        if (!repeat) break;
    }
    return 0;
}
/* Forward "not in grouping" test on single-byte text.
 * s is a bitmap covering the values min..max (bit ch - min set = ch is in
 * the grouping). While the symbol at the cursor is outside the grouping the
 * cursor moves past it; with repeat == 0 only one symbol is examined.
 * Returns -1 at the end of the input, 1 when a symbol that IS in the
 * grouping is found (the cursor stays in front of it), or 0 when
 * repeat == 0 and the symbol examined was outside the grouping. */
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;

        if (z->c >= z->l) return -1;
        ch = z->p[z->c];
        if (ch >= min && ch <= max) {
            const int bit = ch - min;
            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) return 1;
        }

        z->c++;
        if (!repeat) break;
    }
    return 0;
}
/* Backward "not in grouping" test on single-byte text.
 * s is a bitmap covering the values min..max (bit ch - min set = ch is in
 * the grouping). While the symbol before the cursor is outside the grouping
 * the cursor moves back over it; with repeat == 0 only one symbol is
 * examined.
 * Returns -1 when the lower limit z->lb is reached, 1 when a symbol that IS
 * in the grouping is found (the cursor stays behind it), or 0 when
 * repeat == 0 and the symbol examined was outside the grouping. */
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
    for (;;) {
        int ch;

        if (z->c <= z->lb) return -1;
        ch = z->p[z->c - 1];
        if (ch >= min && ch <= max) {
            const int bit = ch - min;
            if ((s[bit >> 3] & (1 << (bit & 7))) != 0) return 1;
        }

        z->c--;
        if (!repeat) break;
    }
    return 0;
}
/* Forward literal match: if the s_size symbols at the cursor equal s, move
 * the cursor past them and return 1. Otherwise return 0 and leave the cursor
 * where it was. */
extern int eq_s(struct SN_env * z, int s_size, const symbol * s) {
    const int available = z->l - z->c;

    if (available < s_size) return 0;
    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;

    z->c += s_size;
    return 1;
}
/* Backward literal match: if the s_size symbols ending at the cursor equal
 * s, move the cursor back over them and return 1. Otherwise return 0 and
 * leave the cursor where it was. Nothing below z->lb is examined. */
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
    const int available = z->c - z->lb;

    if (available < s_size) return 0;
    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;

    z->c -= s_size;
    return 1;
}
/* Forward match against a symbol buffer: same as eq_s with the length taken
 * from SIZE(p). */
extern int eq_v(struct SN_env * z, const symbol * p) {
    const int len = SIZE(p);
    return eq_s(z, len, p);
}
/* Backward match against a symbol buffer: same as eq_s_b with the length
 * taken from SIZE(p). */
extern int eq_v_b(struct SN_env * z, const symbol * p) {
    const int len = SIZE(p);
    return eq_s_b(z, len, p);
}
/* Forward table lookup: find the longest entry of v[0..v_size-1] that
 * matches the text starting at the cursor.
 * The first loop is a binary search, which relies on the table being sorted.
 * common_i / common_j record how many leading symbols are already known to
 * match at the lower / upper bound, so each probe resumes comparing from the
 * smaller of the two.
 * The second loop starts at the candidate found and follows substring_i
 * links to shorter entries until one is fully matched and, if it has a
 * condition function, that function returns non-zero.
 * On success the cursor is moved past the matched entry and its result is
 * returned; if no entry qualifies, 0 is returned. */
extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
int i = 0;
int j = v_size;
int c = z->c; int l = z->l;
symbol * q = z->p + c;
const struct among * w;
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
while(1) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; /* smaller */
w = v + k;
{
int i2; for (i2 = common; i2 < w->s_size; i2++) {
/* running out of input counts as "text sorts before this entry" */
if (c + common == l) { diff = -1; break; }
diff = q[common] - w->s[i2];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1) {
if (i > 0) break; /* v->s has been inspected */
if (j == i) break; /* only one item in v */
/* - but now we need to go round once more to get
v->s inspected. This looks messy, but is actually
the optimal approach. */
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
while(1) {
w = v + i;
/* the entry matches in full only if every one of its symbols was seen */
if (common_i >= w->s_size) {
z->c = c + w->s_size;
if (w->function == 0) return w->result;
{
int res = w->function(z);
/* the condition function may move the cursor: set it again */
z->c = c + w->s_size;
if (res) return w->result;
}
}
i = w->substring_i;
if (i < 0) return 0;
}
}
/* find_among_b is for backwards processing. Same comments apply */
/* Backward table lookup: find the longest entry of v[0..v_size-1] that
 * matches the text ending at the cursor. Entries are compared from their
 * last symbol towards their first, and nothing below z->lb is examined.
 * The first loop is a binary search, which relies on the table being sorted
 * for that comparison order; the second loop follows substring_i links to
 * shorter entries until one is fully matched and, if it has a condition
 * function, that function returns non-zero.
 * On success the cursor is moved back to the start of the matched entry and
 * its result is returned; if no entry qualifies, 0 is returned. */
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
int i = 0;
int j = v_size;
int c = z->c; int lb = z->lb;
symbol * q = z->p + c - 1;
const struct among * w;
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
while(1) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
w = v + k;
{
int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) {
/* reaching the lower limit counts as "text sorts before this entry" */
if (c - common == lb) { diff = -1; break; }
diff = q[- common] - w->s[i2];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1) {
if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
while(1) {
w = v + i;
/* the entry matches in full only if every one of its symbols was seen */
if (common_i >= w->s_size) {
z->c = c - w->s_size;
if (w->function == 0) return w->result;
{
int res = w->function(z);
/* the condition function may move the cursor: set it again */
z->c = c - w->s_size;
if (res) return w->result;
}
}
i = w->substring_i;
if (i < 0) return 0;
}
}
/* Grow the buffer behind p so that it can hold at least n symbols; the new
 * capacity is n + 20.
 * Returns the (possibly moved) buffer. If realloc fails, the old buffer is
 * freed and NULL is returned, so the caller must not use p afterwards. */
static symbol * increase_size(symbol * p, int n) {
    const int new_size = n + 20;
    symbol * grown;
    char * raw = (char *) realloc((char *) p - HEAD,
                                  HEAD + (new_size + 1) * sizeof(symbol));

    if (raw == NULL) {
        lose_s(p);
        return NULL;
    }
    grown = (symbol *) (raw + HEAD);
    CAPACITY(grown) = new_size;
    return grown;
}
/* to replace symbols between c_bra and c_ket in z->p by the
s_size symbols at s.
Returns 0 on success, -1 on error.
Also, frees z->p (and sets it to NULL) on error.
If adjptr is not NULL, the change in length (s_size minus the number of
symbols replaced) is stored in *adjptr on success.
z->l is adjusted by the same amount. The cursor z->c is shifted by it when
it was at or after c_ket, and moved to c_bra when it was inside the
replaced range.
*/
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
{
int adjustment;
int len;
if (z->p == NULL) {
z->p = create_s();
if (z->p == NULL) return -1;
}
adjustment = s_size - (c_ket - c_bra);
len = SIZE(z->p);
if (adjustment != 0) {
if (adjustment + len > CAPACITY(z->p)) {
/* increase_size frees the old buffer on failure, so storing its NULL
 * result in z->p does not leak */
z->p = increase_size(z->p, adjustment + len);
if (z->p == NULL) return -1;
}
/* shift the tail that follows the replaced range to its new position */
memmove(z->p + c_ket + adjustment,
z->p + c_ket,
(len - c_ket) * sizeof(symbol));
SET_SIZE(z->p, adjustment + len);
z->l += adjustment;
if (z->c >= c_ket)
z->c += adjustment;
else
if (z->c > c_bra)
z->c = c_bra;
}
/* copy the replacement in; skipped for a pure deletion, where s may be 0 */
unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
if (adjptr != NULL)
*adjptr = adjustment;
return 0;
}
/* Validate the current slice before it is used.
 * The slice is acceptable when 0 <= bra <= ket <= l, the buffer exists and
 * l does not exceed the buffer's recorded size.
 * Returns 0 when the slice is usable and -1 otherwise. */
static int slice_check(struct SN_env * z) {
    const int usable = z->bra >= 0 &&
                       z->bra <= z->ket &&
                       z->ket <= z->l &&
                       z->p != NULL &&
                       z->l <= SIZE(z->p); /* this test could be removed */

    if (usable) {
        return 0;
    }
#if 0
    fprintf(stderr, "faulty slice operation:\n");
    debug(z, -1, 0);
#endif
    return -1;
}
/* Replace the current slice (z->bra .. z->ket) with the s_size symbols at s.
 * Returns -1 if the slice fails slice_check, otherwise the result of
 * replace_s (0 on success, -1 on error). */
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
    return slice_check(z)
        ? -1
        : replace_s(z, z->bra, z->ket, s_size, s, NULL);
}
/* Replace the current slice with the contents of the symbol buffer p; the
 * length is taken from SIZE(p). Same return values as slice_from_s. */
extern int slice_from_v(struct SN_env * z, const symbol * p) {
    const int len = SIZE(p);
    return slice_from_s(z, len, p);
}
extern int slice_del(struct SN_env * z) {
return slice_from_s(z, 0, 0);
}
/* Replace the symbols between bra and ket with the s_size symbols at s and
 * keep the slice marks consistent: z->bra and z->ket are each shifted by the
 * change in length when they lie at or after bra.
 * Returns 0 on success and -1 if replace_s fails. */
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
    int delta;

    if (replace_s(z, bra, ket, s_size, s, &delta) != 0) {
        return -1;
    }
    if (z->bra >= bra) z->bra += delta;
    if (z->ket >= bra) z->ket += delta;
    return 0;
}
/* Replace the symbols between bra and ket with the contents of the symbol
 * buffer p (length SIZE(p)), shifting z->bra and z->ket by the change in
 * length when they lie at or after bra.
 * Returns 0 on success and -1 if the replacement fails. */
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
    /* identical to insert_s once the length has been read from the buffer */
    return insert_s(z, bra, ket, SIZE(p), p);
}
/* Copy the current slice (z->bra .. z->ket) into the symbol buffer p,
 * growing p if its capacity is too small, and set p's size to the slice
 * length.
 * Returns the buffer, which may have moved. Returns NULL if the slice is
 * invalid or the buffer cannot be grown; in both cases p has been freed. */
extern symbol * slice_to(struct SN_env * z, symbol * p) {
    int len;

    if (slice_check(z)) {
        lose_s(p);
        return NULL;
    }

    len = z->ket - z->bra;
    if (len > CAPACITY(p)) {
        p = increase_size(p, len);
        if (p == NULL) return NULL;
    }
    memmove(p, z->p + z->bra, len * sizeof(symbol));
    SET_SIZE(p, len);
    return p;
}
/* Copy the first z->l symbols of z->p into the symbol buffer p, growing p
 * if its capacity is too small, and set p's size to z->l.
 * Returns the buffer, which may have moved, or NULL if it cannot be grown
 * (p has then been freed). */
extern symbol * assign_to(struct SN_env * z, symbol * p) {
    const int len = z->l;

    if (len > CAPACITY(p)) {
        p = increase_size(p, len);
        if (p == NULL) {
            return NULL;
        }
    }
    memmove(p, z->p, len * sizeof(symbol));
    SET_SIZE(p, len);
    return p;
}
#if 0
/* Debugging aid, compiled out by the surrounding #if 0.
 * Prints the buffer z->p on one line with markers at the positions of the
 * environment's indices: '{' for lb, '[' for bra, '|' for c, ']' for ket and
 * '}' for l. Zero symbols are shown as '#'. When number is non-negative the
 * line is prefixed with number, line_count and the buffer size. */
extern void debug(struct SN_env * z, int number, int line_count) {
int i;
int limit = SIZE(z->p);
/*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
/* i runs one past the last symbol so that markers at the very end print */
for (i = 0; i <= limit; i++) {
if (z->lb == i) printf("{");
if (z->bra == i) printf("[");
if (z->c == i) printf("|");
if (z->ket == i) printf("]");
if (z->l == i) printf("}");
if (i < limit)
{ int ch = z->p[i];
if (ch == 0) ch = '#';
printf("%c", ch);
}
}
printf("'\n");
}
#endif

View File

@ -0,0 +1,26 @@
-- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.1 2007/08/21 01:11:16 tgl Exp $$
-- text search configuration for _CFGNAME_ language
-- NOTE(review): _CFGNAME_, _DICTNAME_, _STOPWORDS_, _LATDICTNAME_ and
-- _NONLATDICTNAME_ look like placeholders substituted per language before
-- this template is executed -- confirm against the build rules.
-- Snowball stemming dictionary for the language.
CREATE TEXT SEARCH DICTIONARY _DICTNAME_
(TEMPLATE = snowball,
OPTION = 'Language=_DICTNAME__STOPWORDS_');
COMMENT ON TEXT SEARCH DICTIONARY _DICTNAME_ IS 'Snowball stemmer for _DICTNAME_ language';
-- Configuration using the default parser.
CREATE TEXT SEARCH CONFIGURATION _CFGNAME_
(PARSER = default);
COMMENT ON TEXT SEARCH CONFIGURATION _CFGNAME_ IS 'Configuration for _CFGNAME_ language';
-- Token types listed here are handled by the "simple" dictionary.
ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
FOR email, url, host, sfloat, version, uri, file, float, int, uint
WITH simple;
-- The l* word token types are mapped to _LATDICTNAME_.
ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
FOR lhword, lpart_hword, lword
WITH _LATDICTNAME_;
-- The remaining word token types are mapped to _NONLATDICTNAME_.
ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
FOR hword, nlhword, nlpart_hword, nlword, word, part_hword
WITH _NONLATDICTNAME_;

View File

@ -0,0 +1,18 @@
-- $PostgreSQL: pgsql/src/backend/snowball/snowball_func.sql.in,v 1.1 2007/08/21 01:11:16 tgl Exp $$
-- Registers the Snowball dictionary template: two C functions loaded from
-- $libdir/dict_snowball and the template that refers to them.
-- Everything below is created in pg_catalog.
SET search_path = pg_catalog;
-- Function used as the template's INIT method.
CREATE FUNCTION dsnowball_init(INTERNAL)
RETURNS INTERNAL AS '$libdir/dict_snowball', 'dsnowball_init'
LANGUAGE C STRICT;
-- Function used as the template's LEXIZE method.
CREATE FUNCTION dsnowball_lexize(INTERNAL, INTERNAL, INTERNAL, INTERNAL)
RETURNS INTERNAL AS '$libdir/dict_snowball', 'dsnowball_lexize'
LANGUAGE C STRICT;
-- The template itself, wiring the two functions together.
CREATE TEXT SEARCH TEMPLATE snowball
(INIT = dsnowball_init,
LEXIZE = dsnowball_lexize);
COMMENT ON TEXT SEARCH TEMPLATE snowball IS 'Snowball stemmer';

View File

@ -0,0 +1,94 @@
og
i
jeg
det
at
en
den
til
er
som
de
med
han
af
for
ikke
der
var
mig
sig
men
et
har
om
vi
min
havde
ham
hun
nu
over
da
fra
du
ud
sin
dem
os
op
man
hans
hvor
eller
hvad
skal
selv
her
alle
vil
blev
kunne
ind
når
være
dog
noget
ville
jo
deres
efter
ned
skulle
denne
end
dette
mit
også
under
have
dig
anden
hende
mine
alt
meget
sit
sine
vor
mod
disse
hvis
din
nogle
hos
blive
mange
ad
bliver
hendes
været
thi
jer
sådan

View File

@ -0,0 +1,101 @@
de
en
van
ik
te
dat
die
in
een
hij
het
niet
zijn
is
was
op
aan
met
als
voor
had
er
maar
om
hem
dan
zou
of
wat
mijn
men
dit
zo
door
over
ze
zich
bij
ook
tot
je
mij
uit
der
daar
haar
naar
heb
hoe
heeft
hebben
deze
u
want
nog
zal
me
zij
nu
ge
geen
omdat
iets
worden
toch
al
waren
veel
meer
doen
toen
moet
ben
zonder
kan
hun
dus
alles
onder
ja
eens
hier
wie
werd
altijd
doch
wordt
wezen
kunnen
ons
zelf
tegen
na
reeds
wil
kon
niets
uw
iemand
geweest
andere

View File

@ -0,0 +1,128 @@
i
me
my
myself
we
our
ours
ourselves
you
your
yours
yourself
yourselves
he
him
his
himself
she
her
hers
herself
it
its
itself
they
them
their
theirs
themselves
what
which
who
whom
this
that
these
those
am
is
are
was
were
be
been
being
have
has
had
having
do
does
did
doing
a
an
the
and
but
if
or
because
as
until
while
of
at
by
for
with
about
against
between
into
through
during
before
after
above
below
to
from
up
down
in
out
on
off
over
under
again
further
then
once
here
there
when
where
why
how
all
any
both
each
few
more
most
other
some
such
no
nor
not
only
own
same
so
than
too
very
s
t
can
will
just
don
should
now

View File

@ -0,0 +1,235 @@
olla
olen
olet
on
olemme
olette
ovat
ole
oli
olisi
olisit
olisin
olisimme
olisitte
olisivat
olit
olin
olimme
olitte
olivat
ollut
olleet
en
et
ei
emme
ette
eivät
minä
minun
minut
minua
minussa
minusta
minuun
minulla
minulta
minulle
sinä
sinun
sinut
sinua
sinussa
sinusta
sinuun
sinulla
sinulta
sinulle
hän
hänen
hänet
häntä
hänessä
hänestä
häneen
hänellä
häneltä
hänelle
me
meidän
meidät
meitä
meissä
meistä
meihin
meillä
meiltä
meille
te
teidän
teidät
teitä
teissä
teistä
teihin
teillä
teiltä
teille
he
heidän
heidät
heitä
heissä
heistä
heihin
heillä
heiltä
heille
tämä
tämän
tätä
tässä
tästä
tähän
tällä
tältä
tälle
tänä
täksi
tuo
tuon
tuota
tuossa
tuosta
tuohon
tuolla
tuolta
tuolle
tuona
tuoksi
se
sen
sitä
siinä
siitä
siihen
sillä
siltä
sille
sinä
siksi
nämä
näiden
näitä
näissä
näistä
näihin
näillä
näiltä
näille
näinä
näiksi
nuo
noiden
noita
noissa
noista
noihin
noilla
noilta
noille
noina
noiksi
ne
niiden
niitä
niissä
niistä
niihin
niillä
niiltä
niille
niinä
niiksi
kuka
kenen
kenet
ketä
kenessä
kenestä
keneen
kenellä
keneltä
kenelle
kenenä
keneksi
ketkä
keiden
ketkä
keitä
keissä
keistä
keihin
keillä
keiltä
keille
keinä
keiksi
mikä
minkä
minkä
mitä
missä
mistä
mihin
millä
miltä
mille
minä
miksi
mitkä
joka
jonka
jota
jossa
josta
johon
jolla
jolta
jolle
jona
joksi
jotka
joiden
joita
joissa
joista
joihin
joilla
joilta
joille
joina
joiksi
että
ja
jos
koska
kuin
mutta
niin
sekä
sillä
tai
vaan
vai
vaikka
kanssa
mukaan
noin
poikki
yli
kun
niin
nyt
itse

View File

@ -0,0 +1,155 @@
au
aux
avec
ce
ces
dans
de
des
du
elle
en
et
eux
il
je
la
le
leur
lui
ma
mais
me
même
mes
moi
mon
ne
nos
notre
nous
on
ou
par
pas
pour
qu
que
qui
sa
se
ses
son
sur
ta
te
tes
toi
ton
tu
un
une
vos
votre
vous
c
d
j
l
à
m
n
s
t
y
été
étée
étées
étés
étant
étante
étants
étantes
suis
es
est
sommes
êtes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
étais
était
étions
étiez
étaient
fus
fut
fûmes
fûtes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fût
fussions
fussiez
fussent
ayant
ayante
ayantes
ayants
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eûmes
eûtes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eût
eussions
eussiez
eussent

View File

@ -0,0 +1,231 @@
aber
alle
allem
allen
aller
alles
als
also
am
an
ander
andere
anderem
anderen
anderer
anderes
anderm
andern
anderr
anders
auch
auf
aus
bei
bin
bis
bist
da
damit
dann
der
den
des
dem
die
das
daß
derselbe
derselben
denselben
desselben
demselben
dieselbe
dieselben
dasselbe
dazu
dein
deine
deinem
deinen
deiner
deines
denn
derer
dessen
dich
dir
du
dies
diese
diesem
diesen
dieser
dieses
doch
dort
durch
ein
eine
einem
einen
einer
eines
einig
einige
einigem
einigen
einiger
einiges
einmal
er
ihn
ihm
es
etwas
euer
eure
eurem
euren
eurer
eures
für
gegen
gewesen
hab
habe
haben
hat
hatte
hatten
hier
hin
hinter
ich
mich
mir
ihr
ihre
ihrem
ihren
ihrer
ihres
euch
im
in
indem
ins
ist
jede
jedem
jeden
jeder
jedes
jene
jenem
jenen
jener
jenes
jetzt
kann
kein
keine
keinem
keinen
keiner
keines
können
könnte
machen
man
manche
manchem
manchen
mancher
manches
mein
meine
meinem
meinen
meiner
meines
mit
muss
musste
nach
nicht
nichts
noch
nun
nur
ob
oder
ohne
sehr
sein
seine
seinem
seinen
seiner
seines
selbst
sich
sie
ihnen
sind
so
solche
solchem
solchen
solcher
solches
soll
sollte
sondern
sonst
über
um
und
uns
unse
unsem
unsen
unser
unses
unter
viel
vom
von
vor
während
war
waren
warst
was
weg
weil
weiter
welche
welchem
welchen
welcher
welches
wenn
werde
werden
wie
wieder
will
wir
wird
wirst
wo
wollen
wollte
würde
würden
zu
zum
zur
zwar
zwischen

View File

@ -0,0 +1,199 @@
a
ahogy
ahol
aki
akik
akkor
alatt
által
általában
amely
amelyek
amelyekben
amelyeket
amelyet
amelynek
ami
amit
amolyan
amíg
amikor
át
abban
ahhoz
annak
arra
arról
az
azok
azon
azt
azzal
azért
aztán
azután
azonban
bár
be
belül
benne
cikk
cikkek
cikkeket
csak
de
e
eddig
egész
egy
egyes
egyetlen
egyéb
egyik
egyre
ekkor
el
elég
ellen
elõ
elõször
elõtt
elsõ
én
éppen
ebben
ehhez
emilyen
ennek
erre
ez
ezt
ezek
ezen
ezzel
ezért
és
fel
felé
hanem
hiszen
hogy
hogyan
igen
így
illetve
ill.
ill
ilyen
ilyenkor
ison
ismét
itt
jól
jobban
kell
kellett
keresztül
keressünk
ki
kívül
között
közül
legalább
lehet
lehetett
legyen
lenne
lenni
lesz
lett
maga
magát
majd
majd
már
más
másik
meg
még
mellett
mert
mely
melyek
mi
mit
míg
miért
milyen
mikor
minden
mindent
mindenki
mindig
mint
mintha
mivel
most
nagy
nagyobb
nagyon
ne
néha
nekem
neki
nem
néhány
nélkül
nincs
olyan
ott
össze
õ
õk
õket
pedig
persze
s
saját
sem
semmi
sok
sokat
sokkal
számára
szemben
szerint
szinte
talán
tehát
teljes
tovább
továbbá
több
úgy
ugyanis
új
újabb
újra
után
utána
utolsó
vagy
vagyis
valaki
valami
valamint
való
vagyok
van
vannak
volt
voltam
voltak
voltunk
vissza
vele
viszont
volna

View File

@ -0,0 +1,279 @@
ad
al
allo
ai
agli
all
agl
alla
alle
con
col
coi
da
dal
dallo
dai
dagli
dall
dagl
dalla
dalle
di
del
dello
dei
degli
dell
degl
della
delle
in
nel
nello
nei
negli
nell
negl
nella
nelle
su
sul
sullo
sui
sugli
sull
sugl
sulla
sulle
per
tra
contro
io
tu
lui
lei
noi
voi
loro
mio
mia
miei
mie
tuo
tua
tuoi
tue
suo
sua
suoi
sue
nostro
nostra
nostri
nostre
vostro
vostra
vostri
vostre
mi
ti
ci
vi
lo
la
li
le
gli
ne
il
un
uno
una
ma
ed
se
perché
anche
come
dov
dove
che
chi
cui
non
più
quale
quanto
quanti
quanta
quante
quello
quelli
quella
quelle
questo
questi
questa
queste
si
tutto
tutti
a
c
e
i
l
o
ho
hai
ha
abbiamo
avete
hanno
abbia
abbiate
abbiano
avrò
avrai
avrà
avremo
avrete
avranno
avrei
avresti
avrebbe
avremmo
avreste
avrebbero
avevo
avevi
aveva
avevamo
avevate
avevano
ebbi
avesti
ebbe
avemmo
aveste
ebbero
avessi
avesse
avessimo
avessero
avendo
avuto
avuta
avuti
avute
sono
sei
è
siamo
siete
sia
siate
siano
sarò
sarai
sarà
saremo
sarete
saranno
sarei
saresti
sarebbe
saremmo
sareste
sarebbero
ero
eri
era
eravamo
eravate
erano
fui
fosti
fu
fummo
foste
furono
fossi
fosse
fossimo
fossero
essendo
faccio
fai
facciamo
fanno
faccia
facciate
facciano
farò
farai
farà
faremo
farete
faranno
farei
faresti
farebbe
faremmo
fareste
farebbero
facevo
facevi
faceva
facevamo
facevate
facevano
feci
facesti
fece
facemmo
faceste
fecero
facessi
facesse
facessimo
facessero
facendo
sto
stai
sta
stiamo
stanno
stia
stiate
stiano
starò
starai
starà
staremo
starete
staranno
starei
staresti
starebbe
staremmo
stareste
starebbero
stavo
stavi
stava
stavamo
stavate
stavano
stetti
stesti
stette
stemmo
steste
stettero
stessi
stesse
stessimo
stessero
stando

View File

@ -0,0 +1,176 @@
og
i
jeg
det
at
en
et
den
til
er
som
de
med
han
av
ikke
ikkje
der
var
meg
seg
men
ett
har
om
vi
min
mitt
ha
hadde
hun
over
da
ved
fra
du
ut
sin
dem
oss
opp
man
kan
hans
hvor
eller
hva
skal
selv
sjøl
her
alle
vil
bli
ble
blei
blitt
kunne
inn
når
være
kom
noen
noe
ville
dere
som
deres
kun
ja
etter
ned
skulle
denne
for
deg
si
sine
sitt
mot
å
meget
hvorfor
dette
disse
uten
hvordan
ingen
din
ditt
blir
samme
hvilken
hvilke
sånn
inni
mellom
vår
hver
hvem
vors
hvis
både
bare
enn
fordi
før
mange
også
slik
vært
være
båe
begge
siden
dykk
dykkar
dei
deira
deires
deim
di
eg
ein
eit
eitt
elles
honom
hjå
ho
hoe
henne
hennar
hennes
hoss
hossen
ikkje
ingi
inkje
korleis
korso
kva
kvar
kvarhelst
kven
kvi
kvifor
me
medan
mi
mine
mykje
no
nokon
noka
nokor
noko
nokre
si
sia
sidan
so
somt
somme
um
upp
vere
vore
verte
vort
varte
vart

View File

@ -0,0 +1,203 @@
de
a
o
que
e
do
da
em
um
para
com
não
uma
os
no
se
na
por
mais
as
dos
como
mas
ao
ele
das
à
seu
sua
ou
quando
muito
nos
eu
também
pelo
pela
até
isso
ela
entre
depois
sem
mesmo
aos
seus
quem
nas
me
esse
eles
você
essa
num
nem
suas
meu
às
minha
numa
pelos
elas
qual
nós
lhe
deles
essas
esses
pelas
este
dele
tu
te
vocês
vos
lhes
meus
minhas
teu
tua
teus
tuas
nosso
nossa
nossos
nossas
dela
delas
esta
estes
estas
aquele
aquela
aqueles
aquelas
isto
aquilo
estou
está
estamos
estão
estive
esteve
estivemos
estiveram
estava
estávamos
estavam
estivera
estivéramos
esteja
estejamos
estejam
estivesse
estivéssemos
estivessem
estiver
estivermos
estiverem
hei
havemos
hão
houve
houvemos
houveram
houvera
houvéramos
haja
hajamos
hajam
houvesse
houvéssemos
houvessem
houver
houvermos
houverem
houverei
houverá
houveremos
houverão
houveria
houveríamos
houveriam
sou
somos
são
era
éramos
eram
fui
foi
fomos
foram
fora
fôramos
seja
sejamos
sejam
fosse
fôssemos
fossem
for
formos
forem
serei
será
seremos
serão
seria
seríamos
seriam
tenho
tem
temos
têm
tinha
tínhamos
tinham
tive
teve
tivemos
tiveram
tivera
tivéramos
tenha
tenhamos
tenham
tivesse
tivéssemos
tivessem
tiver
tivermos
tiverem
terei
terá
teremos
terão
teria
teríamos
teriam

View File

@ -0,0 +1,151 @@
и
в
во
не
что
он
на
я
с
со
как
а
то
все
она
так
его
но
да
ты
к
у
же
вы
за
бы
по
только
ее
мне
было
вот
от
меня
еще
нет
о
из
ему
теперь
когда
даже
ну
вдруг
ли
если
уже
или
ни
быть
был
него
до
вас
нибудь
опять
уж
вам
ведь
там
потом
себя
ничего
ей
может
они
тут
где
есть
надо
ней
для
мы
тебя
их
чем
была
сам
чтоб
без
будто
чего
раз
тоже
себе
под
будет
ж
тогда
кто
этот
того
потому
этого
какой
совсем
ним
здесь
этом
один
почти
мой
тем
чтобы
нее
сейчас
были
куда
зачем
всех
никогда
можно
при
наконец
два
об
другой
хоть
после
над
больше
тот
через
эти
нас
про
всего
них
какая
много
разве
три
эту
моя
впрочем
хорошо
свою
этой
перед
иногда
лучше
чуть
том
нельзя
такой
им
более
всегда
конечно
всю
между

View File

@ -0,0 +1,313 @@
de
la
que
el
en
y
a
los
del
se
las
por
un
para
con
no
una
su
al
lo
como
más
pero
sus
le
ya
o
este
porque
esta
entre
cuando
muy
sin
sobre
también
me
hasta
hay
donde
quien
desde
todo
nos
durante
todos
uno
les
ni
contra
otros
ese
eso
ante
ellos
e
esto
antes
algunos
qué
unos
yo
otro
otras
otra
él
tanto
esa
estos
mucho
quienes
nada
muchos
cual
poco
ella
estar
estas
algunas
algo
nosotros
mi
mis
te
ti
tu
tus
ellas
nosotras
vosotros
vosotras
os
mío
mía
míos
mías
tuyo
tuya
tuyos
tuyas
suyo
suya
suyos
suyas
nuestro
nuestra
nuestros
nuestras
vuestro
vuestra
vuestros
vuestras
esos
esas
estoy
estás
está
estamos
estáis
están
esté
estés
estemos
estéis
estén
estaré
estarás
estará
estaremos
estaréis
estarán
estaría
estarías
estaríamos
estaríais
estarían
estaba
estabas
estábamos
estabais
estaban
estuve
estuviste
estuvo
estuvimos
estuvisteis
estuvieron
estuviera
estuvieras
estuviéramos
estuvierais
estuvieran
estuviese
estuvieses
estuviésemos
estuvieseis
estuviesen
estando
estado
estada
estados
estadas
estad
he
has
ha
hemos
habéis
han
haya
hayas
hayamos
hayáis
hayan
habré
habrás
habrá
habremos
habréis
habrán
habría
habrías
habríamos
habríais
habrían
había
habías
habíamos
habíais
habían
hube
hubiste
hubo
hubimos
hubisteis
hubieron
hubiera
hubieras
hubiéramos
hubierais
hubieran
hubiese
hubieses
hubiésemos
hubieseis
hubiesen
habiendo
habido
habida
habidos
habidas
soy
eres
es
somos
sois
son
sea
seas
seamos
seáis
sean
seré
serás
será
seremos
seréis
serán
sería
serías
seríamos
seríais
serían
era
eras
éramos
erais
eran
fui
fuiste
fue
fuimos
fuisteis
fueron
fuera
fueras
fuéramos
fuerais
fueran
fuese
fueses
fuésemos
fueseis
fuesen
sintiendo
sentido
sentida
sentidos
sentidas
siente
sentid
tengo
tienes
tiene
tenemos
tenéis
tienen
tenga
tengas
tengamos
tengáis
tengan
tendré
tendrás
tendrá
tendremos
tendréis
tendrán
tendría
tendrías
tendríamos
tendríais
tendrían
tenía
tenías
teníamos
teníais
tenían
tuve
tuviste
tuvo
tuvimos
tuvisteis
tuvieron
tuviera
tuvieras
tuviéramos
tuvierais
tuvieran
tuviese
tuvieses
tuviésemos
tuvieseis
tuviesen
teniendo
tenido
tenida
tenidos
tenidas
tened

View File

@ -0,0 +1,114 @@
och
det
att
i
en
jag
hon
som
han
den
med
var
sig
för
till
är
men
ett
om
hade
de
av
icke
mig
du
henne
sin
nu
har
inte
hans
honom
skulle
hennes
där
min
man
ej
vid
kunde
något
från
ut
när
efter
upp
vi
dem
vara
vad
över
än
dig
kan
sina
här
ha
mot
alla
under
någon
eller
allt
mycket
sedan
ju
denna
själv
detta
åt
utan
varit
hur
ingen
mitt
ni
bli
blev
oss
din
dessa
några
deras
blir
mina
samma
vilken
er
sådan
vår
blivit
dess
inom
mellan
sådant
varför
varje
vilka
ditt
vem
vilket
sitta
sådana
vart
dina
vars
vårt
våra
ert
era
vilkas

View File

@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.284 2007/07/17 05:02:02 neilc Exp $
* $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.285 2007/08/21 01:11:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -355,6 +355,8 @@ check_xact_readonly(Node *parsetree)
case T_TruncateStmt:
case T_DropOwnedStmt:
case T_ReassignOwnedStmt:
case T_AlterTSDictionaryStmt:
case T_AlterTSConfigurationStmt:
ereport(ERROR,
(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
errmsg("transaction is read-only")));
@ -661,6 +663,26 @@ ProcessUtility(Node *parsetree,
stmt->missing_ok);
break;
case OBJECT_TSPARSER:
RemoveTSParser(names, stmt->behavior,
stmt->missing_ok);
break;
case OBJECT_TSDICTIONARY:
RemoveTSDictionary(names, stmt->behavior,
stmt->missing_ok);
break;
case OBJECT_TSTEMPLATE:
RemoveTSTemplate(names, stmt->behavior,
stmt->missing_ok);
break;
case OBJECT_TSCONFIGURATION:
RemoveTSConfiguration(names, stmt->behavior,
stmt->missing_ok);
break;
default:
elog(ERROR, "unrecognized drop object type: %d",
(int) stmt->removeType);
@ -832,6 +854,22 @@ ProcessUtility(Node *parsetree,
Assert(stmt->args == NIL);
DefineType(stmt->defnames, stmt->definition);
break;
case OBJECT_TSPARSER:
Assert(stmt->args == NIL);
DefineTSParser(stmt->defnames, stmt->definition);
break;
case OBJECT_TSDICTIONARY:
Assert(stmt->args == NIL);
DefineTSDictionary(stmt->defnames, stmt->definition);
break;
case OBJECT_TSTEMPLATE:
Assert(stmt->args == NIL);
DefineTSTemplate(stmt->defnames, stmt->definition);
break;
case OBJECT_TSCONFIGURATION:
Assert(stmt->args == NIL);
DefineTSConfiguration(stmt->defnames, stmt->definition);
break;
default:
elog(ERROR, "unrecognized define stmt type: %d",
(int) stmt->kind);
@ -1221,6 +1259,14 @@ ProcessUtility(Node *parsetree,
RemoveOpFamily((RemoveOpFamilyStmt *) parsetree);
break;
case T_AlterTSDictionaryStmt:
AlterTSDictionary((AlterTSDictionaryStmt *) parsetree);
break;
case T_AlterTSConfigurationStmt:
AlterTSConfiguration((AlterTSConfigurationStmt *) parsetree);
break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(parsetree));
@ -1525,6 +1571,18 @@ CreateCommandTag(Node *parsetree)
case OBJECT_SCHEMA:
tag = "DROP SCHEMA";
break;
case OBJECT_TSPARSER:
tag = "DROP TEXT SEARCH PARSER";
break;
case OBJECT_TSDICTIONARY:
tag = "DROP TEXT SEARCH DICTIONARY";
break;
case OBJECT_TSTEMPLATE:
tag = "DROP TEXT SEARCH TEMPLATE";
break;
case OBJECT_TSCONFIGURATION:
tag = "DROP TEXT SEARCH CONFIGURATION";
break;
default:
tag = "???";
}
@ -1591,6 +1649,18 @@ CreateCommandTag(Node *parsetree)
case OBJECT_VIEW:
tag = "ALTER VIEW";
break;
case OBJECT_TSPARSER:
tag = "ALTER TEXT SEARCH PARSER";
break;
case OBJECT_TSDICTIONARY:
tag = "ALTER TEXT SEARCH DICTIONARY";
break;
case OBJECT_TSTEMPLATE:
tag = "ALTER TEXT SEARCH TEMPLATE";
break;
case OBJECT_TSCONFIGURATION:
tag = "ALTER TEXT SEARCH CONFIGURATION";
break;
default:
tag = "???";
break;
@ -1618,6 +1688,18 @@ CreateCommandTag(Node *parsetree)
case OBJECT_TYPE:
tag = "ALTER TYPE";
break;
case OBJECT_TSPARSER:
tag = "ALTER TEXT SEARCH PARSER";
break;
case OBJECT_TSDICTIONARY:
tag = "ALTER TEXT SEARCH DICTIONARY";
break;
case OBJECT_TSTEMPLATE:
tag = "ALTER TEXT SEARCH TEMPLATE";
break;
case OBJECT_TSCONFIGURATION:
tag = "ALTER TEXT SEARCH CONFIGURATION";
break;
default:
tag = "???";
break;
@ -1663,6 +1745,12 @@ CreateCommandTag(Node *parsetree)
case OBJECT_TYPE:
tag = "ALTER TYPE";
break;
case OBJECT_TSCONFIGURATION:
tag = "ALTER TEXT SEARCH CONFIGURATION";
break;
case OBJECT_TSDICTIONARY:
tag = "ALTER TEXT SEARCH DICTIONARY";
break;
default:
tag = "???";
break;
@ -1722,6 +1810,18 @@ CreateCommandTag(Node *parsetree)
case OBJECT_TYPE:
tag = "CREATE TYPE";
break;
case OBJECT_TSPARSER:
tag = "CREATE TEXT SEARCH PARSER";
break;
case OBJECT_TSDICTIONARY:
tag = "CREATE TEXT SEARCH DICTIONARY";
break;
case OBJECT_TSTEMPLATE:
tag = "CREATE TEXT SEARCH TEMPLATE";
break;
case OBJECT_TSCONFIGURATION:
tag = "CREATE TEXT SEARCH CONFIGURATION";
break;
default:
tag = "???";
}
@ -1949,6 +2049,14 @@ CreateCommandTag(Node *parsetree)
tag = "DROP OPERATOR FAMILY";
break;
case T_AlterTSDictionaryStmt:
tag = "ALTER TEXT SEARCH DICTIONARY";
break;
case T_AlterTSConfigurationStmt:
tag = "ALTER TEXT SEARCH CONFIGURATION";
break;
case T_PrepareStmt:
tag = "PREPARE";
break;
@ -2386,6 +2494,14 @@ GetCommandLogLevel(Node *parsetree)
lev = LOGSTMT_DDL;
break;
case T_AlterTSDictionaryStmt:
lev = LOGSTMT_DDL;
break;
case T_AlterTSConfigurationStmt:
lev = LOGSTMT_DDL;
break;
case T_PrepareStmt:
{
PrepareStmt *stmt = (PrepareStmt *) parsetree;

View File

@ -0,0 +1,51 @@
#-------------------------------------------------------------------------
#
# Makefile for backend/tsearch
#
# Copyright (c) 2006-2007, PostgreSQL Global Development Group
#
# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.1 2007/08/21 01:11:18 tgl Exp $
#
#-------------------------------------------------------------------------
subdir = src/backend/tsearch
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
DICTDIR=tsearch_data
DICTFILES=synonym.syn.sample thesaurus.ths.sample
OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
dict_simple.o dict_synonym.o dict_thesaurus.o \
dict_ispell.o regis.o spell.o \
to_tsany.o ts_utils.o
all: SUBSYS.o
SUBSYS.o: $(OBJS)
$(LD) $(LDREL) $(LDOUT) SUBSYS.o $^
depend dep:
$(CC) -MM $(CFLAGS) *.c >depend
.PHONY: install-data
install-data: $(DICTFILES) installdirs
for i in $(DICTFILES); \
do $(INSTALL_DATA) $$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i; \
done
installdirs:
$(mkinstalldirs) '$(DESTDIR)$(datadir)' '$(DESTDIR)$(datadir)/$(DICTDIR)'
.PHONY: uninstall-data
# Remove the sample dictionary files that install-data copied into
# $(datadir)/$(DICTDIR).  Every entry in DICTFILES is a regular file, so
# plain "rm -f" is enough; "-r" would only add the risk of recursively
# deleting a directory that happens to have the same name.
uninstall-data:
	for i in $(DICTFILES); \
	do rm -f '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \
	done
clean distclean maintainer-clean:
rm -f SUBSYS.o $(OBJS)
ifeq (depend,$(wildcard depend))
include depend
endif

131
src/backend/tsearch/dict.c Normal file
View File

@ -0,0 +1,131 @@
/*-------------------------------------------------------------------------
*
* dict.c
* Standard interface to dictionary
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "funcapi.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/skey.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_type.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
#include "utils/syscache.h"
/*
 * Run one word through the dictionary identified by dictId and return the
 * lexemes it produces as a text array, or NULL when the dictionary yields
 * no result.  Mostly a debugging function.
 *
 * If the first call sets dstate.getnext, the dictionary is called a second
 * time with dstate.isend set, and a non-NULL result of that second call
 * replaces the first one.
 */
static ArrayType *
ts_lexize_workhorse(Oid dictId, text *in)
{
	TSDictionaryCacheEntry *dict;
	TSLexeme   *res;
	TSLexeme   *lex;
	Datum	   *da;
	ArrayType  *a;
	DictSubState dstate = {false, false, NULL};
	int			nlex = 0;
	int			i;

	dict = lookup_ts_dictionary_cache(dictId);

	res = (TSLexeme *) DatumGetPointer(FunctionCall4(&dict->lexize,
											 PointerGetDatum(dict->dictData),
												PointerGetDatum(VARDATA(in)),
									   Int32GetDatum(VARSIZE(in) - VARHDRSZ),
												  PointerGetDatum(&dstate)));

	if (dstate.getnext)
	{
		TSLexeme   *second;

		dstate.isend = true;
		second = (TSLexeme *) DatumGetPointer(FunctionCall4(&dict->lexize,
											 PointerGetDatum(dict->dictData),
												PointerGetDatum(VARDATA(in)),
									   Int32GetDatum(VARSIZE(in) - VARHDRSZ),
												  PointerGetDatum(&dstate)));
		if (second != NULL)
			res = second;
	}

	if (res == NULL)
		return NULL;

	/* the lexeme list is terminated by an entry whose lexeme is NULL */
	for (lex = res; lex->lexeme; lex++)
		nlex++;

	da = (Datum *) palloc(sizeof(Datum) * (nlex + 1));
	for (i = 0; i < nlex; i++)
		da[i] = DirectFunctionCall1(textin, CStringGetDatum(res[i].lexeme));

	a = construct_array(da,
						nlex,
						TEXTOID,
						-1,
						false,
						'i');

	/* release the temporary text datums and the dictionary's output */
	for (i = 0; i < nlex; i++)
	{
		pfree(DatumGetPointer(da[i]));
		pfree(res[i].lexeme);
	}
	pfree(res);
	pfree(da);

	return a;
}
Datum
ts_lexize_byid(PG_FUNCTION_ARGS)
{
Oid dictId = PG_GETARG_OID(0);
text *in = PG_GETARG_TEXT_P(1);
ArrayType *a;
a = ts_lexize_workhorse(dictId, in);
if (a)
PG_RETURN_POINTER(a);
else
PG_RETURN_NULL();
}
Datum
ts_lexize_byname(PG_FUNCTION_ARGS)
{
text *dictname = PG_GETARG_TEXT_P(0);
text *in = PG_GETARG_TEXT_P(1);
Oid dictId;
ArrayType *a;
dictId = TSDictionaryGetDictid(textToQualifiedNameList(dictname), false);
a = ts_lexize_workhorse(dictId, in);
if (a)
PG_RETURN_POINTER(a);
else
PG_RETURN_NULL();
}

View File

@ -0,0 +1,164 @@
/*-------------------------------------------------------------------------
*
* dict_ispell.c
* Ispell dictionary interface
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/dicts/spell.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
/*
 * Private state of one ISpell dictionary instance, built by dispell_init()
 * and handed back to dispell_lexize() as its first argument.
 */
typedef struct
{
/* stop words, loaded from the StopWords parameter (empty if not given) */
StopList stoplist;
/* dictionary and affix data filled in by the NIImport and NISort calls */
IspellDict obj;
} DictISpell;
/*
 * Init function for the ISpell dictionary template.
 *
 * Argument 0 is a text value holding key/value pairs, split up by
 * parse_keyvalpairs().  Recognized keys (compared case-insensitively):
 *   DictFile  - dictionary file, loaded with NIImportDictionary()
 *   AffFile   - affix file, loaded with NIImportAffixes()
 *   StopWords - stop word list, loaded with readstoplist()
 * Each key may appear at most once; DictFile and AffFile are both required.
 * Any violation, or an unknown key, raises ERROR.
 *
 * Returns a pointer to the newly allocated DictISpell.
 */
Datum
dispell_init(PG_FUNCTION_ARGS)
{
DictISpell *d;
Map *cfg,
*pcfg;
bool affloaded = false,
dictloaded = false,
stoploaded = false;
text *in;
/* init functions must defend against NULLs for themselves */
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("NULL config not allowed for ISpell")));
in = PG_GETARG_TEXT_P(0);
parse_keyvalpairs(in, &cfg);
PG_FREE_IF_COPY(in, 0);
d = (DictISpell *) palloc0(sizeof(DictISpell));
d->stoplist.wordop = recode_and_lowerstr;
/* walk the parsed pairs; the array ends at the entry whose key is NULL */
pcfg = cfg;
while (pcfg->key)
{
if (pg_strcasecmp("DictFile", pcfg->key) == 0)
{
if (dictloaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple DictFile parameters")));
NIImportDictionary(&(d->obj),
get_tsearch_config_filename(pcfg->value,
"dict"));
dictloaded = true;
}
else if (pg_strcasecmp("AffFile", pcfg->key) == 0)
{
if (affloaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple AffFile parameters")));
NIImportAffixes(&(d->obj),
get_tsearch_config_filename(pcfg->value,
"affix"));
affloaded = true;
}
else if (pg_strcasecmp("StopWords", pcfg->key) == 0)
{
if (stoploaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
readstoplist(pcfg->value, &(d->stoplist));
sortstoplist(&(d->stoplist));
stoploaded = true;
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized ISpell parameter: \"%s\"",
pcfg->key)));
}
/* this pair has been consumed; release its strings */
pfree(pcfg->key);
pfree(pcfg->value);
pcfg++;
}
pfree(cfg);
/* sorting is done only once both the dictionary and the affixes are loaded */
if (affloaded && dictloaded)
{
NISortDictionary(&(d->obj));
NISortAffixes(&(d->obj));
}
else if (!affloaded)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing AffFile parameter")));
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing DictFile parameter")));
}
/* NOTE(review): presumably discards child contexts created while loading -- confirm */
MemoryContextDeleteChildren(CurrentMemoryContext);
PG_RETURN_POINTER(d);
}
/*
 * Lexize function for the ISpell dictionary template.
 *
 * Arguments: 0 = DictISpell built by dispell_init(), 1 = pointer to the
 * input word (not necessarily NUL-terminated), 2 = its length in bytes.
 *
 * The word is lower-cased and normalized with NINormalizeWord(); lexemes
 * found in the stop list are then removed from the result.  Returns NULL
 * when len <= 0 or when NINormalizeWord() returns NULL; otherwise returns
 * the TSLexeme array, terminated by an entry whose lexeme is NULL (the
 * array may end up holding no lexemes at all).
 */
Datum
dispell_lexize(PG_FUNCTION_ARGS)
{
	DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
	char	   *in = (char *) PG_GETARG_POINTER(1);
	int32		len = PG_GETARG_INT32(2);
	char	   *txt;
	TSLexeme   *res;
	TSLexeme   *ptr,
			   *cptr;

	if (len <= 0)
		PG_RETURN_POINTER(NULL);

	txt = lowerstr_with_len(in, len);
	res = NINormalizeWord(&(d->obj), txt);

	if (res == NULL)
		PG_RETURN_POINTER(NULL);

	/*
	 * Compact the array in place: ptr reads every entry, cptr is where the
	 * next surviving entry goes.
	 */
	ptr = cptr = res;
	while (ptr->lexeme)
	{
		if (searchstoplist(&(d->stoplist), ptr->lexeme))
		{
			pfree(ptr->lexeme);
			ptr->lexeme = NULL;
			ptr++;
		}
		else
		{
			/*
			 * Until the first stop word is dropped cptr and ptr are the
			 * same entry; memcpy() must not be given overlapping objects,
			 * so skip the copy in that case.
			 */
			if (cptr != ptr)
				memcpy(cptr, ptr, sizeof(TSLexeme));
			cptr++;
			ptr++;
		}
	}
	cptr->lexeme = NULL;

	PG_RETURN_POINTER(res);
}

View File

@ -0,0 +1,65 @@
/*-------------------------------------------------------------------------
*
* dict_simple.c
* Simple dictionary: just lowercase and check for stopword
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
/*
 * Private state of one "simple" dictionary instance: created by
 * dsimple_init() and consulted by dsimple_lexize().
 */
typedef struct
{
/* stop words; stays empty when no stop-word file name is passed to init */
StopList stoplist;
} DictExample;
/*
 * Init function for the "simple" dictionary template.
 *
 * Argument 0, when present and non-NULL, is a text value naming the stop
 * word list, which is loaded with readstoplist() and sorted.  Without it
 * the dictionary has an empty stop list.  Returns the new DictExample.
 */
Datum
dsimple_init(PG_FUNCTION_ARGS)
{
	DictExample *d;
	text	   *in;
	char	   *filename;

	d = (DictExample *) palloc0(sizeof(DictExample));
	d->stoplist.wordop = recode_and_lowerstr;

	/* no argument: nothing more to set up */
	if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
		PG_RETURN_POINTER(d);

	in = PG_GETARG_TEXT_P(0);
	filename = TextPGetCString(in);

	readstoplist(filename, &d->stoplist);
	sortstoplist(&d->stoplist);
	pfree(filename);

	PG_RETURN_POINTER(d);
}
/*
 * Lexize function for the "simple" dictionary template.
 *
 * Arguments: 0 = DictExample from dsimple_init(), 1 = pointer to the input
 * word, 2 = its length.  Always returns a two-element TSLexeme array: the
 * first entry carries the lower-cased word unless that word is empty or is
 * found in the stop list, in which case the array holds no lexeme.
 */
Datum
dsimple_lexize(PG_FUNCTION_ARGS)
{
	DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
	char	   *in = (char *) PG_GETARG_POINTER(1);
	int32		len = PG_GETARG_INT32(2);
	char	   *lowered = lowerstr_with_len(in, len);
	TSLexeme   *res = palloc0(sizeof(TSLexeme) * 2);

	if (*lowered != '\0' && !searchstoplist(&(d->stoplist), lowered))
		res[0].lexeme = lowered;
	else
		pfree(lowered);

	PG_RETURN_POINTER(res);
}

View File

@ -0,0 +1,176 @@
/*-------------------------------------------------------------------------
*
* dict_synonym.c
* Synonym dictionary: replace word by its synonym
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#define SYNBUFLEN 4096
/*
 * One synonym pair.  Entries are ordered and looked up by "in"
 * (see compareSyn).
 */
typedef struct
{
/* word to match */
char *in;
/* word substituted for it */
char *out;
} Syn;
/*
 * Private state of one synonym dictionary instance: created by
 * dsynonym_init() and searched by dsynonym_lexize().
 */
typedef struct
{
/* number of entries in syn once dsynonym_init() has finished */
int len;
/* synonym pairs, sorted with compareSyn when there is more than one */
Syn *syn;
} DictSyn;
/*
 * Locate the next whitespace-delimited word in the string "in".
 *
 * Returns a pointer to the first character of the word and stores in *end a
 * pointer to the character just past it.  If only whitespace remains,
 * returns NULL and leaves *end set to NULL.  The string is not modified.
 */
static char *
findwrd(char *in, char **end)
{
	char	   *wordstart;

	*end = NULL;

	/* skip leading whitespace, one (possibly multibyte) character at a time */
	for (; *in && t_isspace(in); in += pg_mblen(in))
		;

	if (*in == '\0')
		return NULL;

	wordstart = in;

	/* advance to the first whitespace character or the end of the string */
	for (; *in && !t_isspace(in); in += pg_mblen(in))
		;

	*end = in;
	return wordstart;
}
static int
compareSyn(const void *a, const void *b)
{
return strcmp(((Syn *) a)->in, ((Syn *) b)->in);
}
/*
 * Init function for the synonym dictionary template.
 *
 * Argument 0 is a text value naming the synonym file; the path is resolved
 * with get_tsearch_config_filename() using the "syn" extension.  The file
 * is read line by line: the first whitespace-delimited word of a line is
 * the word to match, the second is its replacement.  Lines with fewer than
 * two words are skipped.  Both words go through recode_and_lowerstr().
 *
 * Returns the new DictSyn with its entries sorted by the word to match.
 * Raises ERROR if the argument is NULL or the file cannot be opened.
 */
Datum
dsynonym_init(PG_FUNCTION_ARGS)
{
text *in;
DictSyn *d;
int cur = 0;
FILE *fin;
char *filename;
char buf[SYNBUFLEN];
char *starti,
*starto,
*end = NULL;
int slen;
/* init functions must defend against NULLs for themselves */
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("NULL config not allowed for Synonym")));
in = PG_GETARG_TEXT_P(0);
filename = get_tsearch_config_filename(TextPGetCString(in), "syn");
PG_FREE_IF_COPY(in, 0);
if ((fin = AllocateFile(filename, "r")) == NULL)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not open synonym file \"%s\": %m",
filename)));
d = (DictSyn *) palloc0(sizeof(DictSyn));
while (fgets(buf, SYNBUFLEN, fin))
{
slen = strlen(buf);
pg_verifymbstr(buf, slen, false);
/* while loading, d->len is the allocated size: 16 at first, then doubled */
if (cur == d->len)
{
if (d->len == 0)
{
d->len = 16;
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
}
else
{
d->len *= 2;
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
}
}
/* first word: the word to match; it is NUL-terminated in place in buf */
starti = findwrd(buf, &end);
if (!starti)
continue;
*end = '\0';
/* the first word ran to the end of the line, so there is no second word */
if (end >= buf + slen)
continue;
/* second word: the replacement */
starto = findwrd(end + 1, &end);
if (!starto)
continue;
*end = '\0';
d->syn[cur].in = recode_and_lowerstr(starti);
d->syn[cur].out = recode_and_lowerstr(starto);
if (!(d->syn[cur].in && d->syn[cur].out))
{
FreeFile(fin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
cur++;
}
FreeFile(fin);
/* from here on d->len is the number of entries actually loaded */
d->len = cur;
if (cur > 1)
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
pfree(filename);
PG_RETURN_POINTER(d);
}
/*
 * Lexize function for the synonym dictionary template.
 *
 * Arguments: 0 = DictSyn from dsynonym_init(), 1 = pointer to the input
 * word, 2 = its length.  The lower-cased word is looked up with bsearch();
 * on a hit the result is a two-element TSLexeme array whose first lexeme is
 * a copy of the replacement word.  Returns NULL when len <= 0 or when the
 * word has no synonym.
 */
Datum
dsynonym_lexize(PG_FUNCTION_ARGS)
{
	DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
	char	   *in = (char *) PG_GETARG_POINTER(1);
	int32		len = PG_GETARG_INT32(2);
	Syn			probe;
	Syn		   *match;
	TSLexeme   *res;

	if (len <= 0)
		PG_RETURN_POINTER(NULL);

	/* build a search key; only its "in" member is compared */
	probe.in = lowerstr_with_len(in, len);
	probe.out = NULL;

	match = (Syn *) bsearch(&probe, d->syn, d->len, sizeof(Syn), compareSyn);
	pfree(probe.in);

	if (match == NULL)
		PG_RETURN_POINTER(NULL);

	res = palloc(sizeof(TSLexeme) * 2);
	memset(res, 0, sizeof(TSLexeme) * 2);
	res[0].lexeme = pstrdup(match->out);

	PG_RETURN_POINTER(res);
}

View File

@ -0,0 +1,887 @@
/*-------------------------------------------------------------------------
*
* dict_thesaurus.c
* Thesaurus dictionary: phrase to phrase substitution
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/namespace.h"
#include "storage/fd.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
/*
* Temporarily we use TSLexeme.flags for internal use...
*/
#define DT_USEASIS 0x1000
/*
 * One occurrence of a sample lexeme inside a thesaurus entry.
 */
typedef struct LexemeInfo
{
uint16 idsubst; /* entry's number in DictThesaurus->subst */
uint16 posinsubst; /* pos info in entry */
uint16 tnvariant; /* total num lexemes in one variant */
/* next occurrence of the same lexeme (linked in compileTheLexeme) */
struct LexemeInfo *nextentry;
/* next element of a match chain (linked in findVariant) */
struct LexemeInfo *nextvariant;
} LexemeInfo;
/*
 * A sample lexeme together with the places where it occurs.
 */
typedef struct
{
/* lexeme text; addCompiledLexeme stores NULL here when it is given no lexeme */
char *lexeme;
/* head of the occurrence list, chained through LexemeInfo.nextentry */
LexemeInfo *entries;
} TheLexeme;
/*
 * The substitution (right-hand side) of one thesaurus entry.
 */
typedef struct
{
/* addWrd stores (posinsubst - 1) here; checkMatch compares it to the current position */
uint16 lastlexeme; /* number lexemes to substitute */
/* number of lexemes in res, filled in by compileTheSubstitute */
uint16 reslen;
TSLexeme *res; /* prepared substituted result */
} TheSubstitute;
/*
 * Complete state of one thesaurus dictionary.
 */
typedef struct
{
/* subdictionary to normalize lexemes */
Oid subdictOid;
TSDictionaryCacheEntry *subdict;
/* Array to search lexeme by exact match */
TheLexeme *wrds;
/* number of used elements in wrds */
int nwrds;
/* number of allocated elements in wrds */
int ntwrds;
/*
 * Storage of substituted result, n-th element is for n-th expression
 */
TheSubstitute *subst;
/*
 * While the file is being read addWrd uses this as the allocated size of
 * subst; thesaurusRead finally stores the number of entries read here.
 */
int nsubst;
} DictThesaurus;
/*
 * newLexeme
 *		Append the sample word found in [b, e) to d->wrds.
 *
 * The word is copied into a freshly allocated NUL-terminated string and
 * receives a single LexemeInfo that remembers the thesaurus entry (idsubst)
 * and the position inside that entry (posinsubst) it was read from.  The
 * array starts with 16 slots and doubles whenever it is full.
 */
static void
newLexeme(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 posinsubst)
{
	Size		wordlen = e - b;
	TheLexeme  *slot;
	LexemeInfo *info;

	/* make room for one more element */
	if (d->nwrds >= d->ntwrds)
	{
		if (d->ntwrds != 0)
		{
			d->ntwrds *= 2;
			d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
		}
		else
		{
			d->ntwrds = 16;
			d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
		}
	}

	slot = &d->wrds[d->nwrds];
	d->nwrds++;

	/* private, terminated copy of the word */
	slot->lexeme = palloc(wordlen + 1);
	memcpy(slot->lexeme, b, wordlen);
	slot->lexeme[wordlen] = '\0';

	info = (LexemeInfo *) palloc(sizeof(LexemeInfo));
	info->nextentry = NULL;
	info->idsubst = idsubst;
	info->posinsubst = posinsubst;
	slot->entries = info;
}
/*
 * addWrd
 *		Append one substitute word, the bytes in [b, e), to entry idsubst.
 *
 * nwrd is the index of the word within the substitution; a value of 0
 * marks the first word of a new entry.  posinsubst is the number of sample
 * words read for the entry so far.  useasis requests that the word be
 * flagged DT_USEASIS.
 *
 * NOTE: the fill count and capacity of the entry's res[] array live in
 * function-level static variables, so all words of one entry must be added
 * by consecutive calls, starting with nwrd == 0.
 */
static void
addWrd(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
{
/* used / allocated element counts of the res[] array currently being built */
static int nres = 0;
static int ntres = 0;
TheSubstitute *ptr;
if (nwrd == 0)
{
/* first word of a new entry: restart the counters and grow d->subst if needed */
nres = ntres = 0;
if (idsubst >= d->nsubst)
{
if (d->nsubst == 0)
{
d->nsubst = 16;
d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
}
else
{
d->nsubst *= 2;
d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
}
}
}
ptr = d->subst + idsubst;
ptr->lastlexeme = posinsubst - 1;
/* keep room for the new word plus the terminating element */
if (nres + 1 >= ntres)
{
if (ntres == 0)
{
ntres = 2;
ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres);
}
else
{
ntres *= 2;
ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres);
}
}
/* store a NUL-terminated copy of the word */
ptr->res[nres].lexeme = palloc(e - b + 1);
memcpy(ptr->res[nres].lexeme, b, e - b);
ptr->res[nres].lexeme[e - b] = '\0';
ptr->res[nres].nvariant = nwrd;
if (useasis)
ptr->res[nres].flags = DT_USEASIS;
else
ptr->res[nres].flags = 0;
/* advance and mark the end of the array with a NULL lexeme */
ptr->res[++nres].lexeme = NULL;
}
/* Scanner states used by thesaurusRead while it walks through one line */
/* before or between sample words, left of the ':' delimiter */
#define TR_WAITLEX 1
/* inside a sample word */
#define TR_INLEX 2
/* right of the ':' delimiter, before or between substitute words */
#define TR_WAITSUBS 3
/* inside a substitute word */
#define TR_INSUBS 4
/*
 * thesaurusRead
 *		Load a thesaurus file into d->wrds (sample words) and d->subst
 *		(substitute words).
 *
 * filename is resolved with get_tsearch_config_filename() using the "ths"
 * extension.  Every non-comment line has the form
 *
 *		sample-word ... : substitute-word ...
 *
 * where a substitute word may be preceded by '*' (use the word as is) or
 * by '\'.  Lines whose first non-blank character is '#', and lines that
 * hold only white space, are skipped.  Each accepted line becomes one
 * entry; on return d->nsubst holds the number of entries.
 *
 * The file content is converted from UTF8 to the database encoding, the
 * same direction recode_and_lowerstr() uses for other dictionary files.
 *
 * Raises ERROR if the file cannot be opened or a line is malformed.
 */
static void
thesaurusRead(char *filename, DictThesaurus * d)
{
	FILE	   *fh;
	char		str[BUFSIZ];
	int			lineno = 0;
	uint16		idsubst = 0;
	bool		useasis = false;

	filename = get_tsearch_config_filename(filename, "ths");
	fh = AllocateFile(filename, "r");
	if (!fh)
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open thesaurus file \"%s\": %m",
						filename)));

	while (fgets(str, sizeof(str), fh))
	{
		char	   *ptr,
				   *recoded;
		int			state = TR_WAITLEX;
		char	   *beginwrd = NULL;
		uint16		posinsubst = 0;
		uint16		nwrd = 0;

		/* the file is UTF8; everything downstream expects database encoding */
		ptr = recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
														   PG_UTF8, GetDatabaseEncoding());
		if (recoded == NULL)
			elog(ERROR, "encoding conversion failed");

		lineno++;

		/* skip leading white space, then test for a comment or an empty line */
		while (*ptr && t_isspace(ptr))
			ptr += pg_mblen(ptr);

		if (t_iseq(ptr, '#') || *ptr == '\0' || t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
		{
			if (recoded != str)
				pfree(recoded);
			continue;
		}

		while (*ptr)
		{
			if (state == TR_WAITLEX)
			{
				if (t_iseq(ptr, ':'))
				{
					/* a delimiter is only valid after at least one sample word */
					if (posinsubst == 0)
					{
						FreeFile(fh);
						ereport(ERROR,
								(errcode(ERRCODE_CONFIG_FILE_ERROR),
								 errmsg("unexpected delimiter at line %d of thesaurus file \"%s\"",
										lineno, filename)));
					}
					state = TR_WAITSUBS;
				}
				else if (!t_isspace(ptr))
				{
					beginwrd = ptr;
					state = TR_INLEX;
				}
			}
			else if (state == TR_INLEX)
			{
				if (t_iseq(ptr, ':'))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITSUBS;
				}
				else if (t_isspace(ptr))
				{
					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
					state = TR_WAITLEX;
				}
			}
			else if (state == TR_WAITSUBS)
			{
				if (t_iseq(ptr, '*'))
				{
					/* '*' prefix: the word must not be lexized later */
					useasis = true;
					state = TR_INSUBS;
					beginwrd = ptr + pg_mblen(ptr);
				}
				else if (t_iseq(ptr, '\\'))
				{
					useasis = false;
					state = TR_INSUBS;
					beginwrd = ptr + pg_mblen(ptr);
				}
				else if (!t_isspace(ptr))
				{
					useasis = false;
					beginwrd = ptr;
					state = TR_INSUBS;
				}
			}
			else if (state == TR_INSUBS)
			{
				if (t_isspace(ptr))
				{
					/* a bare '*' or '\' followed by white space is an error */
					if (ptr == beginwrd)
					{
						FreeFile(fh);
						ereport(ERROR,
								(errcode(ERRCODE_CONFIG_FILE_ERROR),
								 errmsg("unexpected end of line or lexeme at line %d of thesaurus file \"%s\"",
										lineno, filename)));
					}
					addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
					state = TR_WAITSUBS;
				}
			}
			else
				elog(ERROR, "unrecognized thesaurus state: %d", state);

			ptr += pg_mblen(ptr);
		}

		/* the line ended inside a substitute word: flush it */
		if (state == TR_INSUBS)
		{
			if (ptr == beginwrd)
			{
				FreeFile(fh);
				ereport(ERROR,
						(errcode(ERRCODE_CONFIG_FILE_ERROR),
						 errmsg("unexpected end of line or lexeme at line %d of thesaurus file \"%s\"",
								lineno, filename)));
			}
			addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
		}

		idsubst++;

		/* every entry needs at least one sample word and one substitute word */
		if (!(nwrd && posinsubst))
		{
			FreeFile(fh);
			ereport(ERROR,
					(errcode(ERRCODE_CONFIG_FILE_ERROR),
					 errmsg("unexpected end of line at line %d of thesaurus file \"%s\"",
							lineno, filename)));
		}

		if (recoded != str)
			pfree(recoded);
	}

	d->nsubst = idsubst;

	FreeFile(fh);
}
/*
 * addCompiledLexeme
 *		Append one normalized lexeme to the newwrds array.
 *
 * *nnw is the number of used elements and *tnm the allocated size; the
 * array is doubled when full, so the (possibly moved) array is returned.
 * When a lexeme is supplied its text is copied and tnvariant is stored;
 * otherwise the element gets a NULL lexeme and tnvariant 1.  The entry
 * number and position are taken from src.
 */
static TheLexeme *
addCompiledLexeme(TheLexeme * newwrds, int *nnw, int *tnm, TSLexeme * lexeme, LexemeInfo * src, uint16 tnvariant)
{
	bool		haslexeme = (lexeme != NULL && lexeme->lexeme != NULL);
	TheLexeme  *slot;
	LexemeInfo *info;

	if (*nnw >= *tnm)
	{
		*tnm *= 2;
		newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
	}

	info = (LexemeInfo *) palloc(sizeof(LexemeInfo));
	info->tnvariant = haslexeme ? tnvariant : 1;
	info->idsubst = src->idsubst;
	info->posinsubst = src->posinsubst;
	info->nextentry = NULL;

	slot = &newwrds[*nnw];
	slot->entries = info;
	slot->lexeme = haslexeme ? pstrdup(lexeme->lexeme) : NULL;

	(*nnw)++;

	return newwrds;
}
/*
 * cmpLexemeInfo
 *		Three-way comparison of two occurrences, by idsubst, then
 *		posinsubst, then tnvariant.  If either argument is NULL the two
 *		are reported as equal.
 */
static int
cmpLexemeInfo(LexemeInfo * a, LexemeInfo * b)
{
	if (a == NULL || b == NULL)
		return 0;

	if (a->idsubst != b->idsubst)
		return (a->idsubst > b->idsubst) ? 1 : -1;

	if (a->posinsubst != b->posinsubst)
		return (a->posinsubst > b->posinsubst) ? 1 : -1;

	if (a->tnvariant != b->tnvariant)
		return (a->tnvariant > b->tnvariant) ? 1 : -1;

	return 0;
}
/*
 * cmpLexeme
 *		Order two TheLexeme items by their text using strcmp().  An item
 *		without text (NULL lexeme) sorts after every item that has text;
 *		two such items are equal.
 */
static int
cmpLexeme(TheLexeme * a, TheLexeme * b)
{
	bool		a_missing = (a->lexeme == NULL);
	bool		b_missing = (b->lexeme == NULL);

	if (a_missing && b_missing)
		return 0;
	if (a_missing)
		return 1;
	if (b_missing)
		return -1;

	return strcmp(a->lexeme, b->lexeme);
}
static int
cmpLexemeQ(const void *a, const void *b)
{
return cmpLexeme((TheLexeme *) a, (TheLexeme *) b);
}
static int
cmpTheLexeme(const void *a, const void *b)
{
TheLexeme *la = (TheLexeme *) a;
TheLexeme *lb = (TheLexeme *) b;
int res;
if ((res = cmpLexeme(la, lb)) != 0)
return res;
return -cmpLexemeInfo(la->entries, lb->entries);
}
/*
 * compileTheLexeme
 *		Normalize all sample words through the subdictionary and rebuild
 *		d->wrds as a sorted array without duplicate lexemes.
 *
 * Each word collected by thesaurusRead is passed to the subdictionary's
 * lexize function.  A NULL result raises ERROR; a result without lexemes
 * is treated as a stop-word (NOTICE) and stored with a NULL lexeme.
 * Otherwise every returned lexeme is stored together with the number of
 * lexemes in its variant.  Afterwards the array is sorted and elements
 * with equal lexeme text are merged into one element whose occurrence
 * list links all distinct occurrences through nextentry.
 */
static void
compileTheLexeme(DictThesaurus * d)
{
int i,
nnw = 0,
tnm = 16;
TheLexeme *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
*ptrwrds;
for (i = 0; i < d->nwrds; i++)
{
TSLexeme *ptr;
/* ask the subdictionary to normalize the sample word */
ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
PointerGetDatum(d->subdict->dictData),
PointerGetDatum(d->wrds[i].lexeme),
Int32GetDatum(strlen(d->wrds[i].lexeme)),
PointerGetDatum(NULL)));
if (!(ptr && ptr->lexeme))
{
if (!ptr)
elog(ERROR, "thesaurus word-sample \"%s\" isn't recognized by subdictionary (rule %d)",
d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1);
else
elog(NOTICE, "thesaurus word-sample \"%s\" is recognized as stop-word, assign any stop-word (rule %d)",
d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1);
/* stop-word: store an element with a NULL lexeme */
newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
}
else
{
/* the result is processed one variant (run of equal nvariant) at a time */
while (ptr->lexeme)
{
TSLexeme *remptr = ptr + 1;
int tnvar = 1;
int curvar = ptr->nvariant;
/* compute n words in one variant */
while (remptr->lexeme)
{
if (remptr->nvariant != (remptr - 1)->nvariant)
break;
tnvar++;
remptr++;
}
/* second pass over the same variant: store each of its lexemes */
remptr = ptr;
while (remptr->lexeme && remptr->nvariant == curvar)
{
newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
remptr++;
}
ptr = remptr;
}
}
/* the raw word and its occurrence record are no longer needed */
pfree(d->wrds[i].lexeme);
pfree(d->wrds[i].entries);
}
/* replace the raw array with the compiled one */
pfree(d->wrds);
d->wrds = newwrds;
d->nwrds = nnw;
d->ntwrds = tnm;
if (d->nwrds > 1)
{
qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
/* uniq */
/* newwrds is the last kept element, ptrwrds the element being examined */
newwrds = d->wrds;
ptrwrds = d->wrds + 1;
while (ptrwrds - d->wrds < d->nwrds)
{
if (cmpLexeme(ptrwrds, newwrds) == 0)
{
/* same text: push a differing occurrence onto the kept list, drop an identical one */
if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
{
ptrwrds->entries->nextentry = newwrds->entries;
newwrds->entries = ptrwrds->entries;
}
else
pfree(ptrwrds->entries);
if (ptrwrds->lexeme)
pfree(ptrwrds->lexeme);
}
else
{
/* new text: keep the element, moving it down over any freed slots */
newwrds++;
*newwrds = *ptrwrds;
}
ptrwrds++;
}
/* trim the array to the number of kept elements */
d->nwrds = newwrds - d->wrds + 1;
d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
}
}
/*
 * compileTheSubstitute
 *		Turn the raw substitute words of every entry into the final
 *		TSLexeme array returned on a match.
 *
 * For each entry the words stored by addWrd are either taken as they are
 * (DT_USEASIS) or normalized through the subdictionary.  The resulting
 * lexemes are copied into a new res array and reslen is set to their
 * number.  A word the subdictionary reports as a stop-word is skipped with
 * a NOTICE; an unrecognized word, or an entry that ends up with no lexeme
 * at all, raises ERROR.  The raw array and its strings are freed.
 */
static void
compileTheSubstitute(DictThesaurus * d)
{
int i;
for (i = 0; i < d->nsubst; i++)
{
/* rem keeps the raw array so it can be walked and freed at the end */
TSLexeme *rem = d->subst[i].res,
*outptr,
*inptr;
int n = 2;
outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
outptr->lexeme = NULL;
inptr = rem;
while (inptr && inptr->lexeme)
{
TSLexeme *lexized,
tmplex[2];
if (inptr->flags & DT_USEASIS)
{ /* do not lexize */
/* build a one-element result from the word itself, with the internal flag cleared */
tmplex[0] = *inptr;
tmplex[0].flags = 0;
tmplex[1].lexeme = NULL;
lexized = tmplex;
}
else
{
lexized = (TSLexeme *) DatumGetPointer(
FunctionCall4(
&(d->subdict->lexize),
PointerGetDatum(d->subdict->dictData),
PointerGetDatum(inptr->lexeme),
Int32GetDatum(strlen(inptr->lexeme)),
PointerGetDatum(NULL)
)
);
}
if (lexized && lexized->lexeme)
{
/*
 * toset is the output index of the first lexeme produced by this
 * word, or -1 when this word produces the very first output lexeme.
 */
int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
while (lexized->lexeme)
{
/* grow the output array by doubling; outptr must be re-based afterwards */
if (outptr - d->subst[i].res + 1 >= n)
{
int diff = outptr - d->subst[i].res;
n *= 2;
d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
outptr = d->subst[i].res + diff;
}
/* copy the whole struct, then give the copy its own string */
*outptr = *lexized;
outptr->lexeme = pstrdup(lexized->lexeme);
outptr++;
lexized++;
}
/* flag the first lexeme of every word after the first with TSL_ADDPOS */
if (toset > 0)
d->subst[i].res[toset].flags |= TSL_ADDPOS;
}
else if (lexized)
{
elog(NOTICE, "thesaurus word \"%s\" in substitution is a stop-word, ignored (rule %d)", inptr->lexeme, i + 1);
}
else
{
elog(ERROR, "thesaurus word \"%s\" in substitution isn't recognized (rule %d)", inptr->lexeme, i + 1);
}
if (inptr->lexeme)
pfree(inptr->lexeme);
inptr++;
}
/* nothing was emitted for this entry */
if (outptr == d->subst[i].res)
elog(ERROR, "all words in thesaurus substitution are stop words (rule %d)", i + 1);
d->subst[i].reslen = outptr - d->subst[i].res;
pfree(rem);
}
}
Datum
thesaurus_init(PG_FUNCTION_ARGS)
{
DictThesaurus *d;
Map *cfg,
*pcfg;
text *in;
char *subdictname = NULL;
bool fileloaded = false;
/* init functions must defend against NULLs for themselves */
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("NULL config not allowed for Thesaurus")));
in = PG_GETARG_TEXT_P(0);
parse_keyvalpairs(in, &cfg);
PG_FREE_IF_COPY(in, 0);
d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
pcfg = cfg;
while (pcfg->key)
{
if (pg_strcasecmp("DictFile", pcfg->key) == 0)
{
if (fileloaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple DictFile parameters")));
thesaurusRead(pcfg->value, d);
fileloaded = true;
}
else if (pg_strcasecmp("Dictionary", pcfg->key) == 0)
{
if (subdictname)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple Dictionary parameters")));
subdictname = pstrdup(pcfg->value);
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized Thesaurus parameter: \"%s\"",
pcfg->key)));
}
pfree(pcfg->key);
pfree(pcfg->value);
pcfg++;
}
pfree(cfg);
if (!fileloaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing DictFile parameter")));
if (!subdictname)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing Dictionary parameter")));
d->subdictOid = TSDictionaryGetDictid(stringToQualifiedNameList(subdictname), false);
d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
compileTheLexeme(d);
compileTheSubstitute(d);
PG_RETURN_POINTER(d);
}
/*
 * findTheLexeme
 *		Binary-search d->wrds for the given lexeme text (which may be NULL)
 *		and return its occurrence list, or NULL if the array is empty or
 *		holds no such lexeme.
 */
static LexemeInfo *
findTheLexeme(DictThesaurus * d, char *lexeme)
{
	TheLexeme	probe;
	TheLexeme  *hit;

	if (d->nwrds == 0)
		return NULL;

	probe.lexeme = lexeme;
	probe.entries = NULL;

	hit = bsearch(&probe, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);

	return (hit != NULL) ? hit->entries : NULL;
}
/*
 * matchIdSubst
 *		Does the chain starting at stored (linked through nextvariant)
 *		contain an element for thesaurus entry idsubst?  An empty chain
 *		(stored == NULL) matches everything.
 */
static bool
matchIdSubst(LexemeInfo * stored, uint16 idsubst)
{
	LexemeInfo *cur;

	if (stored == NULL)
		return true;

	for (cur = stored; cur != NULL; cur = cur->nextvariant)
	{
		if (cur->idsubst == idsubst)
			return true;
	}

	return false;
}
/*
 * findVariant
 *		Collect the thesaurus entries in which all newn lexemes of one
 *		variant occur at position curpos.
 *
 * newin[0..newn-1] are the occurrence lists (linked through nextentry) of
 * the variant's lexemes; the array elements are advanced in place.  An
 * entry qualifies when every list contains an occurrence with that
 * idsubst, posinsubst == curpos and tnvariant == newn, when it is
 * compatible with the chain "stored" (see matchIdSubst), and when it is
 * not already present in "in".  Qualifying occurrences are pushed onto the
 * front of "in" through nextvariant.  The function returns the resulting
 * chain as soon as any list is exhausted.
 *
 * NOTE(review): the skipping logic only moves forward through each list
 * while idsubst is smaller than the candidate's, so it presumably relies
 * on the lists being ordered by ascending idsubst - confirm against the
 * ordering produced by compileTheLexeme.
 */
static LexemeInfo *
findVariant(LexemeInfo * in, LexemeInfo * stored, uint16 curpos, LexemeInfo ** newin, int newn)
{
for (;;)
{
int i;
/* ptr is the current candidate occurrence; its idsubst is the candidate entry */
LexemeInfo *ptr = newin[0];
for (i = 0; i < newn; i++)
{
/* advance list i up to the candidate entry */
while (newin[i] && newin[i]->idsubst < ptr->idsubst)
newin[i] = newin[i]->nextentry;
if (newin[i] == NULL)
return in;
/* list i has no occurrence for the candidate: adopt its entry and restart with list 0 */
if (newin[i]->idsubst > ptr->idsubst)
{
ptr = newin[i];
i = -1;
continue;
}
/* within the candidate entry, look for the right position and variant size */
while (newin[i]->idsubst == ptr->idsubst)
{
if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
{
ptr = newin[i];
break;
}
newin[i] = newin[i]->nextentry;
if (newin[i] == NULL)
return in;
}
/* ran past the candidate entry without a hit: restart with the new entry */
if (newin[i]->idsubst != ptr->idsubst)
{
ptr = newin[i];
i = -1;
continue;
}
}
if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
{ /* found */
ptr->nextvariant = in;
in = ptr;
}
/* step forward */
for (i = 0; i < newn; i++)
newin[i] = newin[i]->nextentry;
}
return NULL;
}
/*
 * copyTSLexeme
 *		Return a freshly allocated copy of a substitution's result array:
 *		reslen elements with their own lexeme strings, followed by a
 *		terminating element whose lexeme is NULL.
 */
static TSLexeme *
copyTSLexeme(TheSubstitute * ts)
{
	int			count = ts->reslen;
	int			k;
	TSLexeme   *copy;

	copy = (TSLexeme *) palloc(sizeof(TSLexeme) * (count + 1));

	for (k = 0; k < count; k++)
	{
		/* duplicate all fields, then replace the shared string by a private one */
		copy[k] = ts->res[k];
		copy[k].lexeme = pstrdup(ts->res[k].lexeme);
	}
	copy[count].lexeme = NULL;

	return copy;
}
/*
 * checkMatch
 *		Walk the chain of candidates (linked through nextvariant) and
 *		return a copy of the substitution of the first one whose entry
 *		ends at position curpos, or NULL if there is none.
 *
 * *moreres is set to true if any candidate visited up to and including
 * the returned one has a successor in the chain, otherwise to false.
 */
static TSLexeme *
checkMatch(DictThesaurus * d, LexemeInfo * info, uint16 curpos, bool *moreres)
{
	LexemeInfo *cand;

	*moreres = false;

	for (cand = info; cand != NULL; cand = cand->nextvariant)
	{
		TheSubstitute *subst;

		Assert(cand->idsubst < d->nsubst);

		if (cand->nextvariant)
			*moreres = true;

		subst = d->subst + cand->idsubst;
		if (subst->lastlexeme == curpos)
			return copyTSLexeme(subst);
	}

	return NULL;
}
/*
 * thesaurus_lexize
 *		Lexize entry point of the thesaurus dictionary.
 *
 * Arguments: (0) the DictThesaurus, (1) and (2) the word and its length,
 * which are passed unchanged to the subdictionary, (3) a DictSubState that
 * carries the matching state between calls.  dstate->private holds the
 * chain of candidate entries that matched all words seen so far.
 *
 * Returns the substitution when the current word completes an entry,
 * otherwise NULL.  dstate->getnext is set to true when further words are
 * wanted (a longer entry may still match) and to false when no candidate
 * remains.
 */
Datum
thesaurus_lexize(PG_FUNCTION_ARGS)
{
DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3);
TSLexeme *res = NULL;
LexemeInfo *stored,
*info = NULL;
uint16 curpos = 0;
bool moreres = false;
if (PG_NARGS() < 4 || dstate == NULL)
elog(ERROR, "forbidden call of thesaurus or nested call");
if (dstate->isend)
PG_RETURN_POINTER(NULL);
/* candidates left over from the previous word determine the position to match now */
stored = (LexemeInfo *) dstate->private;
if (stored)
curpos = stored->posinsubst + 1;
/* refresh the cached subdictionary entry if it has been invalidated */
if (!d->subdict->isvalid)
d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
/* normalize the word with the subdictionary */
res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
PointerGetDatum(d->subdict->dictData),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2),
PointerGetDatum(NULL)));
if (res && res->lexeme)
{
TSLexeme *ptr = res,
*basevar;
/* handle the result one variant (run of equal nvariant) at a time */
while (ptr->lexeme)
{
uint16 nv = ptr->nvariant;
uint16 i,
nlex = 0;
LexemeInfo **infos;
basevar = ptr;
while (ptr->lexeme && nv == ptr->nvariant)
{
nlex++;
ptr++;
}
/* look up the occurrence list of every lexeme of the variant */
infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
for (i = 0; i < nlex; i++)
if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
break;
if (i < nlex)
{
/* no chance to find */
pfree(infos);
continue;
}
info = findVariant(info, stored, curpos, infos, nlex);
}
}
else if (res)
{ /* stop-word */
/* stop-words are stored under a NULL lexeme */
LexemeInfo *infos = findTheLexeme(d, NULL);
info = findVariant(NULL, stored, curpos, &infos, 1);
}
else
{
info = NULL; /* word isn't recognized */
}
/* remember the surviving candidates for the next call */
dstate->private = (void *) info;
if (!info)
{
dstate->getnext = false;
PG_RETURN_POINTER(NULL);
}
if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
{
dstate->getnext = moreres;
PG_RETURN_POINTER(res);
}
/* candidates exist but none is complete yet: ask for the next word */
dstate->getnext = true;
PG_RETURN_POINTER(NULL);
}

236
src/backend/tsearch/regis.c Normal file
View File

@ -0,0 +1,236 @@
/*-------------------------------------------------------------------------
*
* regis.c
* Fast regex subset
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/regis.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/dicts/regis.h"
#include "tsearch/ts_locale.h"
bool
RS_isRegis(const char *str)
{
while (str && *str)
{
if (t_isalpha(str) ||
t_iseq(str, '[') ||
t_iseq(str, ']') ||
t_iseq(str, '^'))
str += pg_mblen(str);
else
return false;
}
return true;
}
#define RS_IN_ONEOF 1
#define RS_IN_ONEOF_IN 2
#define RS_IN_NONEOF 3
#define RS_IN_WAIT 4
/*
 * newRegisNode
 *		Allocate a zero-filled RegisNode with room for len data bytes plus
 *		one more, and link it after prev when prev is given.  Returns the
 *		new node.
 */
static RegisNode *
newRegisNode(RegisNode * prev, int len)
{
	RegisNode  *node = (RegisNode *) palloc0(RNHDRSZ + len + 1);

	if (prev != NULL)
		prev->next = node;

	return node;
}
/*
 * RS_compile
 *		Compile the pattern str into the node list of *r.
 *
 * A pattern is a sequence of items, each matching exactly one character:
 * a single alphabetic character, a set "[abc]" or a negated set "[^abc]".
 * issuffix records whether the pattern is to be matched against the end
 * of a word (see RS_execute).  On return r->nchar holds the number of
 * items.
 *
 * *r is overwritten without being freed first.  Any character that is not
 * allowed at its position, and a pattern that ends inside an unterminated
 * '[' group, raise ERROR.
 */
void
RS_compile(Regis * r, bool issuffix, char *str)
{
	int			len = strlen(str);
	int			state = RS_IN_WAIT;
	char	   *c = (char *) str;
	RegisNode  *ptr = NULL;

	memset(r, 0, sizeof(Regis));
	r->issuffix = (issuffix) ? 1 : 0;

	while (*c)
	{
		if (state == RS_IN_WAIT)
		{
			if (t_isalpha(c))
			{
				/* a plain character is a one-element set */
				if (ptr)
					ptr = newRegisNode(ptr, len);
				else
					ptr = r->node = newRegisNode(NULL, len);
				COPYCHAR(ptr->data, c);
				ptr->type = RSF_ONEOF;
				ptr->len = pg_mblen(c);
			}
			else if (t_iseq(c, '['))
			{
				if (ptr)
					ptr = newRegisNode(ptr, len);
				else
					ptr = r->node = newRegisNode(NULL, len);
				ptr->type = RSF_ONEOF;
				state = RS_IN_ONEOF;
			}
			else
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
						 errmsg("invalid regis pattern: \"%s\"",
								str)));
		}
		else if (state == RS_IN_ONEOF)
		{
			if (t_iseq(c, '^'))
			{
				ptr->type = RSF_NONEOF;
				state = RS_IN_NONEOF;
			}
			else if (t_isalpha(c))
			{
				COPYCHAR(ptr->data, c);
				ptr->len = pg_mblen(c);
				state = RS_IN_ONEOF_IN;
			}
			else
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
						 errmsg("invalid regis pattern: \"%s\"",
								str)));
		}
		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
		{
			if (t_isalpha(c))
			{
				/* append the character to the set being collected */
				COPYCHAR(ptr->data + ptr->len, c);
				ptr->len += pg_mblen(c);
			}
			else if (t_iseq(c, ']'))
				state = RS_IN_WAIT;
			else
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
						 errmsg("invalid regis pattern: \"%s\"",
								str)));
		}
		else
			elog(ERROR, "internal error in RS_compile: state %d", state);

		c += pg_mblen(c);
	}

	/* the pattern must not end in the middle of a bracketed set */
	if (state != RS_IN_WAIT)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
				 errmsg("invalid regis pattern: \"%s\"",
						str)));

	/* count the items */
	ptr = r->node;
	while (ptr)
	{
		r->nchar++;
		ptr = ptr->next;
	}
}
/*
 * RS_free
 *		Free every node of a compiled pattern and clear r->node.  The
 *		Regis struct itself is not freed.
 */
void
RS_free(Regis * r)
{
	RegisNode  *node = r->node;

	while (node != NULL)
	{
		RegisNode  *following = node->next;

		pfree(node);
		node = following;
	}

	r->node = NULL;
}
#ifdef TS_USE_WIDE
/*
 * mb_strchr
 *		Multibyte-aware strchr: does the string str contain the character
 *		that c points to?  Characters are compared byte by byte and only
 *		when their byte lengths agree.
 */
static bool
mb_strchr(char *str, char *c)
{
	int			clen = pg_mblen(c);
	int			curlen;
	char	   *cur;

	for (cur = str; *cur; cur += curlen)
	{
		int			k;

		curlen = pg_mblen(cur);
		if (curlen != clen)
			continue;

		/* compare from the last byte down to the first */
		for (k = curlen - 1; k >= 0; k--)
		{
			if (cur[k] != c[k])
				break;
		}
		if (k < 0)
			return true;
	}

	return false;
}
#else
#define mb_strchr(s,c) ( (strchr((s),*(c)) == NULL) ? false : true )
#endif
/*
 * RS_execute
 *		Test the string str against the compiled pattern r.
 *
 * The string must contain at least r->nchar characters.  A suffix pattern
 * is compared with the last r->nchar characters, any other pattern with
 * the first r->nchar characters.  Returns true when every pattern item
 * accepts its character.
 */
bool
RS_execute(Regis * r, char *str)
{
	RegisNode  *node;
	char	   *pos;
	int			nchars = 0;

	/* length of the string in characters, not bytes */
	for (pos = str; *pos; pos += pg_mblen(pos))
		nchars++;

	if (nchars < r->nchar)
		return false;

	pos = str;
	if (r->issuffix)
	{
		/* skip everything in front of the last r->nchar characters */
		int			skip = nchars - r->nchar;

		while (skip-- > 0)
			pos += pg_mblen(pos);
	}

	for (node = r->node; node != NULL; node = node->next, pos += pg_mblen(pos))
	{
		switch (node->type)
		{
			case RSF_ONEOF:
				if (!mb_strchr((char *) node->data, pos))
					return false;
				break;
			case RSF_NONEOF:
				if (mb_strchr((char *) node->data, pos))
					return false;
				break;
			default:
				elog(ERROR, "unrecognized regis node type: %d", node->type);
		}
	}

	return true;
}

1747
src/backend/tsearch/spell.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,3 @@
skies sky
booking book
bookings book

View File

@ -0,0 +1,20 @@
#
# Thesaurus config file. Character ':' separates string from replacement, e.g.
# sample-words : substitute-words
#
# Any substitute-word can be marked by preceding '*' character,
# which means do not lexize this word
# Docs: http://www.sai.msu.su/~megera/oddmuse/index.cgi/Thesaurus_dictionary
one two three : *123
one two : *12
one : *1
two : *2
#foo bar : blah blah
#f bar : fbar
#e bar : ebar
#g bar bar : gbarbar
#asd:sdffff
#qwerty:qwer wert erty

View File

@ -0,0 +1,363 @@
/*-------------------------------------------------------------------------
*
* to_tsany.c
* to_ts* function definitions
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/namespace.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/syscache.h"
Datum
get_current_ts_config(PG_FUNCTION_ARGS)
{
PG_RETURN_OID(getTSCurrentConfig(true));
}
/*
* to_tsvector
*/
static int
compareWORD(const void *a, const void *b)
{
if (((ParsedWord *) a)->len == ((ParsedWord *) b)->len)
{
int res = strncmp(
((ParsedWord *) a)->word,
((ParsedWord *) b)->word,
((ParsedWord *) b)->len);
if (res == 0)
return (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
return res;
}
return (((ParsedWord *) a)->len > ((ParsedWord *) b)->len) ? 1 : -1;
}
/*
 * uniqueWORD
 *		Sort the l parsed words in a[] and merge duplicates in place.
 *
 * After the call the first N elements (N is the return value) are the
 * distinct words.  Each has pos.apos pointing to an array in which
 * element 0 is the number of positions and elements 1..n are the
 * positions (limited with LIMITPOS); alen is the allocated length of that
 * array.  The word strings of merged duplicates are freed.
 *
 * NOTE(review): every element's pos.pos is read into tmppos before
 * pos.apos is assigned, which suggests that pos and apos share storage
 * (a union) - confirm against the ParsedWord declaration.
 */
static int
uniqueWORD(ParsedWord * a, int4 l)
{
ParsedWord *ptr,
*res;
int tmppos;
/* a single word needs no sorting: just convert its position to array form */
if (l == 1)
{
tmppos = LIMITPOS(a->pos.pos);
a->alen = 2;
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
a->pos.apos[0] = 1;
a->pos.apos[1] = tmppos;
return l;
}
/* res is the last distinct word kept so far, ptr the word being examined */
res = a;
ptr = a + 1;
qsort((void *) a, l, sizeof(ParsedWord), compareWORD);
/* convert the first word's position to array form */
tmppos = LIMITPOS(a->pos.pos);
a->alen = 2;
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
a->pos.apos[0] = 1;
a->pos.apos[1] = tmppos;
while (ptr - a < l)
{
if (!(ptr->len == res->len &&
strncmp(ptr->word, res->word, res->len) == 0))
{
/* a new distinct word: move it into the next result slot */
res++;
res->len = ptr->len;
res->word = ptr->word;
tmppos = LIMITPOS(ptr->pos.pos);
res->alen = 2;
res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
res->pos.apos[0] = 1;
res->pos.apos[1] = tmppos;
}
else
{
/* duplicate of res: drop its string and try to record its position */
pfree(ptr->word);
/* stop adding once the position count or the last position value hits its limit */
if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1)
{
if (res->pos.apos[0] + 1 >= res->alen)
{
res->alen *= 2;
res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
}
/* skip a position equal to the one stored last */
if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
{
res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
res->pos.apos[0]++;
}
}
}
ptr++;
}
return res + 1 - a;
}
/*
 * make value of tsvector, given parsed text
 *
 * The words in prs are first made unique with uniqueWORD().  The result is
 * allocated in one piece: the WordEntry array followed by the string area,
 * where each word is followed (after alignment to a short boundary) by a
 * uint16 position count and that many WordEntryPos values when the word
 * has positions.  All positions get weight 0.
 *
 * prs->words, the word strings and the position arrays are freed, so prs
 * must not be used for its words afterwards.  Raises ERROR if the string
 * area grows beyond MAXSTRPOS.
 */
TSVector
make_tsvector(ParsedText *prs)
{
int4 i,
j,
lenstr = 0,
totallen;
TSVector in;
WordEntry *ptr;
char *str,
*cur;
prs->curwords = uniqueWORD(prs->words, prs->curwords);
/* first pass: compute the size of the string area */
for (i = 0; i < prs->curwords; i++)
{
lenstr += SHORTALIGN(prs->words[i].len);
if (prs->words[i].alen)
lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
}
totallen = CALCDATASIZE(prs->curwords, lenstr);
in = (TSVector) palloc0(totallen);
SET_VARSIZE(in, totallen);
in->size = prs->curwords;
/* second pass: ptr walks the entry array, cur the string area */
ptr = ARRPTR(in);
cur = str = STRPTR(in);
for (i = 0; i < prs->curwords; i++)
{
ptr->len = prs->words[i].len;
if (cur - str > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("string is too long for tsvector")));
ptr->pos = cur - str;
memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
pfree(prs->words[i].word);
cur += SHORTALIGN(prs->words[i].len);
if (prs->words[i].alen)
{
WordEntryPos *wptr;
ptr->haspos = 1;
/* position count first, then the positions themselves */
*(uint16 *) cur = prs->words[i].pos.apos[0];
wptr = POSDATAPTR(in, ptr);
for (j = 0; j < *(uint16 *) cur; j++)
{
WEP_SETWEIGHT(wptr[j], 0);
WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
}
cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
pfree(prs->words[i].pos.apos);
}
else
ptr->haspos = 0;
ptr++;
}
pfree(prs->words);
return in;
}
/*
 * to_tsvector_byid
 *		Convert a text value (argument 1) into a tsvector using the text
 *		search configuration whose OID is argument 0.
 *
 * The text is parsed with parsetext(); when it yields no words an empty
 * tsvector is returned.
 */
Datum
to_tsvector_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgId = PG_GETARG_OID(0);
	text	   *in = PG_GETARG_TEXT_P(1);
	ParsedText	parsed;
	TSVector	result;

	/* initial array size: a rough guess of one word per six bytes */
	parsed.lenwords = (VARSIZE(in) - VARHDRSZ) / 6;
	if (parsed.lenwords == 0)
		parsed.lenwords = 2;
	parsed.curwords = 0;
	parsed.pos = 0;
	parsed.words = (ParsedWord *) palloc(sizeof(ParsedWord) * parsed.lenwords);

	parsetext(cfgId, &parsed, VARDATA(in), VARSIZE(in) - VARHDRSZ);
	PG_FREE_IF_COPY(in, 1);

	if (!parsed.curwords)
	{
		/* nothing was parsed: build an empty tsvector */
		pfree(parsed.words);
		result = palloc(CALCDATASIZE(0, 0));
		SET_VARSIZE(result, CALCDATASIZE(0, 0));
		result->size = 0;
	}
	else
		result = make_tsvector(&parsed);

	PG_RETURN_POINTER(result);
}
/*
 * to_tsvector
 *		One-argument form: calls to_tsvector_byid with the configuration
 *		OID obtained from getTSCurrentConfig(true).
 */
Datum
to_tsvector(PG_FUNCTION_ARGS)
{
	text	   *document = PG_GETARG_TEXT_P(0);
	Oid			curcfg = getTSCurrentConfig(true);
	Datum		vector;

	vector = DirectFunctionCall2(to_tsvector_byid,
								 ObjectIdGetDatum(curcfg),
								 PointerGetDatum(document));

	PG_RETURN_DATUM(vector);
}
/*
* to_tsquery
*/
/*
 * This function is used for morph parsing
 *
 * It is handed to parse_tsquery() as the callback for a query operand.
 * The operand text (strval, lenval) is parsed with parsetext() under the
 * configuration state->cfg_id and the resulting words are pushed as
 * values: words sharing a position and a variant are joined with '&',
 * the variants of one position with '|', and the positions with '&'.
 * If parsing yields no word at all a VALSTOP value is pushed instead.
 * typeval is not used.
 */
static void
pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight)
{
int4 count = 0;
ParsedText prs;
uint32 variant,
pos,
cntvar = 0,
cntpos = 0,
cnt = 0;
prs.lenwords = 4;
prs.curwords = 0;
prs.pos = 0;
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
parsetext(state->cfg_id, &prs, strval, lenval);
if (prs.curwords > 0)
{
/* outer loop: one iteration per distinct position */
while (count < prs.curwords)
{
pos = prs.words[count].pos.pos;
cntvar = 0;
/* middle loop: one iteration per variant at this position */
while (count < prs.curwords && pos == prs.words[count].pos.pos)
{
variant = prs.words[count].nvariant;
cnt = 0;
/* inner loop: the words of one variant */
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
{
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
pfree(prs.words[count].word);
/* an operator is pushed after its second operand */
if (cnt)
pushquery(state, OPR, (int4) '&', 0, 0, 0);
cnt++;
count++;
}
if (cntvar)
pushquery(state, OPR, (int4) '|', 0, 0, 0);
cntvar++;
}
if (cntpos)
pushquery(state, OPR, (int4) '&', 0, 0, 0);
cntpos++;
}
pfree(prs.words);
}
else
pushval_asis(state, VALSTOP, NULL, 0, 0);
}
/*
 * to_tsquery_byid
 *		Convert a text value (argument 1) into a tsquery using the text
 *		search configuration whose OID is argument 0.
 *
 * The text is parsed with parse_tsquery(), operands being normalized by
 * pushval_morph.  The item array is then passed through clean_fakeval();
 * if that leaves nothing the query is reduced to an empty one, otherwise
 * the cleaned items are copied back over the original ones.
 */
Datum
to_tsquery_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgid = PG_GETARG_OID(0);
	text	   *in = PG_GETARG_TEXT_P(1);
	TSQuery		query;
	QueryItem  *cleaned;
	int4		nitems;

	query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false);

	/* an empty query needs no clean-up */
	if (query->size == 0)
		PG_RETURN_TSQUERY(query);

	cleaned = clean_fakeval(GETQUERY(query), &nitems);
	if (cleaned == NULL)
	{
		/* nothing survived: shrink to an empty query */
		SET_VARSIZE(query, HDRSIZETQ);
		query->size = 0;
		PG_RETURN_POINTER(query);
	}

	memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(QueryItem));
	pfree(cleaned);

	PG_RETURN_TSQUERY(query);
}
/*
 * to_tsquery
 *		One-argument form: calls to_tsquery_byid with the configuration
 *		OID obtained from getTSCurrentConfig(true).
 */
Datum
to_tsquery(PG_FUNCTION_ARGS)
{
	text	   *querytext = PG_GETARG_TEXT_P(0);
	Oid			curcfg = getTSCurrentConfig(true);
	Datum		query;

	query = DirectFunctionCall2(to_tsquery_byid,
								ObjectIdGetDatum(curcfg),
								PointerGetDatum(querytext));

	PG_RETURN_DATUM(query);
}
/*
 * plainto_tsquery_byid
 *		Same as to_tsquery_byid, except that parse_tsquery() is called
 *		with its last argument set to true instead of false.
 *
 * Argument 0 is the configuration OID, argument 1 the text.  The parsed
 * items are passed through clean_fakeval(); if nothing is left the query
 * is reduced to an empty one.
 */
Datum
plainto_tsquery_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgid = PG_GETARG_OID(0);
	text	   *in = PG_GETARG_TEXT_P(1);
	TSQuery		query;
	QueryItem  *cleaned;
	int4		nitems;

	query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true);

	/* an empty query needs no clean-up */
	if (query->size == 0)
		PG_RETURN_TSQUERY(query);

	cleaned = clean_fakeval(GETQUERY(query), &nitems);
	if (cleaned == NULL)
	{
		/* nothing survived: shrink to an empty query */
		SET_VARSIZE(query, HDRSIZETQ);
		query->size = 0;
		PG_RETURN_POINTER(query);
	}

	memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(QueryItem));
	pfree(cleaned);

	PG_RETURN_POINTER(query);
}
/*
 * plainto_tsquery
 *		One-argument form: calls plainto_tsquery_byid with the
 *		configuration OID obtained from getTSCurrentConfig(true).
 */
Datum
plainto_tsquery(PG_FUNCTION_ARGS)
{
	text	   *querytext = PG_GETARG_TEXT_P(0);
	Oid			curcfg = getTSCurrentConfig(true);
	Datum		query;

	query = DirectFunctionCall2(plainto_tsquery_byid,
								ObjectIdGetDatum(curcfg),
								PointerGetDatum(querytext));

	PG_RETURN_DATUM(query);
}

View File

@ -0,0 +1,241 @@
/*-------------------------------------------------------------------------
*
* ts_locale.c
* locale compatibility layer for tsearch
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#ifdef TS_USE_WIDE
#ifdef WIN32
/*
 * wchar2char (Windows build only)
 *		Convert the zero-terminated wide string "from" into the multibyte
 *		buffer "to", which has room for len bytes.
 *
 * With a UTF8 database WideCharToMultiByte() is used, and a failure is
 * reported as ERROR; otherwise the work is left to wcstombs().  Returns
 * the value reported by the conversion routine, or 0 when len is 0.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t len)
{
	int			nbytes;

	if (len == 0)
		return 0;

	if (GetDatabaseEncoding() != PG_UTF8)
		return wcstombs(to, from, len);

	nbytes = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len,
								 NULL, NULL);
	if (nbytes == 0)
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
						GetLastError())));
	Assert(nbytes <= len);

	return nbytes;
}
#endif /* WIN32 */
/*
 * char2wchar
 *		Convert the first len bytes of the multibyte string "from" into
 *		wide characters stored in "to".
 *
 * len is used both as the number of input bytes and as the limit passed
 * to the conversion routine for the output, so "to" needs room for len
 * wide characters (plus a terminator on the path noted below).  Returns
 * the value reported by the conversion routine, or 0 when len is 0.
 *
 * Three conversion paths exist: on Windows with a UTF8 database
 * MultiByteToWideChar() is used and a failure is reported as ERROR; with
 * the C ctype locale pg_mb2wchar_with_len() is used; otherwise mbstowcs()
 * is applied to a zero-terminated copy of the input.  On the last path
 * the return value of mbstowcs() is handed back unchecked.
 */
size_t
char2wchar(wchar_t *to, const char *from, size_t len)
{
if (len == 0)
return 0;
#ifdef WIN32
if (GetDatabaseEncoding() == PG_UTF8)
{
int r;
r = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len);
if (!r)
{
/* let pg_verifymbstr complain first if the input is not valid in the database encoding */
pg_verifymbstr(from, len, false);
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("invalid multibyte character for locale"),
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
}
Assert(r <= len);
return r;
}
else
#endif /* WIN32 */
/* on Windows this "if" is the else-branch of the test above */
if (lc_ctype_is_c())
{
/*
 * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
 * allocated with sufficient space
 */
return pg_mb2wchar_with_len(from, (pg_wchar *) to, len);
}
else
{
/*
 * mbstowcs requires a terminating '\0', so work on a terminated copy
 */
char *str = pnstrdup(from, len);
size_t tolen;
tolen = mbstowcs(to, str, len);
pfree(str);
return tolen;
}
}
/*
 * _t_isalpha
 *		Is the (possibly multibyte) character at ptr alphabetic?
 *
 * With the C ctype locale only the first byte is examined with isalpha().
 * Otherwise the whole character is converted to a wide character and
 * tested with iswalpha().  The full byte length of the character must be
 * passed to char2wchar(), because it converts only the bytes it is told
 * about; a length of 1 would cut a multibyte character in half.
 *
 * Returns non-zero for an alphabetic character and 0 otherwise, including
 * the case where the conversion stores nothing.
 */
int
_t_isalpha(const char *ptr)
{
	wchar_t		character[8];	/* one converted character plus slack */
	int			clen;

	if (lc_ctype_is_c())
		return isalpha(TOUCHAR(ptr));

	clen = pg_mblen(ptr);

	/* char2wchar may store up to clen wide characters; never overrun the buffer */
	if (clen >= (int) (sizeof(character) / sizeof(character[0])))
		return 0;

	/* defined result even if the conversion fails without storing anything */
	character[0] = 0;
	char2wchar(character, ptr, clen);

	return iswalpha((wint_t) character[0]);
}
/*
 * _t_isprint
 *		Is the (possibly multibyte) character at ptr printable?
 *
 * With the C ctype locale only the first byte is examined with isprint().
 * Otherwise the whole character is converted to a wide character and
 * tested with iswprint().  The full byte length of the character must be
 * passed to char2wchar(), because it converts only the bytes it is told
 * about; a length of 1 would cut a multibyte character in half.
 *
 * Returns non-zero for a printable character and 0 otherwise, including
 * the case where the conversion stores nothing.
 */
int
_t_isprint(const char *ptr)
{
	wchar_t		character[8];	/* one converted character plus slack */
	int			clen;

	if (lc_ctype_is_c())
		return isprint(TOUCHAR(ptr));

	clen = pg_mblen(ptr);

	/* char2wchar may store up to clen wide characters; never overrun the buffer */
	if (clen >= (int) (sizeof(character) / sizeof(character[0])))
		return 0;

	/* defined result even if the conversion fails without storing anything */
	character[0] = 0;
	char2wchar(character, ptr, clen);

	return iswprint((wint_t) character[0]);
}
#endif /* TS_USE_WIDE */
/*
 * recode_and_lowerstr
 *		Convert a C string from UTF8 to the database encoding and return a
 *		lower-cased copy of the converted string.
 *
 * The input string is left untouched; the intermediate converted string is
 * freed when the conversion produced a new one.  Raises ERROR if the
 * conversion routine returns NULL.
 */
char *
recode_and_lowerstr(char *str)
{
	char	   *converted;
	char	   *lowered;

	converted = (char *) pg_do_encoding_conversion((unsigned char *) str,
												   strlen(str),
												   PG_UTF8,
												   GetDatabaseEncoding());
	if (converted == NULL)
		elog(ERROR, "encoding conversion failed");

	lowered = lowerstr(converted);

	/* the conversion hands back its input when it made no new string */
	if (converted != str)
		pfree(converted);

	return lowered;
}
/*
 * lowerstr
 *		Return a lower-cased copy of a zero-terminated string; see
 *		lowerstr_with_len().
 */
char *
lowerstr(char *str)
{
	int			nbytes = strlen(str);

	return lowerstr_with_len(str, nbytes);
}
/*
 * lowerstr_with_len
 *		Return a newly allocated, zero-terminated, lower-cased copy of the
 *		first len bytes of str.
 *
 * A length of 0 yields an empty string.  When wide-character support is
 * compiled in, the encoding is multibyte and the ctype locale is not C,
 * the string is converted to wide characters, lowered with towlower() and
 * converted back.  Otherwise each byte is lowered with tolower(), and the
 * copy stops early at a zero byte.
 */
char *
lowerstr_with_len(char *str, int len)
{
char *ptr = str;
char *out;
if (len == 0)
return pstrdup("");
#ifdef TS_USE_WIDE
/*
 * Use wide char code only when max encoding length > 1 and ctype != C.
 * Some operating systems fail with multi-byte encodings and a C locale.
 * Also, for a C locale there is no need to process as multibyte. From
 * backend/utils/adt/oracle_compat.c Teodor
 */
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
wchar_t *wstr,
*wptr;
int wlen;
/*
 * Allocate wchar_t's for the worst case: len is the number of bytes,
 * which is >= the number of characters. One more wchar_t is allocated
 * for the terminating zero, because wchar2char (really wcstombs) wants
 * a zero-terminated string
 */
wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
/*
 * str should be a C string, so wlen is the number of converted
 * characters
 */
wlen = char2wchar(wstr, str, len);
/* char2wchar returns size_t; the value is checked after narrowing to int */
if (wlen < 0)
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("translation failed from server encoding to wchar_t")));
Assert(wlen <= len);
wstr[wlen] = 0;
while (*wptr)
{
*wptr = towlower((wint_t) *wptr);
wptr++;
}
/*
 * Allocate the result string for the worst case + '\0'
 */
len = sizeof(char) * pg_database_encoding_max_length() *(wlen + 1);
out = (char *) palloc(len);
/*
 * From here on wlen is a number of bytes, which is always >= the
 * number of characters
 */
wlen = wchar2char(out, wstr, len);
pfree(wstr);
if (wlen < 0)
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("translation failed from wchar_t to server encoding %d", errno)));
Assert(wlen <= len);
out[wlen] = '\0';
}
else
#endif
{
/* byte-wise path */
char *outptr;
outptr = out = (char *) palloc(sizeof(char) * (len + 1));
while (*ptr && ptr - str < len)
{
*outptr++ = tolower(*(unsigned char *) ptr);
ptr++;
}
*outptr = '\0';
}
return out;
}

View File

@ -0,0 +1,626 @@
/*-------------------------------------------------------------------------
*
* ts_parse.c
* main parse functions for tsearch
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
/*
 * When IGNORE_LONGLEXEME is defined, parsetext() and hlparsetext() report a
 * word of MAXSTRLEN or more bytes with a NOTICE and skip it; when it is not
 * defined they raise an ERROR instead.
 */
#define IGNORE_LONGLEXEME 1
/*
* Lexize subsystem
*/
/*
 * One token handed over by the parser, kept as a node of a singly linked
 * list (see ListParsedLex) while the dictionaries work on it.
 */
typedef struct ParsedLex
{
/* token type as returned by the parser's prstoken method */
int type;
/* token text as supplied by the parser, and its length */
char *lemm;
int lenlemm;
/* NOTE(review): not referenced by the code visible in this section */
bool resfollow;
/* next node in the list, or NULL at the tail */
struct ParsedLex *next;
} ParsedLex;
/*
 * Head/tail pair for a singly linked list of ParsedLex nodes.  Both
 * pointers are NULL when the list is empty.
 */
typedef struct ListParsedLex
{
ParsedLex *head;
ParsedLex *tail;
} ListParsedLex;
/*
 * Working state of the lexize loop (LexizeInit / LexizeAddLemm / LexizeExec).
 */
typedef struct
{
/* text search configuration whose token-type -> dictionary map is used */
TSConfigCacheEntry *cfg;
/* dictionary that asked for more words, or InvalidOid in single-word mode */
Oid curDictId;
/* index in the current token's dictionary list at which to (re)start */
int posDict;
/* state block passed to the dictionary's lexize method */
DictSubState dictState;
/* next token to offer to curDictId while in multiword mode */
ParsedLex *curSub;
ListParsedLex towork; /* current list to work */
ListParsedLex waste; /* list of lexemes that already lexized */
/*
 * Fields that remember the last partial result of a dictionary that wants
 * several lexemes (basically a thesaurus or something similar): lastRes is
 * the token at which that result was produced, tmpRes the result itself.
 */
ParsedLex *lastRes;
TSLexeme *tmpRes;
} LexizeData;
/*
 * LexizeInit
 *      Put *ld into its starting state for configuration cfg: single-word
 *      mode, both token lists empty, no remembered partial result.
 *      (dictState is left untouched, exactly as before.)
 */
static void
LexizeInit(LexizeData * ld, TSConfigCacheEntry * cfg)
{
    ld->cfg = cfg;

    /* no multiword dictionary is active; start with the first dictionary */
    ld->curDictId = InvalidOid;
    ld->posDict = 0;

    /* nothing queued, nothing consumed */
    ld->curSub = NULL;
    ld->towork.tail = NULL;
    ld->towork.head = NULL;
    ld->waste.tail = NULL;
    ld->waste.head = NULL;

    /* no pending multiword result */
    ld->tmpRes = NULL;
    ld->lastRes = NULL;
}
/*
 * LPLAddTail
 *      Append newpl to the end of list and make it the new tail.
 */
static void
LPLAddTail(ListParsedLex * list, ParsedLex * newpl)
{
    if (list->tail == NULL)
        list->head = newpl;         /* list was empty */
    else
        list->tail->next = newpl;   /* link behind the old tail */

    list->tail = newpl;

    /* terminate last, so the node's previous link is always overwritten */
    newpl->next = NULL;
}
/*
 * LPLRemoveHead
 *      Unlink and return the first node of list, or NULL if the list is
 *      empty.  The returned node's next pointer is not cleared here.
 */
static ParsedLex *
LPLRemoveHead(ListParsedLex * list)
{
    ParsedLex  *first = list->head;

    if (first != NULL)
        list->head = first->next;

    /* when the list became (or already was) empty, the tail must go too */
    if (list->head == NULL)
        list->tail = NULL;

    return first;
}
/*
 * LexizeAddLemm
 *      Queue one parser token for lexizing.
 *
 * A new ParsedLex node holding (type, lemm, lenlemm) is appended to the
 * work list, and curSub is pointed at it so that a dictionary running in
 * multiword mode sees it next.  lemm is stored by pointer, not copied.
 *
 * The node is allocated exactly once; the previous code called palloc twice
 * and leaked the first allocation for every token.
 */
static void
LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm)
{
    ParsedLex  *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));

    newpl->type = type;
    newpl->lemm = lemm;
    newpl->lenlemm = lenlemm;

    LPLAddTail(&ld->towork, newpl);
    ld->curSub = ld->towork.tail;
}
/*
 * RemoveHead
 *      Move the first token of the work list to the waste list and restart
 *      dictionary selection at the first dictionary for the next token.
 */
static void
RemoveHead(LexizeData * ld)
{
    ParsedLex  *consumed = LPLRemoveHead(&ld->towork);

    LPLAddTail(&ld->waste, consumed);
    ld->posDict = 0;
}
/*
 * setCorrLex
 *      Dispose of the waste list.
 *
 * If correspondLexem is non-NULL the caller takes over the list: its head
 * is stored in *correspondLexem.  Otherwise every node is pfree'd.  Either
 * way the waste list is empty afterwards.
 */
static void
setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem)
{
    if (correspondLexem != NULL)
        *correspondLexem = ld->waste.head;
    else
    {
        ParsedLex  *node = ld->waste.head;

        while (node != NULL)
        {
            ParsedLex  *following = node->next;

            pfree(node);
            node = following;
        }
    }

    ld->waste.tail = NULL;
    ld->waste.head = NULL;
}
/*
 * moveToWaste
 *      Move tokens from the front of the work list to the waste list, up to
 *      and including stop.  When stop is reached, curSub is set to the token
 *      that followed it.  If stop is not in the list, the whole work list is
 *      moved.
 */
static void
moveToWaste(LexizeData * ld, ParsedLex * stop)
{
    while (ld->towork.head != NULL)
    {
        bool        reached = (ld->towork.head == stop);

        /* read stop->next before RemoveHead relinks the node */
        if (reached)
            ld->curSub = stop->next;

        RemoveHead(ld);

        if (reached)
            break;
    }
}
/*
 * setNewTmpRes
 *      Remember res as the latest partial multiword result, produced at
 *      token lex.  A previously remembered result is released first: each
 *      lexeme string up to the terminating entry (lexeme == NULL), then the
 *      array itself.
 */
static void
setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res)
{
    TSLexeme   *previous = ld->tmpRes;

    if (previous != NULL)
    {
        TSLexeme   *entry = previous;

        while (entry->lexeme)
        {
            pfree(entry->lexeme);
            entry++;
        }
        pfree(previous);
    }

    ld->lastRes = lex;
    ld->tmpRes = res;
}
/*
 * LexizeExec
 *      Run the dictionaries over the queued tokens and return the next
 *      result.
 *
 * Returns the TSLexeme array produced by a dictionary's lexize method, or
 * NULL when the work list yields nothing more for now.  Before every return
 * the waste list (tokens already dealt with) is passed to setCorrLex():
 * with a non-NULL correspondLexem the caller receives its head, otherwise
 * the nodes are freed.
 *
 * Two modes, selected by ld->curDictId:
 *   - InvalidOid: each token at the head of the work list is offered to the
 *     dictionaries mapped to its type, in order, until one returns a result.
 *   - valid OID: that dictionary set dictState.getnext and is fed the
 *     following tokens (ld->curSub onwards) until it gives a result or
 *     gives up.
 * Mode changes are done by recursive calls to this function.
 */
static TSLexeme *
LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
{
int i;
ListDictionary *map;
TSDictionaryCacheEntry *dict;
TSLexeme *res;
if (ld->curDictId == InvalidOid)
{
/*
 * usual mode: dictionary wants only one word, but we should keep in
 * mind that we should go through the whole stack
 */
while (ld->towork.head)
{
ParsedLex *curVal = ld->towork.head;
/* map is only dereferenced after the type range check below */
map = ld->cfg->map + curVal->type;
if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
{
/* skip this type of lexeme */
RemoveHead(ld);
continue;
}
/* posDict is non-zero when resuming after a failed multiword attempt */
for (i = ld->posDict; i < map->len; i++)
{
dict = lookup_ts_dictionary_cache(map->dictIds[i]);
ld->dictState.isend = ld->dictState.getnext = false;
ld->dictState.private = NULL;
res = (TSLexeme *) DatumGetPointer(FunctionCall4(
&(dict->lexize),
PointerGetDatum(dict->dictData),
PointerGetDatum(curVal->lemm),
Int32GetDatum(curVal->lenlemm),
PointerGetDatum(&ld->dictState)
));
if (ld->dictState.getnext)
{
/*
 * dictionary wants next word, so setup and store current
 * position and go to multiword mode
 */
ld->curDictId = DatumGetObjectId(map->dictIds[i]);
ld->posDict = i + 1;
ld->curSub = curVal->next;
/* keep any partial result in case no longer match turns up */
if (res)
setNewTmpRes(ld, curVal, res);
return LexizeExec(ld, correspondLexem);
}
if (!res) /* dictionary doesn't know this lexeme */
continue;
RemoveHead(ld);
setCorrLex(ld, correspondLexem);
return res;
}
/* no dictionary recognized the token: drop it and go on */
RemoveHead(ld);
}
}
else
{ /* curDictId is valid */
dict = lookup_ts_dictionary_cache(ld->curDictId);
/*
 * Dictionary ld->curDictId asks us about following words
 */
while (ld->curSub)
{
ParsedLex *curVal = ld->curSub;
map = ld->cfg->map + curVal->type;
/* type 0 is passed through to the dictionary with isend set (below) */
if (curVal->type != 0)
{
bool dictExists = false;
if (curVal->type >= ld->cfg->lenmap || map->len == 0)
{
/* skip this type of lexeme */
ld->curSub = curVal->next;
continue;
}
/*
 * We should be sure that the current type of lexeme is recognized
 * by our dictionary: we just check whether it exists in the list
 * of dictionaries for this type.
 */
for (i = 0; i < map->len && !dictExists; i++)
if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
dictExists = true;
if (!dictExists)
{
/*
 * Dictionary can't work with current type of lexeme,
 * return to basic mode and redo all stored lexemes
 */
ld->curDictId = InvalidOid;
return LexizeExec(ld, correspondLexem);
}
}
ld->dictState.isend = (curVal->type == 0) ? true : false;
ld->dictState.getnext = false;
res = (TSLexeme *) DatumGetPointer(FunctionCall4(
&(dict->lexize),
PointerGetDatum(dict->dictData),
PointerGetDatum(curVal->lemm),
Int32GetDatum(curVal->lenlemm),
PointerGetDatum(&ld->dictState)
));
if (ld->dictState.getnext)
{
/* Dictionary wants one more */
ld->curSub = curVal->next;
if (res)
setNewTmpRes(ld, curVal, res);
continue;
}
if (res || ld->tmpRes)
{
/*
 * Dictionary normalizes lexemes, so we remove from stack all
 * used lexemes, return to basic mode and redo end of stack
 * (if it exists)
 */
if (res)
{
moveToWaste(ld, ld->curSub);
}
else
{
/* fall back to the last remembered partial result */
res = ld->tmpRes;
moveToWaste(ld, ld->lastRes);
}
/* reset to initial state */
ld->curDictId = InvalidOid;
ld->posDict = 0;
ld->lastRes = NULL;
ld->tmpRes = NULL;
setCorrLex(ld, correspondLexem);
return res;
}
/*
 * Dictionary doesn't want the next lexeme and didn't recognize
 * anything; redo from ld->towork.head
 */
ld->curDictId = InvalidOid;
return LexizeExec(ld, correspondLexem);
}
}
/* nothing (more) to return for the tokens queued so far */
setCorrLex(ld, correspondLexem);
return NULL;
}
/*
 * parsetext
 *      Tokenize buf (buflen bytes) with the parser of configuration cfgId,
 *      run every token through the dictionaries, and append the resulting
 *      lexemes to prs->words.
 *
 * prs->pos is advanced once per dictionary result and once more for each
 * lexeme flagged TSL_ADDPOS.  The lexeme strings are stored by pointer in
 * prs->words; only the result array itself is freed here.
 */
void
parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
{
    int         type,
                lenlemm;
    char       *lemm = NULL;
    LexizeData  ldata;
    TSLexeme   *norms;
    TSConfigCacheEntry *cfg = lookup_ts_config_cache(cfgId);
    TSParserCacheEntry *prsobj = lookup_ts_parser_cache(cfg->prsId);
    void       *prsdata;

    prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
                                                     PointerGetDatum(buf),
                                                     Int32GetDatum(buflen)));

    LexizeInit(&ldata, cfg);

    do
    {
        /* fetch the next token; type <= 0 ends the loop after this pass */
        type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
                                           PointerGetDatum(prsdata),
                                           PointerGetDatum(&lemm),
                                           PointerGetDatum(&lenlemm)));

        if (type > 0 && lenlemm >= MAXSTRLEN)
        {
#ifdef IGNORE_LONGLEXEME
            ereport(NOTICE,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("word is too long to be indexed"),
                     errdetail("Words longer than %d characters are ignored.",
                               MAXSTRLEN)));
            continue;
#else
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("word is too long to be indexed")));
#endif
        }

        LexizeAddLemm(&ldata, type, lemm, lenlemm);

        /* drain everything the dictionaries can produce so far */
        for (;;)
        {
            TSLexeme   *cur;

            norms = LexizeExec(&ldata, NULL);
            if (norms == NULL)
                break;

            prs->pos++;         /* set pos */

            for (cur = norms; cur->lexeme; cur++)
            {
                ParsedWord *slot;

                /* double the array when it is full */
                if (prs->curwords == prs->lenwords)
                {
                    prs->lenwords *= 2;
                    prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
                }

                if (cur->flags & TSL_ADDPOS)
                    prs->pos++;

                slot = &prs->words[prs->curwords];
                slot->len = strlen(cur->lexeme);
                slot->word = cur->lexeme;
                slot->nvariant = cur->nvariant;
                slot->alen = 0;
                slot->pos.pos = LIMITPOS(prs->pos);

                prs->curwords++;
            }

            pfree(norms);
        }
    } while (type > 0);

    FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
/*
* Headline framework
*/
/*
 * hladdword
 *      Append a copy of the token buf (buflen bytes, of the given type) to
 *      prs->words, doubling the array as needed.  The new entry is zeroed
 *      apart from type, len and word; the copy is not NUL-terminated.
 */
static void
hladdword(HeadlineText * prs, char *buf, int4 buflen, int type)
{
    HeadlineWord *slot;

    while (prs->curwords >= prs->lenwords)
    {
        prs->lenwords *= 2;
        prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord));
    }

    slot = &(prs->words[prs->curwords]);
    memset(slot, 0, sizeof(HeadlineWord));

    slot->type = (uint8) type;
    slot->len = buflen;
    slot->word = palloc(buflen);
    memcpy(slot->word, buf, buflen);

    prs->curwords++;
}
/*
 * hlfinditem
 *      Attach to the most recently added headline word every query operand
 *      equal to the lexeme buf (buflen bytes).
 *
 * The first matching operand is stored in that word's item field.  Each
 * further match appends a copy of the word marked repeated, pointing at the
 * additional operand.  Room for query->size extra entries is reserved up
 * front, so the array is not moved while the loop runs.
 */
static void
hlfinditem(HeadlineText * prs, TSQuery query, char *buf, int buflen)
{
    int         i;
    QueryItem  *item;
    HeadlineWord *word;

    while (prs->curwords + query->size >= prs->lenwords)
    {
        prs->lenwords *= 2;
        prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord));
    }

    word = &(prs->words[prs->curwords - 1]);

    for (i = 0, item = GETQUERY(query); i < query->size; i++, item++)
    {
        HeadlineWord *dup;

        /* only value items of the same length with identical bytes match */
        if (item->type != VAL)
            continue;
        if (item->length != buflen)
            continue;
        if (strncmp(GETOPERAND(query) + item->distance, buf, buflen) != 0)
            continue;

        if (!word->item)
        {
            word->item = item;
            continue;
        }

        /* word already has an operand: record this one on a repeated copy */
        dup = &(prs->words[prs->curwords]);
        memcpy(dup, word, sizeof(HeadlineWord));
        dup->item = item;
        dup->repeated = 1;
        prs->curwords++;
    }
}
/*
 * addHLParsedLex
 *      Add the tokens of list lexs to the headline and link them with the
 *      query operands matching the lexemes in norms (which may be NULL).
 *
 * Every node of lexs is freed, and so are norms and its lexeme strings.
 */
static void
addHLParsedLex(HeadlineText * prs, TSQuery query, ParsedLex * lexs, TSLexeme * norms)
{
    TSLexeme   *lexeme;

    while (lexs != NULL)
    {
        ParsedLex  *following = lexs->next;

        if (lexs->type > 0)
            hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);

        /* the full set of lexemes is matched once per token */
        for (lexeme = norms; lexeme && lexeme->lexeme; lexeme++)
            hlfinditem(prs, query, lexeme->lexeme, strlen(lexeme->lexeme));

        pfree(lexs);
        lexs = following;
    }

    if (norms != NULL)
    {
        for (lexeme = norms; lexeme->lexeme; lexeme++)
            pfree(lexeme->lexeme);
        pfree(norms);
    }
}
/*
 * hlparsetext
 *      Headline counterpart of parsetext: tokenize buf with the parser of
 *      configuration cfgId, lexize each token, and record both the original
 *      tokens and their matches against query in prs.
 */
void
hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, char *buf, int4 buflen)
{
    int         type,
                lenlemm;
    char       *lemm = NULL;
    LexizeData  ldata;
    TSLexeme   *norms;
    ParsedLex  *lexs;
    TSConfigCacheEntry *cfg = lookup_ts_config_cache(cfgId);
    TSParserCacheEntry *prsobj = lookup_ts_parser_cache(cfg->prsId);
    void       *prsdata;

    prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
                                                     PointerGetDatum(buf),
                                                     Int32GetDatum(buflen)));

    LexizeInit(&ldata, cfg);

    do
    {
        type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
                                           PointerGetDatum(prsdata),
                                           PointerGetDatum(&lemm),
                                           PointerGetDatum(&lenlemm)));

        if (type > 0 && lenlemm >= MAXSTRLEN)
        {
#ifdef IGNORE_LONGLEXEME
            ereport(NOTICE,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("word is too long to be indexed"),
                     errdetail("Words longer than %d characters are ignored.",
                               MAXSTRLEN)));
            continue;
#else
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("word is too long to be indexed")));
#endif
        }

        LexizeAddLemm(&ldata, type, lemm, lenlemm);

        /*
         * Hand over the consumed tokens after every LexizeExec call, also
         * after the final one that returns NULL.
         */
        do
        {
            norms = LexizeExec(&ldata, &lexs);
            addHLParsedLex(prs, query, lexs, norms);
        } while (norms);
    } while (type > 0);

    FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
/*
 * generatHeadline
 *      Assemble the headline text from the words collected in prs.
 *
 * Entries marked repeated are skipped.  Of the others, words with "in" set
 * are emitted: a single space if "replace" is set, otherwise the word text,
 * wrapped in startsel/stopsel when "selected".  Words that are not "in" have
 * their text freed.  The result is a palloc'd text value.
 */
text *
generatHeadline(HeadlineText * prs)
{
    int         len = 128;
    text       *out = (text *) palloc(len);
    char       *ptr = ((char *) out) + VARHDRSZ;
    int         i;

    for (i = 0; i < prs->curwords; i++)
    {
        HeadlineWord *wrd = &prs->words[i];

        /* grow until this word plus both markers is certain to fit */
        while (wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char *) out)) >= len)
        {
            int         dist = ptr - ((char *) out);

            len *= 2;
            out = (text *) repalloc(out, len);
            ptr = ((char *) out) + dist;
        }

        if (wrd->repeated)
            continue;           /* duplicate entry: nothing to emit or free */

        if (!wrd->in)
        {
            pfree(wrd->word);
            continue;
        }

        if (wrd->replace)
        {
            *ptr++ = ' ';
            continue;
        }

        if (wrd->selected)
        {
            memcpy(ptr, prs->startsel, prs->startsellen);
            ptr += prs->startsellen;
        }

        memcpy(ptr, wrd->word, wrd->len);
        ptr += wrd->len;

        if (wrd->selected)
        {
            memcpy(ptr, prs->stopsel, prs->stopsellen);
            ptr += prs->stopsellen;
        }
    }

    SET_VARSIZE(out, ptr - ((char *) out));
    return out;
}

View File

@ -0,0 +1,330 @@
/*-------------------------------------------------------------------------
*
* ts_utils.c
* various support functions
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "miscadmin.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
/*
 * States of the parse_keyvalpairs() scanner:
 *   CS_WAITKEY    skipping whitespace before a key
 *   CS_INKEY      inside a key
 *   CS_WAITEQ     key finished, waiting for '='
 *   CS_WAITVALUE  '=' seen, waiting for the value to start
 *   CS_INVALUE    inside a double-quoted value
 *   CS_IN2VALUE   inside an unquoted value
 *   CS_WAITDELIM  value finished, waiting for ','
 *   CS_INESC      one character is skipped, then back to CS_INVALUE
 *   CS_IN2ESC     one character is skipped, then back to CS_IN2VALUE
 */
#define CS_WAITKEY 0
#define CS_INKEY 1
#define CS_WAITEQ 2
#define CS_WAITVALUE 3
#define CS_INVALUE 4
#define CS_IN2VALUE 5
#define CS_WAITDELIM 6
#define CS_INESC 7
#define CS_IN2ESC 8
/*
 * nstrdup
 *      Return a palloc'd, NUL-terminated copy of the first len bytes of ptr
 *      with backslash escapes removed: each backslash is dropped and the
 *      character after it is copied literally.
 *
 * The unescaping is done in place on the copy, one (possibly multibyte)
 * character at a time.  A backslash that is the very last byte has nothing
 * to escape and is dropped; without that check the scan would step over the
 * terminator and keep reading past the end of the buffer.
 */
static char *
nstrdup(char *ptr, int len)
{
    char       *res = palloc(len + 1),
               *cptr;

    memcpy(res, ptr, len);
    res[len] = '\0';

    /* from here on ptr reads from, and cptr writes to, the copy */
    cptr = ptr = res;
    while (*ptr)
    {
        if (t_iseq(ptr, '\\'))
        {
            ptr++;
            if (*ptr == '\0')
                break;          /* trailing backslash: nothing follows */
        }
        COPYCHAR(cptr, ptr);
        cptr += pg_mblen(ptr);
        ptr += pg_mblen(ptr);
    }
    *cptr = '\0';

    return res;
}
/*
 * Parse a parameter string consisting of key = value clauses
 *
 * Input is a comma-separated list such as:  key1 = value, key2 = "a value"
 * Keys consist of alphabetic characters.  A value is either double-quoted
 * or runs up to the next whitespace or comma; in both forms a backslash
 * makes the following character part of the value.
 *
 * On return *m points to a palloc'd array of Map entries with unescaped
 * copies of the keys and values.  The array is zero-filled and has at least
 * one unused entry after the last pair, which serves as terminator.
 *
 * Malformed input raises ERROR (ERRCODE_SYNTAX_ERROR).
 *
 * Changes from the previous version:
 *  - a backslash inside an unquoted value now enters CS_IN2ESC and returns
 *    to CS_IN2VALUE; it used to enter CS_INESC and so continued as if the
 *    value were double-quoted.
 *  - the comma counter is an int rather than a char, so a long list can no
 *    longer overflow it and undersize the Map array.
 */
void
parse_keyvalpairs(text *in, Map ** m)
{
    Map        *mptr;
    char       *ptr = VARDATA(in),
               *begin = NULL;
    int         num = 0;
    int         state = CS_WAITKEY;

    /*
     * Count commas to get an upper bound on the number of pairs (commas
     * inside quoted values only make the estimate larger).
     */
    while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
    {
        if (t_iseq(ptr, ','))
            num++;
        ptr += pg_mblen(ptr);
    }

    /* num + 1 pairs at most, plus one zeroed terminator entry */
    *m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
    memset(mptr, 0, sizeof(Map) * (num + 2));

    ptr = VARDATA(in);
    while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
    {
        if (state == CS_WAITKEY)
        {
            if (t_isalpha(ptr))
            {
                begin = ptr;
                state = CS_INKEY;
            }
            else if (!t_isspace(ptr))
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("invalid parameter list format: \"%s\"",
                                TextPGetCString(in))));
        }
        else if (state == CS_INKEY)
        {
            if (t_isspace(ptr))
            {
                mptr->key = nstrdup(begin, ptr - begin);
                state = CS_WAITEQ;
            }
            else if (t_iseq(ptr, '='))
            {
                mptr->key = nstrdup(begin, ptr - begin);
                state = CS_WAITVALUE;
            }
            else if (!t_isalpha(ptr))
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("invalid parameter list format: \"%s\"",
                                TextPGetCString(in))));
        }
        else if (state == CS_WAITEQ)
        {
            if (t_iseq(ptr, '='))
                state = CS_WAITVALUE;
            else if (!t_isspace(ptr))
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("invalid parameter list format: \"%s\"",
                                TextPGetCString(in))));
        }
        else if (state == CS_WAITVALUE)
        {
            if (t_iseq(ptr, '"'))
            {
                /* quoted value starts after the opening quote */
                begin = ptr + 1;
                state = CS_INVALUE;
            }
            else if (!t_isspace(ptr))
            {
                begin = ptr;
                state = CS_IN2VALUE;
            }
        }
        else if (state == CS_INVALUE)
        {
            if (t_iseq(ptr, '"'))
            {
                mptr->value = nstrdup(begin, ptr - begin);
                mptr++;
                state = CS_WAITDELIM;
            }
            else if (t_iseq(ptr, '\\'))
                state = CS_INESC;
        }
        else if (state == CS_IN2VALUE)
        {
            if (t_isspace(ptr) || t_iseq(ptr, ','))
            {
                mptr->value = nstrdup(begin, ptr - begin);
                mptr++;
                state = (t_iseq(ptr, ',')) ? CS_WAITKEY : CS_WAITDELIM;
            }
            else if (t_iseq(ptr, '\\'))
                state = CS_IN2ESC;  /* stay in the unquoted form */
        }
        else if (state == CS_WAITDELIM)
        {
            if (t_iseq(ptr, ','))
                state = CS_WAITKEY;
            else if (!t_isspace(ptr))
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("invalid parameter list format: \"%s\"",
                                TextPGetCString(in))));
        }
        else if (state == CS_INESC)
            state = CS_INVALUE;     /* escaped character is skipped here */
        else if (state == CS_IN2ESC)
            state = CS_IN2VALUE;    /* likewise for the unquoted form */
        else
            elog(ERROR, "unrecognized parse_keyvalpairs state: %d", state);

        ptr += pg_mblen(ptr);
    }

    /* an unquoted value may simply run to the end of the input */
    if (state == CS_IN2VALUE)
    {
        mptr->value = nstrdup(begin, ptr - begin);
        mptr++;
    }
    else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("invalid parameter list format: \"%s\"",
                        TextPGetCString(in))));
}
/*
 * Build the full path name of a tsearch configuration file from its base
 * name and extension:  <sharedir>/tsearch_data/<basename>.<extension>
 *
 * The base name is treated as user-supplied and is validated to block
 * pathname attacks; the extension is trusted as is.
 *
 * Returns a palloc'd string of MAXPGPATH bytes.
 */
char *
get_tsearch_config_filename(const char *basename,
                            const char *extension)
{
    char        sharepath[MAXPGPATH];
    char       *result;
    const char *p = basename;

    /*
     * Only alphabetic characters are accepted in the base name.  That may
     * be stricter than necessary, but nothing outside the tsearch_data
     * directory may become reachable, so '/' in particular *must* be
     * rejected.  The same test is applied to timezonesets names.
     */
    while (*p != '\0')
    {
        if (!isalpha((unsigned char) *p))
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("invalid text search configuration file name \"%s\"",
                            basename)));
        p++;
    }

    get_share_path(my_exec_path, sharepath);

    result = palloc(MAXPGPATH);
    snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
             sharepath, basename, extension);

    return result;
}
/* size of the line buffer used when reading a stopword file */
#define STOPBUFLEN 4096

/*
 * readstoplist
 *      Load the stopword file named by "in" into *s.
 *
 * "in" is the base name of a ".stop" file, resolved through
 * get_tsearch_config_filename().  With a NULL or empty name the list is
 * left empty (s->len = 0, s->stop = NULL).
 *
 * Each line contributes its first whitespace-delimited word; lines that
 * start with whitespace or are empty are skipped.  A word is stored through
 * s->wordop() if that is set, otherwise as a pstrdup'd copy.  The list is
 * not sorted here.
 *
 * Raises ERROR if the file cannot be opened or a word is not valid in the
 * server encoding.
 *
 * The byte given to isspace() is cast to unsigned char: passing a plain
 * char is undefined for bytes >= 0x80 where char is signed, and such bytes
 * occur in multibyte stopwords.
 */
void
readstoplist(char *in, StopList * s)
{
    char      **stop = NULL;

    s->len = 0;
    if (in && *in)
    {
        char       *filename = get_tsearch_config_filename(in, "stop");
        FILE       *hin;
        char        buf[STOPBUFLEN];
        int         reallen = 0;    /* allocated entries in stop[] */
        int         line = 0;

        if ((hin = AllocateFile(filename, "r")) == NULL)
            ereport(ERROR,
                    (errcode(ERRCODE_CONFIG_FILE_ERROR),
                     errmsg("could not open stopword file \"%s\": %m",
                            filename)));

        while (fgets(buf, STOPBUFLEN, hin))
        {
            char       *pbuf = buf;

            line++;

            /* cut the line at the first whitespace (this strips the newline) */
            while (*pbuf && !isspace((unsigned char) *pbuf))
                pbuf++;
            *pbuf = '\0';

            if (*buf == '\0')
                continue;

            if (!pg_verifymbstr(buf, strlen(buf), true))
            {
                FreeFile(hin);
                ereport(ERROR,
                        (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
                         errmsg("invalid multibyte encoding at line %d in file \"%s\"",
                                line, filename)));
            }

            /* start with 16 entries, then double whenever the array is full */
            if (s->len >= reallen)
            {
                if (reallen == 0)
                {
                    reallen = 16;
                    stop = (char **) palloc(sizeof(char *) * reallen);
                }
                else
                {
                    reallen *= 2;
                    stop = (char **) repalloc((void *) stop, sizeof(char *) * reallen);
                }
            }

            if (s->wordop)
                stop[s->len] = s->wordop(buf);
            else
                stop[s->len] = pstrdup(buf);

            (s->len)++;
        }

        FreeFile(hin);
        pfree(filename);
    }

    s->stop = stop;
}
/*
 * comparestr
 *      qsort/bsearch comparator for arrays of C strings: a and b each point
 *      to a char * element; the strings are ordered with strcmp().
 */
static int
comparestr(const void *a, const void *b)
{
    const char *left = *(char *const *) a;
    const char *right = *(char *const *) b;

    return strcmp(left, right);
}
/*
 * sortstoplist
 *      Sort the words of *s into strcmp() order, as searchstoplist()
 *      requires.  Does nothing for an empty list.
 */
void
sortstoplist(StopList * s)
{
    if (!s->stop || s->len <= 0)
        return;

    qsort(s->stop, s->len, sizeof(char *), comparestr);
}
/*
 * searchstoplist
 *      Return true if key is one of the words in *s.  The lookup is a
 *      binary search, so the list must already be in comparestr() order
 *      (see sortstoplist).
 */
bool
searchstoplist(StopList * s, char *key)
{
    if (!s->stop || s->len <= 0)
        return false;

    if (bsearch(&key, s->stop, s->len, sizeof(char *), comparestr))
        return true;

    return false;
}
/*
 * pnstrdup
 *      Return a palloc'd, NUL-terminated copy of the first len bytes of in.
 *      Exactly len bytes are copied, whatever their content.
 */
char *
pnstrdup(const char *in, int len)
{
    char       *copy;

    copy = palloc(len + 1);
    memcpy(copy, in, len);
    copy[len] = '\0';

    return copy;
}

View File

@ -0,0 +1,360 @@
/*-------------------------------------------------------------------------
*
* wparser.c
* Standard interface to word parser
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "funcapi.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/skey.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_ts_parser.h"
#include "catalog/pg_type.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
#include "utils/syscache.h"
/******sql-level interface******/
/*
 * Cross-call state of the ts_token_type set-returning functions.
 */
typedef struct
{
/* index of the next entry of list to return */
int cur;
/* array returned by the parser's lextype method; the scan stops at lexid 0 */
LexDescr *list;
} TSTokenTypeStorage;
/*
 * tt_setup_firstcall
 *      First-call setup shared by the ts_token_type functions: fetch the
 *      token type list of parser prsid and describe the result row
 *      (tokid int4, alias text, description text).
 *
 * Everything that must outlive this call is allocated in the SRF's
 * multi-call memory context.
 */
static void
tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
{
    TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
    TSTokenTypeStorage *state;
    MemoryContext oldcontext;
    TupleDesc   tupdesc;

    if (!OidIsValid(prs->lextypeOid))
        elog(ERROR, "method lextype isn't defined for text search parser %u",
             prsid);

    oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

    state = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
    state->cur = 0;
    /* OidFunctionCall0 is absent */
    state->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
                                                                (Datum) 0));
    funcctx->user_fctx = (void *) state;

    tupdesc = CreateTemplateTupleDesc(3, false);
    TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
                       INT4OID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
                       TEXTOID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
                       TEXTOID, -1, 0);
    funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);

    MemoryContextSwitchTo(oldcontext);
}
/*
 * tt_process_call
 *      Per-call worker of the ts_token_type functions.
 *
 * Returns the next token type as a composite datum, freeing that entry's
 * alias and description strings.  At the end of the list (an entry with
 * lexid 0, or no list at all) the state is released and (Datum) 0 is
 * returned.
 */
static Datum
tt_process_call(FuncCallContext *funcctx)
{
    TSTokenTypeStorage *state = (TSTokenTypeStorage *) funcctx->user_fctx;
    LexDescr   *entry;
    Datum       result;
    char       *values[3];
    char        txtid[16];
    HeapTuple   tuple;

    if (!state->list || !state->list[state->cur].lexid)
    {
        /* exhausted: release the state */
        if (state->list)
            pfree(state->list);
        pfree(state);
        return (Datum) 0;
    }

    entry = &state->list[state->cur];

    sprintf(txtid, "%d", entry->lexid);
    values[0] = txtid;
    values[1] = entry->alias;
    values[2] = entry->descr;

    tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    result = HeapTupleGetDatum(tuple);

    /* the strings have been copied into the tuple */
    pfree(values[1]);
    pfree(values[2]);
    state->cur++;

    return result;
}
/*
 * ts_token_type_byid
 *      Set-returning function: list the token types of the parser whose OID
 *      is argument 0.
 */
Datum
ts_token_type_byid(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum       row;

    if (SRF_IS_FIRSTCALL())
    {
        funcctx = SRF_FIRSTCALL_INIT();
        tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
    }

    funcctx = SRF_PERCALL_SETUP();
    row = tt_process_call(funcctx);

    if (row == (Datum) 0)
    {
        SRF_RETURN_DONE(funcctx);
    }

    SRF_RETURN_NEXT(funcctx, row);
}
/*
 * ts_token_type_byname
 *      Set-returning function: list the token types of the parser named by
 *      argument 0 (a possibly qualified name given as text).
 */
Datum
ts_token_type_byname(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum       row;

    if (SRF_IS_FIRSTCALL())
    {
        text       *prsname = PG_GETARG_TEXT_P(0);
        Oid         prsId;

        funcctx = SRF_FIRSTCALL_INIT();
        prsId = TSParserGetPrsid(textToQualifiedNameList(prsname), false);
        tt_setup_firstcall(funcctx, prsId);
    }

    funcctx = SRF_PERCALL_SETUP();
    row = tt_process_call(funcctx);

    if (row == (Datum) 0)
    {
        SRF_RETURN_DONE(funcctx);
    }

    SRF_RETURN_NEXT(funcctx, row);
}
/*
 * One token collected by prs_setup_firstcall() for the ts_parse functions.
 */
typedef struct
{
/* token type as returned by the parser's prstoken method */
int type;
/* palloc'd, NUL-terminated copy of the token text */
char *lexeme;
} LexemeEntry;
/*
 * Cross-call state of the ts_parse set-returning functions.
 */
typedef struct
{
/* while collecting: number of tokens stored; afterwards: next one to return */
int cur;
/* while collecting: allocated size of list; afterwards: number of tokens */
int len;
/* the collected tokens */
LexemeEntry *list;
} PrsStorage;
/*
 * prs_setup_firstcall
 *      First-call setup shared by the ts_parse functions: run parser prsid
 *      over txt, keep every token (type plus a NUL-terminated copy of its
 *      text), and describe the result row (tokid int4, token text).
 *
 * The token array and the copies are allocated in the SRF's multi-call
 * memory context.
 */
static void
prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
{
    TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
    PrsStorage *state;
    MemoryContext oldcontext;
    TupleDesc   tupdesc;
    void       *prsdata;
    char       *lex = NULL;
    int         llen = 0,
                type = 0;

    oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

    state = (PrsStorage *) palloc(sizeof(PrsStorage));
    state->cur = 0;
    state->len = 16;
    state->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * state->len);

    prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
                                                     PointerGetDatum(VARDATA(txt)),
                                                     Int32GetDatum(VARSIZE(txt) - VARHDRSZ)));

    /* a token type of 0 ends the input */
    for (;;)
    {
        LexemeEntry *slot;

        type = DatumGetInt32(FunctionCall3(&prs->prstoken,
                                           PointerGetDatum(prsdata),
                                           PointerGetDatum(&lex),
                                           PointerGetDatum(&llen)));
        if (type == 0)
            break;

        if (state->cur >= state->len)
        {
            state->len = 2 * state->len;
            state->list = (LexemeEntry *) repalloc(state->list, sizeof(LexemeEntry) * state->len);
        }

        slot = &state->list[state->cur];
        slot->lexeme = palloc(llen + 1);
        memcpy(slot->lexeme, lex, llen);
        slot->lexeme[llen] = '\0';
        slot->type = type;

        state->cur++;
    }

    FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));

    /* switch from "filling" to "returning": len = count, cur = next index */
    state->len = state->cur;
    state->cur = 0;
    funcctx->user_fctx = (void *) state;

    tupdesc = CreateTemplateTupleDesc(2, false);
    TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
                       INT4OID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
                       TEXTOID, -1, 0);
    funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);

    MemoryContextSwitchTo(oldcontext);
}
/*
 * prs_process_call
 *      Per-call worker of the ts_parse functions.
 *
 * Returns the next collected token as a composite datum and frees its text
 * copy.  Once all tokens have been returned the state is released and
 * (Datum) 0 is returned.
 */
static Datum
prs_process_call(FuncCallContext *funcctx)
{
    PrsStorage *state = (PrsStorage *) funcctx->user_fctx;
    LexemeEntry *entry;
    Datum       result;
    char       *values[2];
    char        tid[16];
    HeapTuple   tuple;

    if (state->cur >= state->len)
    {
        /* exhausted: release the state */
        if (state->list)
            pfree(state->list);
        pfree(state);
        return (Datum) 0;
    }

    entry = &state->list[state->cur];

    sprintf(tid, "%d", entry->type);
    values[0] = tid;
    values[1] = entry->lexeme;

    tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    result = HeapTupleGetDatum(tuple);

    /* the text has been copied into the tuple */
    pfree(values[1]);
    state->cur++;

    return result;
}
/*
 * ts_parse_byid
 *      Set-returning function: tokenize the text in argument 1 with the
 *      parser whose OID is argument 0.
 */
Datum
ts_parse_byid(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum       row;

    if (SRF_IS_FIRSTCALL())
    {
        text       *txt = PG_GETARG_TEXT_P(1);

        funcctx = SRF_FIRSTCALL_INIT();
        prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
        /* all tokens were copied, so a detoasted input can go */
        PG_FREE_IF_COPY(txt, 1);
    }

    funcctx = SRF_PERCALL_SETUP();
    row = prs_process_call(funcctx);

    if (row == (Datum) 0)
    {
        SRF_RETURN_DONE(funcctx);
    }

    SRF_RETURN_NEXT(funcctx, row);
}
/*
 * ts_parse_byname
 *      Set-returning function: tokenize the text in argument 1 with the
 *      parser named by argument 0 (a possibly qualified name given as text).
 */
Datum
ts_parse_byname(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum       row;

    if (SRF_IS_FIRSTCALL())
    {
        text       *prsname = PG_GETARG_TEXT_P(0);
        text       *txt = PG_GETARG_TEXT_P(1);
        Oid         prsId;

        funcctx = SRF_FIRSTCALL_INIT();
        prsId = TSParserGetPrsid(textToQualifiedNameList(prsname), false);
        prs_setup_firstcall(funcctx, prsId, txt);
    }

    funcctx = SRF_PERCALL_SETUP();
    row = prs_process_call(funcctx);

    if (row == (Datum) 0)
    {
        SRF_RETURN_DONE(funcctx);
    }

    SRF_RETURN_NEXT(funcctx, row);
}
/*
 * ts_headline_byid_opt
 *      Build a headline for a document.
 *
 * Arguments: 0 = configuration OID, 1 = document text, 2 = tsquery,
 * 3 = options text (optional; treated as absent when the function is
 * called with only three arguments or with a null pointer).
 *
 * The document is parsed into headline words, the parser's prsheadline
 * method then processes them using the options and query, and the result
 * is assembled by generatHeadline().
 */
Datum
ts_headline_byid_opt(PG_FUNCTION_ARGS)
{
    text       *in = PG_GETARG_TEXT_P(1);
    TSQuery     query = PG_GETARG_TSQUERY(2);
    text       *opt;
    HeadlineText prs;
    text       *out;
    TSConfigCacheEntry *cfg;
    TSParserCacheEntry *prsobj;

    if (PG_NARGS() > 3 && PG_GETARG_POINTER(3))
        opt = PG_GETARG_TEXT_P(3);
    else
        opt = NULL;

    cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
    prsobj = lookup_ts_parser_cache(cfg->prsId);

    /* start from an all-zero headline with room for 32 words */
    memset(&prs, 0, sizeof(HeadlineText));
    prs.lenwords = 32;
    prs.words = (HeadlineWord *) palloc(sizeof(HeadlineWord) * prs.lenwords);

    hlparsetext(cfg->cfgId, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);

    FunctionCall3(&(prsobj->prsheadline),
                  PointerGetDatum(&prs),
                  PointerGetDatum(opt),
                  PointerGetDatum(query));

    out = generatHeadline(&prs);

    PG_FREE_IF_COPY(in, 1);
    PG_FREE_IF_COPY(query, 2);
    if (opt)
        PG_FREE_IF_COPY(opt, 3);

    pfree(prs.words);
    pfree(prs.startsel);
    pfree(prs.stopsel);

    PG_RETURN_POINTER(out);
}
/*
 * ts_headline_byid
 *      Headline with an explicit configuration OID and no options:
 *      arguments are (config oid, document, query).
 */
Datum
ts_headline_byid(PG_FUNCTION_ARGS)
{
    Datum       cfg = PG_GETARG_DATUM(0);
    Datum       doc = PG_GETARG_DATUM(1);
    Datum       qry = PG_GETARG_DATUM(2);

    PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt, cfg, doc, qry));
}
/*
 * ts_headline
 *      Headline using the current text search configuration and no options:
 *      arguments are (document, query).
 */
Datum
ts_headline(PG_FUNCTION_ARGS)
{
    Datum       cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
    Datum       doc = PG_GETARG_DATUM(0);
    Datum       qry = PG_GETARG_DATUM(1);

    PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt, cfg, doc, qry));
}
/*
 * ts_headline_opt
 *      Headline using the current text search configuration, with options:
 *      arguments are (document, query, options).
 */
Datum
ts_headline_opt(PG_FUNCTION_ARGS)
{
    Datum       cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
    Datum       doc = PG_GETARG_DATUM(0);
    Datum       qry = PG_GETARG_DATUM(1);
    Datum       opts = PG_GETARG_DATUM(2);

    PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
                                        cfg, doc, qry, opts));
}

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
#
# Makefile for utils/adt
#
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.64 2007/04/02 03:49:39 tgl Exp $
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.65 2007/08/21 01:11:18 tgl Exp $
#
subdir = src/backend/utils/adt
@ -25,8 +25,11 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
network.o mac.o inet_net_ntop.o inet_net_pton.o \
ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o xml.o \
uuid.o
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
tsvector.o tsvector_op.o \
uuid.o xml.o
like.o: like.c like_match.c

View File

@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.102 2007/06/26 16:48:09 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.103 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -27,6 +27,8 @@
#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "parser/parse_type.h"
@ -1065,6 +1067,231 @@ regtypesend(PG_FUNCTION_ARGS)
}
/*
 * regconfigin - converts "tsconfigname" to tsconfig OID
 *
 * Accepted inputs:
 *   "-"            unknown, yields OID 0
 *   all digits     a numeric OID, taken as is (for symmetry with the
 *                  output routine)
 *   anything else  a possibly qualified name, which must match an existing
 *                  pg_ts_config entry
 *
 * This function is not needed in bootstrap mode, so we don't worry about
 * making it work then.
 */
Datum
regconfigin(PG_FUNCTION_ARGS)
{
    char       *input = PG_GETARG_CSTRING(0);
    List       *names;

    /* '-' ? */
    if (strcmp(input, "-") == 0)
        PG_RETURN_OID(InvalidOid);

    /* Numeric OID? */
    if (input[0] >= '0' &&
        input[0] <= '9' &&
        strspn(input, "0123456789") == strlen(input))
    {
        Datum       oid_datum = DirectFunctionCall1(oidin,
                                                    CStringGetDatum(input));

        PG_RETURN_OID(DatumGetObjectId(oid_datum));
    }

    /*
     * Normal case: split the name into its components and look it up among
     * the pg_ts_config entries in the current search path.
     */
    names = stringToQualifiedNameList(input);

    PG_RETURN_OID(TSConfigGetCfgid(names, false));
}
/*
* regconfigout - converts tsconfig OID to "tsconfigname"
*/
Datum
regconfigout(PG_FUNCTION_ARGS)
{
Oid cfgid = PG_GETARG_OID(0);
char *result;
HeapTuple cfgtup;
if (cfgid == InvalidOid)
{
result = pstrdup("-");
PG_RETURN_CSTRING(result);
}
cfgtup = SearchSysCache(TSCONFIGOID,
ObjectIdGetDatum(cfgid),
0, 0, 0);
if (HeapTupleIsValid(cfgtup))
{
Form_pg_ts_config cfgform = (Form_pg_ts_config) GETSTRUCT(cfgtup);
char *cfgname = NameStr(cfgform->cfgname);
char *nspname;
/*
* Would this config be found by regconfigin? If not, qualify it.
*/
if (TSConfigIsVisible(cfgid))
nspname = NULL;
else
nspname = get_namespace_name(cfgform->cfgnamespace);
result = quote_qualified_identifier(nspname, cfgname);
ReleaseSysCache(cfgtup);
}
else
{
/* If OID doesn't match any pg_ts_config row, return it numerically */
result = (char *) palloc(NAMEDATALEN);
snprintf(result, NAMEDATALEN, "%u", cfgid);
}
PG_RETURN_CSTRING(result);
}
/*
 * regconfigrecv - converts external binary format to regconfig
 */
Datum
regconfigrecv(PG_FUNCTION_ARGS)
{
    /* the binary format is that of a plain oid, so let oidrecv do the work */
    Datum       received = oidrecv(fcinfo);

    return received;
}
/*
 * regconfigsend - converts regconfig to binary format
 */
Datum
regconfigsend(PG_FUNCTION_ARGS)
{
    /* the binary format is that of a plain oid, so let oidsend do the work */
    Datum       sent = oidsend(fcinfo);

    return sent;
}
/*
 * regdictionaryin - converts "tsdictionaryname" to tsdictionary OID
 *
 * Accepted inputs:
 *   "-"            unknown, yields OID 0
 *   all digits     a numeric OID, taken as is (for symmetry with the
 *                  output routine)
 *   anything else  a possibly qualified name, which must match an existing
 *                  pg_ts_dict entry
 *
 * This function is not needed in bootstrap mode, so we don't worry about
 * making it work then.
 */
Datum
regdictionaryin(PG_FUNCTION_ARGS)
{
    char       *input = PG_GETARG_CSTRING(0);
    List       *names;

    /* '-' ? */
    if (strcmp(input, "-") == 0)
        PG_RETURN_OID(InvalidOid);

    /* Numeric OID? */
    if (input[0] >= '0' &&
        input[0] <= '9' &&
        strspn(input, "0123456789") == strlen(input))
    {
        Datum       oid_datum = DirectFunctionCall1(oidin,
                                                    CStringGetDatum(input));

        PG_RETURN_OID(DatumGetObjectId(oid_datum));
    }

    /*
     * Normal case: split the name into its components and look it up among
     * the pg_ts_dict entries in the current search path.
     */
    names = stringToQualifiedNameList(input);

    PG_RETURN_OID(TSDictionaryGetDictid(names, false));
}
/*
* regdictionaryout - converts tsdictionary OID to "tsdictionaryname"
*/
Datum
regdictionaryout(PG_FUNCTION_ARGS)
{
Oid dictid = PG_GETARG_OID(0);
char *result;
HeapTuple dicttup;
if (dictid == InvalidOid)
{
result = pstrdup("-");
PG_RETURN_CSTRING(result);
}
dicttup = SearchSysCache(TSDICTOID,
ObjectIdGetDatum(dictid),
0, 0, 0);
if (HeapTupleIsValid(dicttup))
{
Form_pg_ts_dict dictform = (Form_pg_ts_dict) GETSTRUCT(dicttup);
char *dictname = NameStr(dictform->dictname);
char *nspname;
/*
* Would this dictionary be found by regdictionaryin?
* If not, qualify it.
*/
if (TSDictionaryIsVisible(dictid))
nspname = NULL;
else
nspname = get_namespace_name(dictform->dictnamespace);
result = quote_qualified_identifier(nspname, dictname);
ReleaseSysCache(dicttup);
}
else
{
/* If OID doesn't match any pg_ts_dict row, return it numerically */
result = (char *) palloc(NAMEDATALEN);
snprintf(result, NAMEDATALEN, "%u", dictid);
}
PG_RETURN_CSTRING(result);
}
/*
 * regdictionaryrecv - converts external binary format to regdictionary
 */
Datum
regdictionaryrecv(PG_FUNCTION_ARGS)
{
    /* the binary format is that of a plain oid, so let oidrecv do the work */
    Datum       received = oidrecv(fcinfo);

    return received;
}
/*
 * regdictionarysend - converts regdictionary to binary format
 */
Datum
regdictionarysend(PG_FUNCTION_ARGS)
{
    /* the binary format is that of a plain oid, so let oidsend do the work */
    Datum       sent = oidsend(fcinfo);

    return sent;
}
/*
* text_regclass: convert text to regclass
*

View File

@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.234 2007/05/05 17:05:48 mha Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.235 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -2822,6 +2822,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
case REGOPERATOROID:
case REGCLASSOID:
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
@ -2925,6 +2927,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
case REGOPERATOROID:
case REGCLASSOID:
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
/* we can treat OIDs as integers... */
return (double) DatumGetObjectId(value);
}

View File

@ -0,0 +1,157 @@
/*-------------------------------------------------------------------------
*
* tsginidx.c
* GIN support functions for tsvector_ops
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/skey.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
Datum
gin_extract_tsvector(PG_FUNCTION_ARGS)
{
TSVector vector = PG_GETARG_TSVECTOR(0);
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
Datum *entries = NULL;
*nentries = 0;
if (vector->size > 0)
{
int i;
WordEntry *we = ARRPTR(vector);
*nentries = (uint32) vector->size;
entries = (Datum *) palloc(sizeof(Datum) * vector->size);
for (i = 0; i < vector->size; i++)
{
text *txt = (text *) palloc(VARHDRSZ + we->len);
SET_VARSIZE(txt, VARHDRSZ + we->len);
memcpy(VARDATA(txt), STRPTR(vector) + we->pos, we->len);
entries[i] = PointerGetDatum(txt);
we++;
}
}
PG_FREE_IF_COPY(vector, 0);
PG_RETURN_POINTER(entries);
}
/*
 * gin_extract_query - build the array of index keys for a tsquery.
 *
 * Argument 0 is the tsquery, argument 1 points to the output key count and
 * argument 2 is the strategy number of the operator being used.
 *
 * One text key is produced for every VAL item of the query.  An ERROR is
 * raised if clean_NOT() returns NULL for the query, or if a VAL item has a
 * non-zero weight and the strategy is not TSearchWithClassStrategyNumber.
 * For an empty query the count is set to -1 and NULL is returned.
 */
Datum
gin_extract_query(PG_FUNCTION_ARGS)
{
TSQuery query = PG_GETARG_TSQUERY(0);
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
StrategyNumber strategy = PG_GETARG_UINT16(2);
Datum *entries = NULL;
*nentries = 0;
if (query->size > 0)
{
int4 i,
j = 0,
len;
QueryItem *item;
/* the result of clean_NOT is only tested for NULL; 'len' is not used */
item = clean_NOT(GETQUERY(query), &len);
if (!item)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("query requires full scan, which is not supported by GIN indexes")));
/* from here on work with the original, uncleaned item array */
item = GETQUERY(query);
/* first pass: count the VAL items so the key array can be sized */
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
(*nentries)++;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
/* second pass: copy each operand string into its own text datum */
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
{
text *txt;
txt = (text *) palloc(VARHDRSZ + item[i].length);
SET_VARSIZE(txt, VARHDRSZ + item[i].length);
memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
entries[j++] = PointerGetDatum(txt);
if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("@@ operator does not support lexeme class restrictions"),
errhint("Use the @@@ operator instead.")));
}
}
else
/*
 * NOTE(review): *nentries is a uint32, so -1 is stored as the largest
 * uint32 value; presumably the caller treats it as a sentinel - confirm.
 */
*nentries = -1; /* nothing can be found */
PG_FREE_IF_COPY(query, 0);
PG_RETURN_POINTER(entries);
}
/*
 * Context handed to checkcondition_gin through TS_execute's opaque pointer.
 */
typedef struct
{
QueryItem *frst; /* first item of the query's item array */
bool *mapped_check; /* per-item flags, indexed by (item - frst) */
} GinChkVal;
/*
 * TS_execute callback: look up the flag recorded for query item 'val',
 * using its position relative to the first item of the query.
 */
static bool
checkcondition_gin(void *checkval, QueryItem * val)
{
    GinChkVal  *ctx = (GinChkVal *) checkval;
    bool       *flags = ctx->mapped_check;

    return flags[val - ctx->frst];
}
/*
 * gin_ts_consistent - evaluate a tsquery against per-key match flags.
 *
 * Argument 0 is an array of booleans with one element per VAL item of the
 * query (in item order); argument 2 is the query.  Argument 1, the strategy
 * number, is not used.  Returns false for an empty query, otherwise the
 * result of TS_execute over the query.
 */
Datum
gin_ts_consistent(PG_FUNCTION_ARGS)
{
    bool       *check = (bool *) PG_GETARG_POINTER(0);
    TSQuery     query = PG_GETARG_TSQUERY(2);
    bool        res = FALSE;
    GinChkVal   gcv;
    QueryItem  *items;
    int4        qpos;
    int4        cpos = 0;

    if (query->size <= 0)
        PG_RETURN_BOOL(res);

    items = GETQUERY(query);
    gcv.frst = items;
    gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);

    /* spread the per-operand flags over the slots of the VAL items */
    for (qpos = 0; qpos < query->size; qpos++)
    {
        if (items[qpos].type == VAL)
            gcv.mapped_check[qpos] = check[cpos++];
    }

    res = TS_execute(GETQUERY(query), &gcv, true, checkcondition_gin);

    PG_RETURN_BOOL(res);
}

View File

@ -0,0 +1,784 @@
/*-------------------------------------------------------------------------
*
* tsgistidx.c
* GiST support functions for tsvector_ops
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/tuptoaster.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/pg_crc.h"
/*
 * Signature geometry: a signature is SIGLENINT int4-sized words, which is
 * SIGLEN bytes or SIGLENBIT bits.
 */
#define SIGLENINT 31 /* >121 => key will toast, so it will not work
* !!! */
#define SIGLEN ( sizeof(int4) * SIGLENINT )
#define SIGLENBIT (SIGLEN * BITS_PER_BYTE)
/* A signature held by value, and a pointer to the first byte of one */
typedef char BITVEC[SIGLEN];
typedef char *BITVECP;
/*
 * Execute 'a' once per signature byte.  The loop counter is a variable
 * named 'i' that must already be declared where the macro is used.
 */
#define LOOPBYTE(a) \
for(i=0;i<SIGLEN;i++) {\
a;\
}
/* Byte of signature x that holds bit number i */
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) )
/* Bit i of the single byte value x */
#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
/* Clear, set and test bit number i of signature x */
#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) )
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 )
/* Map a hash value to a bit number, and set that bit in a signature */
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
/* Key of entry number 'pos' of a GistEntryVector, as a SignTSVector pointer */
#define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key))
/*
 * type of GiST index key
 *
 * 'flag' is a bit mask of ARRKEY, SIGNKEY and ALLISTRUE.  The payload that
 * follows the two header fields is reached with GETARR() (array of int4
 * hash values) or GETSIGN() (bit signature), depending on the flag.
 */
typedef struct
{
    int32       vl_len_;        /* varlena header (do not touch directly!) */
    int4        flag;           /* ARRKEY / SIGNKEY / ALLISTRUE bits */
    char        data[1];        /* first byte of the payload */
} SignTSVector;
/* Bits of SignTSVector.flag */
#define ARRKEY 0x01
#define SIGNKEY 0x02
#define ALLISTRUE 0x04
/* Tests on the flag bits of a key; each yields zero or the bit's value */
#define ISARRKEY(x) ( ((SignTSVector*)(x))->flag & ARRKEY )
#define ISSIGNKEY(x) ( ((SignTSVector*)(x))->flag & SIGNKEY )
#define ISALLTRUE(x) ( ((SignTSVector*)(x))->flag & ALLISTRUE )
/* Size of the fixed part of a key: varlena header plus the flag word */
#define GTHDRSIZE ( VARHDRSZ + sizeof(int4) )
/*
 * Total key size for the given flag: an array key carries 'len' int4 values,
 * an all-true key carries no payload, any other key carries a full signature.
 */
#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
/* Payload of key x viewed as a signature or as an int4 array */
#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
#define GETARR(x) ( (int4*)( (char*)(x)+GTHDRSIZE ) )
/* Number of int4 elements in an array key, derived from its varlena size */
#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int4) )
/*
 * Number of one-bits in an unsigned byte: number_of_ones[b] is the count of
 * set bits in the byte value b (0..255).
 */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
/* Forward declaration: sizebitvec is used before its definition below */
static int4 sizebitvec(BITVECP sign);
/*
 * gtsvectorin - text input function for gtsvector.
 *
 * Always raises ERRCODE_FEATURE_NOT_SUPPORTED; the return statement after
 * the ereport call only gives the function a syntactic return value.
 */
Datum
gtsvectorin(PG_FUNCTION_ARGS)
{
    ereport(ERROR,
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
             errmsg("gtsvector_in not implemented")));

    PG_RETURN_DATUM(0);
}
/* printf formats used by gtsvectorout for signature keys and array keys */
#define SINGOUTSTR "%d true bits, %d false bits"
#define ARROUTSTR "%d unique words"
/* Extra space gtsvectorout adds (twice) on top of the format string length */
#define EXTRALEN ( 2*13 )
/* Output buffer size for gtsvectorout; 0 until computed on first use */
static int outbuf_maxlen = 0;
/*
 * gtsvectorout - text output function for gtsvector.
 *
 * An array key is printed as its element count; any other key is printed as
 * the number of set and unset bits of its signature, where an all-true key
 * counts every bit as set.
 */
Datum
gtsvectorout(PG_FUNCTION_ARGS)
{
    SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_POINTER(0)));
    char       *outbuf;

    /* buffer size is computed once and cached in a file-level static */
    if (outbuf_maxlen == 0)
        outbuf_maxlen = 2 * EXTRALEN + Max(strlen(SINGOUTSTR), strlen(ARROUTSTR)) + 1;

    outbuf = palloc(outbuf_maxlen);

    if (!ISARRKEY(key))
    {
        int         ones;

        if (ISALLTRUE(key))
            ones = SIGLENBIT;
        else
            ones = sizebitvec(GETSIGN(key));

        sprintf(outbuf, SINGOUTSTR, ones, (int) SIGLENBIT - ones);
    }
    else
        sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key));

    PG_FREE_IF_COPY(key, 0);
    PG_RETURN_POINTER(outbuf);
}
static int
compareint(const void *a, const void *b)
{
if (*((int4 *) a) == *((int4 *) b))
return 0;
return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1;
}
/*
 * uniqueint - sort an int4 array in place and drop duplicate values.
 *
 * 'a' is the array and 'l' its element count.  Returns the number of
 * distinct values, which are left in ascending order at the front of 'a'.
 *
 * Arrays with fewer than two elements are returned unchanged.  The empty
 * case needs the early return: the general path ends with "res + 1 - a",
 * which would report one element for an array that has none.
 */
static int
uniqueint(int4 *a, int4 l)
{
    int4       *ptr,
               *res;

    if (l <= 1)
        return l;

    ptr = res = a;

    qsort((void *) a, l, sizeof(int4), compareint);

    /* 'res' is the last kept element, 'ptr' scans the sorted input */
    while (ptr - a < l)
        if (*ptr != *res)
            *(++res) = *ptr++;
        else
            ptr++;

    return res + 1 - a;
}
/*
 * makesign - build a bit signature from an array key.
 *
 * Clears 'sign' and then sets one bit for every int4 element of 'a'.
 */
static void
makesign(BITVECP sign, SignTSVector * a)
{
    int4       *hashes = GETARR(a);
    int4        nhashes = ARRNELEM(a);
    int4        idx;

    MemSet((void *) sign, 0, sizeof(BITVEC));

    for (idx = 0; idx < nhashes; idx++)
    {
        HASH(sign, hashes[idx]);
    }
}
/*
 * gtsvector_compress - convert a GISTENTRY into the form stored in the index.
 *
 * For a leaf entry (entry->leafkey) the key is a tsvector: every lexeme is
 * hashed with CRC32 into an int4, the hashes are sorted and de-duplicated,
 * and the result becomes an ARRKEY key.  If that key is larger than
 * TOAST_INDEX_TARGET it is replaced by a SIGNKEY bit signature.
 *
 * For a non-leaf SIGNKEY entry that is not flagged ALLISTRUE, the signature
 * is replaced by an ALLISTRUE key when every byte of it is 0xff.
 *
 * In all other cases the input entry is returned unchanged.
 */
Datum
gtsvector_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey)
{ /* tsvector */
SignTSVector *res;
TSVector val = DatumGetTSVector(entry->key);
int4 len;
int4 *arr;
WordEntry *ptr = ARRPTR(val);
char *words = STRPTR(val);
/* allocate an array key with room for one hash per lexeme */
len = CALCGTSIZE(ARRKEY, val->size);
res = (SignTSVector *) palloc(len);
SET_VARSIZE(res, len);
res->flag = ARRKEY;
arr = GETARR(res);
len = val->size;
/* fill the array with the CRC32 of each lexeme */
while (len--)
{
pg_crc32 c;
INIT_CRC32(c);
COMP_CRC32(c, words + ptr->pos, ptr->len);
FIN_CRC32(c);
*arr = *(int4 *) &c;
arr++;
ptr++;
}
/* sort the hashes and squeeze out duplicates */
len = uniqueint(GETARR(res), val->size);
if (len != val->size)
{
/*
* there is a collision of hash-function; len is always less than
* val->size
*/
len = CALCGTSIZE(ARRKEY, len);
res = (SignTSVector *) repalloc((void *) res, len);
SET_VARSIZE(res, len);
}
/* make signature, if array is too long */
if (VARSIZE(res) > TOAST_INDEX_TARGET)
{
SignTSVector *ressign;
len = CALCGTSIZE(SIGNKEY, 0);
ressign = (SignTSVector *) palloc(len);
SET_VARSIZE(ressign, len);
ressign->flag = SIGNKEY;
makesign(GETSIGN(ressign), res);
/* the array key is not freed here; only the signature is kept */
res = ressign;
}
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
entry->rel, entry->page,
entry->offset, FALSE);
}
else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
!ISALLTRUE(DatumGetPointer(entry->key)))
{
int4 i,
len;
SignTSVector *res;
BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
/* return the entry as-is as soon as one byte is not all ones */
LOOPBYTE(
if ((sign[i] & 0xff) != 0xff)
PG_RETURN_POINTER(retval);
);
/* every bit is set: store the compact all-true form instead */
len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
res = (SignTSVector *) palloc(len);
SET_VARSIZE(res, len);
res->flag = SIGNKEY | ALLISTRUE;
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
entry->rel, entry->page,
entry->offset, FALSE);
}
PG_RETURN_POINTER(retval);
}
/*
 * gtsvector_decompress - detoast the key of a GISTENTRY if necessary.
 *
 * When detoasting yields the same pointer the input entry is returned
 * unchanged; otherwise a new GISTENTRY wrapping the detoasted key is built.
 */
Datum
gtsvector_decompress(PG_FUNCTION_ARGS)
{
    GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
    SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
    GISTENTRY  *retval;

    if (key == (SignTSVector *) DatumGetPointer(entry->key))
        PG_RETURN_POINTER(entry);

    retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
    gistentryinit(*retval, PointerGetDatum(key),
                  entry->rel, entry->page,
                  entry->offset, FALSE);

    PG_RETURN_POINTER(retval);
}
/*
 * Bounds of the int4 array searched by checkcondition_arr.
 */
typedef struct
{
int4 *arrb; /* first element of the array */
int4 *arre; /* one past the last element of the array */
} CHKVAL;
/*
 * TS_execute callback for array keys: binary search for val->val in the
 * range [arrb, arre) described by the CHKVAL passed as 'checkval'.
 */
static bool
checkcondition_arr(void *checkval, QueryItem * val)
{
    CHKVAL     *range = (CHKVAL *) checkval;
    int4       *lo = range->arrb;
    int4       *hi = range->arre;

    /* the sought value, if present, is always inside [lo, hi) */
    while (lo < hi)
    {
        int4       *mid = lo + (hi - lo) / 2;

        if (*mid == val->val)
            return true;

        if (*mid < val->val)
            lo = mid + 1;
        else
            hi = mid;
    }

    return false;
}
/*
 * TS_execute callback for signature keys: test the signature bit selected
 * by hashing val->val.  'checkval' points at the signature bytes.
 */
static bool
checkcondition_bit(void *checkval, QueryItem * val)
{
    return GETBIT(checkval,
                  HASHVAL(val->val));
}
/*
 * gtsvector_consistent - can the key of a GiST entry match the query?
 *
 * Argument 0 is the GISTENTRY, argument 1 the tsquery.  An empty query
 * never matches.  An all-true signature always matches.  Other signature
 * keys are tested bit-wise, and array keys by searching the hash array.
 */
Datum
gtsvector_consistent(PG_FUNCTION_ARGS)
{
    TSQuery     query = PG_GETARG_TSQUERY(1);
    SignTSVector *key = (SignTSVector *) DatumGetPointer(
                                ((GISTENTRY *) PG_GETARG_POINTER(0))->key
    );
    CHKVAL      chkval;

    if (!query->size)
        PG_RETURN_BOOL(false);

    if (ISSIGNKEY(key))
    {
        if (ISALLTRUE(key))
            PG_RETURN_BOOL(true);

        PG_RETURN_BOOL(TS_execute(GETQUERY(query),
                                  (void *) GETSIGN(key), false,
                                  checkcondition_bit));
    }

    /* only leaf pages */
    chkval.arrb = GETARR(key);
    chkval.arre = chkval.arrb + ARRNELEM(key);

    PG_RETURN_BOOL(TS_execute(GETQUERY(query),
                              (void *) &chkval, true,
                              checkcondition_arr));
}
/*
 * unionkey - merge key 'add' into the signature 'sbase'.
 *
 * Returns 1 when 'add' is an all-true signature, in which case 'sbase' is
 * not modified.  Otherwise ORs in the signature bits, or sets the bit of
 * each array element, and returns 0.
 */
static int4
unionkey(BITVECP sbase, SignTSVector * add)
{
    int4        i;

    if (!ISSIGNKEY(add))
    {
        int4       *hashes = GETARR(add);

        for (i = 0; i < ARRNELEM(add); i++)
            HASH(sbase, hashes[i]);

        return 0;
    }
    else
    {
        BITVECP     sadd = GETSIGN(add);

        if (ISALLTRUE(add))
            return 1;

        for (i = 0; i < SIGLEN; i++)
            sbase[i] |= sadd[i];
    }

    return 0;
}
/*
 * gtsvector_union - build a signature key covering all entries of a vector.
 *
 * Argument 0 is the GistEntryVector, argument 1 receives the size of the
 * result.  The result is an all-true key as soon as one input is all-true;
 * otherwise it carries the OR of all input signatures.
 */
Datum
gtsvector_union(PG_FUNCTION_ARGS)
{
    GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
    int        *size = (int *) PG_GETARG_POINTER(1);
    BITVEC      acc;
    int4        flag = SIGNKEY;
    int4        n;
    int4        len;
    SignTSVector *result;

    MemSet((void *) acc, 0, sizeof(BITVEC));

    for (n = 0; n < entryvec->n; n++)
    {
        if (unionkey(acc, GETENTRY(entryvec, n)))
        {
            /* an all-true member makes the whole union all-true */
            flag |= ALLISTRUE;
            break;
        }
    }

    len = CALCGTSIZE(flag, 0);
    result = (SignTSVector *) palloc(len);
    *size = len;
    SET_VARSIZE(result, len);
    result->flag = flag;

    if (!ISALLTRUE(result))
        memcpy((void *) GETSIGN(result), (void *) acc, sizeof(BITVEC));

    PG_RETURN_POINTER(result);
}
Datum
gtsvector_same(PG_FUNCTION_ARGS)
{
SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0);
SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
if (ISSIGNKEY(a))
{ /* then b also ISSIGNKEY */
if (ISALLTRUE(a) && ISALLTRUE(b))
*result = true;
else if (ISALLTRUE(a))
*result = false;
else if (ISALLTRUE(b))
*result = false;
else
{
int4 i;
BITVECP sa = GETSIGN(a),
sb = GETSIGN(b);
*result = true;
LOOPBYTE(
if (sa[i] != sb[i])
{
*result = false;
break;
}
);
}
}
else
{ /* a and b ISARRKEY */
int4 lena = ARRNELEM(a),
lenb = ARRNELEM(b);
if (lena != lenb)
*result = false;
else
{
int4 *ptra = GETARR(a),
*ptrb = GETARR(b);
int4 i;
*result = true;
for (i = 0; i < lena; i++)
if (ptra[i] != ptrb[i])
{
*result = false;
break;
}
}
}
PG_RETURN_POINTER(result);
}
/*
 * sizebitvec - count the set bits of a signature, one table lookup per byte.
 */
static int4
sizebitvec(BITVECP sign)
{
    int4        total = 0;
    int4        i;

    for (i = 0; i < SIGLEN; i++)
    {
        unsigned char octet = (unsigned char) sign[i];

        total += number_of_ones[octet];
    }

    return total;
}
/*
 * hemdistsign - number of bit positions in which two signatures differ.
 */
static int
hemdistsign(BITVECP a, BITVECP b)
{
    int         dist = 0;
    int         i;

    for (i = 0; i < SIGLEN; i++)
    {
        int         diff = (unsigned char) (a[i] ^ b[i]);

        dist += number_of_ones[diff];
    }

    return dist;
}
/*
 * hemdist - bit distance between two signature keys.
 *
 * An all-true key is treated as having every bit set, so its distance to an
 * ordinary signature is the number of unset bits in that signature.
 */
static int
hemdist(SignTSVector * a, SignTSVector * b)
{
    if (ISALLTRUE(a) && ISALLTRUE(b))
        return 0;

    if (ISALLTRUE(a))
        return SIGLENBIT - sizebitvec(GETSIGN(b));

    if (ISALLTRUE(b))
        return SIGLENBIT - sizebitvec(GETSIGN(a));

    return hemdistsign(GETSIGN(a), GETSIGN(b));
}
/*
 * gtsvector_penalty - cost of adding 'newentry' under 'origentry'.
 *
 * Argument 0 is the existing entry (always a signature key), argument 1 the
 * new entry and argument 2 receives the penalty.  The penalty is the bit
 * distance between the two signatures; an array key is first converted to a
 * signature, and against an all-true original the count of its unset bits
 * is scaled by 1 / (SIGLENBIT + 1).
 */
Datum
gtsvector_penalty(PG_FUNCTION_ARGS)
{
    GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);     /* always ISSIGNKEY */
    GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
    float      *penalty = (float *) PG_GETARG_POINTER(2);
    SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key);
    SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key);
    BITVECP     orig = GETSIGN(origval);

    if (!ISARRKEY(newval))
        *penalty = hemdist(origval, newval);
    else
    {
        BITVEC      sign;

        makesign(sign, newval);

        if (ISALLTRUE(origval))
            *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
        else
            *penalty = hemdistsign(sign, orig);
    }

    PG_RETURN_POINTER(penalty);
}
/*
 * Signature of one entry as cached by gtsvector_picksplit (see fillcache).
 */
typedef struct
{
bool allistrue; /* true if the key was all-true; 'sign' is then not filled */
BITVEC sign; /* signature bits, stored by value */
} CACHESIGN;
/*
 * fillcache - load the signature of 'key' into the cache slot 'item'.
 *
 * Array keys are hashed into a signature, all-true keys only set the flag,
 * and plain signature keys are copied byte for byte.
 */
static void
fillcache(CACHESIGN * item, SignTSVector * key)
{
    item->allistrue = false;

    if (ISARRKEY(key))
    {
        makesign(item->sign, key);
        return;
    }

    if (ISALLTRUE(key))
    {
        item->allistrue = true;
        return;
    }

    memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
}
/*
 * Balance term used by gtsvector_picksplit: minus the cube of (a - b),
 * multiplied by c, as a double.
 */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
/*
 * Sort element for gtsvector_picksplit: an entry offset and its cost.
 */
typedef struct
{
OffsetNumber pos; /* offset of the entry in the entry vector */
int4 cost; /* sort key, compared by comparecost */
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
return 0;
else
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
}
/*
 * hemdistcache - bit distance between two cached signatures.
 *
 * Same rules as hemdist, but the operands are CACHESIGN entries.
 */
static int
hemdistcache(CACHESIGN * a, CACHESIGN * b)
{
    if (a->allistrue && b->allistrue)
        return 0;

    if (a->allistrue)
        return SIGLENBIT - sizebitvec(b->sign);

    if (b->allistrue)
        return SIGLENBIT - sizebitvec(a->sign);

    return hemdistsign(a->sign, b->sign);
}
/*
 * gtsvector_picksplit - distribute the entries of a page over two pages.
 *
 * Argument 0 is the GistEntryVector to split, argument 1 the GIST_SPLITVEC
 * to fill in.  The two entries whose signatures are furthest apart become
 * the seeds of the left and right side.  The entries are then visited in
 * order of increasing |distance to seed_1 - distance to seed_2|; each one
 * joins the side whose union signature is closer, with WISH_F shifting the
 * decision towards the side that currently holds fewer entries.
 *
 * On return spl_left/spl_right list the offsets put on each side (each list
 * is followed by a FirstOffsetNumber terminator), spl_nleft/spl_nright are
 * the counts, and spl_ldatum/spl_rdatum are the union keys of the sides.
 */
Datum
gtsvector_picksplit(PG_FUNCTION_ARGS)
{
    GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
    GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
    OffsetNumber k,
                j;
    SignTSVector *datum_l,
               *datum_r;
    BITVECP     union_l,
                union_r;
    int4        size_alpha,
                size_beta;
    int4        size_waste,
                waste = -1;
    int4        nbytes;
    OffsetNumber seed_1 = 0,
                seed_2 = 0;
    OffsetNumber *left,
               *right;
    OffsetNumber maxoff;
    BITVECP     ptr;
    int         i;
    CACHESIGN  *cache;
    SPLITCOST  *costvector;

    maxoff = entryvec->n - 2;
    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
    v->spl_left = (OffsetNumber *) palloc(nbytes);
    v->spl_right = (OffsetNumber *) palloc(nbytes);

    /*
     * Find the pair of entries with the largest distance.  Cache slots are
     * filled lazily during the first pass of the outer loop.
     */
    cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
    fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));

    for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
    {
        for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
        {
            if (k == FirstOffsetNumber)
                fillcache(&cache[j], GETENTRY(entryvec, j));

            size_waste = hemdistcache(&(cache[j]), &(cache[k]));
            if (size_waste > waste)
            {
                waste = size_waste;
                seed_1 = k;
                seed_2 = j;
            }
        }
    }

    left = v->spl_left;
    v->spl_nleft = 0;
    right = v->spl_right;
    v->spl_nright = 0;

    /* no pair was examined: fall back to the first two entries */
    if (seed_1 == 0 || seed_2 == 0)
    {
        seed_1 = 1;
        seed_2 = 2;
    }

    /* form initial .. */
    if (cache[seed_1].allistrue)
    {
        datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        datum_l->flag = SIGNKEY | ALLISTRUE;
    }
    else
    {
        datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
        SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
        datum_l->flag = SIGNKEY;
        memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
    }
    if (cache[seed_2].allistrue)
    {
        datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        datum_r->flag = SIGNKEY | ALLISTRUE;
    }
    else
    {
        datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
        SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
        datum_r->flag = SIGNKEY;
        memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
    }

    union_l = GETSIGN(datum_l);
    union_r = GETSIGN(datum_r);

    /* the last entry was not part of the seed search; cache it now */
    maxoff = OffsetNumberNext(maxoff);
    fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));

    /* sort before ... */
    costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
    for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
    {
        costvector[j - 1].pos = j;
        size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
        size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
        costvector[j - 1].cost = Abs(size_alpha - size_beta);
    }
    qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);

    for (k = 0; k < maxoff; k++)
    {
        j = costvector[k].pos;

        /* the seeds go to their own side without further checks */
        if (j == seed_1)
        {
            *left++ = j;
            v->spl_nleft++;
            continue;
        }
        else if (j == seed_2)
        {
            *right++ = j;
            v->spl_nright++;
            continue;
        }

        /*
         * Distance of entry j to each side's union.  cache[j].sign is a bare
         * signature, not a SignTSVector, so it is passed to sizebitvec
         * directly; GETSIGN() would skip GTHDRSIZE bytes of it and read
         * past its end.
         */
        if (ISALLTRUE(datum_l) || cache[j].allistrue)
        {
            if (ISALLTRUE(datum_l) && cache[j].allistrue)
                size_alpha = 0;
            else
                size_alpha = SIGLENBIT - sizebitvec(
                    (cache[j].allistrue) ? GETSIGN(datum_l) : cache[j].sign
                    );
        }
        else
            size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));

        if (ISALLTRUE(datum_r) || cache[j].allistrue)
        {
            if (ISALLTRUE(datum_r) && cache[j].allistrue)
                size_beta = 0;
            else
                size_beta = SIGLENBIT - sizebitvec(
                    (cache[j].allistrue) ? GETSIGN(datum_r) : cache[j].sign
                    );
        }
        else
            size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));

        if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
        {
            if (ISALLTRUE(datum_l) || cache[j].allistrue)
            {
                /* an all-true member fills the union with ones */
                if (!ISALLTRUE(datum_l))
                    MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
            }
            else
            {
                ptr = cache[j].sign;
                LOOPBYTE(
                         union_l[i] |= ptr[i];
                    );
            }
            *left++ = j;
            v->spl_nleft++;
        }
        else
        {
            if (ISALLTRUE(datum_r) || cache[j].allistrue)
            {
                /* an all-true member fills the union with ones */
                if (!ISALLTRUE(datum_r))
                    MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
            }
            else
            {
                ptr = cache[j].sign;
                LOOPBYTE(
                         union_r[i] |= ptr[i];
                    );
            }
            *right++ = j;
            v->spl_nright++;
        }
    }

    /* terminate both offset lists */
    *right = *left = FirstOffsetNumber;
    v->spl_ldatum = PointerGetDatum(datum_l);
    v->spl_rdatum = PointerGetDatum(datum_r);

    PG_RETURN_POINTER(v);
}

View File

@ -0,0 +1,767 @@
/*-------------------------------------------------------------------------
*
* tsquery.c
* I/O functions for tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "libpq/pqformat.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
#include "utils/pg_crc.h"
/*
 * parser's states (values of TSQueryParserState.state, see gettoken_query)
 */
#define WAITOPERAND 1 /* an operand, '!' or '(' is expected */
#define WAITOPERATOR 2 /* '&', '|', ')' or end of input is expected */
#define WAITFIRSTOPERAND 3 /* like WAITOPERAND, but a missing operand ends the input */
#define WAITSINGLEOPERAND 4 /* the rest of the input is taken as one operand */
/*
* node of query tree, also used
* for storing polish notation in parser
*
* Nodes are chained through 'next'; pushquery adds each new node at the
* head of the list.
*/
typedef struct ParseQueryNode
{
int2 weight; /* weight bits as produced by get_weight */
int2 type; /* node type passed to pushquery (e.g. VAL, OPR) */
int4 val; /* operator character, or CRC32 of the operand */
int2 distance; /* offset of the operand in the operand buffer */
int2 length; /* length of the operand in bytes */
struct ParseQueryNode *next; /* next node in the list */
} ParseQueryNode;
/*
 * get_weight - parse an optional ":ABCD" weight suffix.
 *
 * If 'buf' does not start with ':' the weight is 0 and 'buf' is returned
 * unchanged.  Otherwise the letters a-d (either case) following the colon
 * are folded into *weight (A = bit 3, B = bit 2, C = bit 1, D = bit 0) and
 * the address of the first character that is not such a letter is returned.
 */
static char *
get_weight(char *buf, int2 *weight)
{
    *weight = 0;

    if (!t_iseq(buf, ':'))
        return buf;

    /* skip the colon, then consume single-byte weight letters */
    for (buf++; *buf && pg_mblen(buf) == 1; buf++)
    {
        int2        bit;

        switch (*buf)
        {
            case 'a':
            case 'A':
                bit = 1 << 3;
                break;
            case 'b':
            case 'B':
                bit = 1 << 2;
                break;
            case 'c':
            case 'C':
                bit = 1 << 1;
                break;
            case 'd':
            case 'D':
                bit = 1;
                break;
            default:
                return buf;
        }

        *weight |= bit;
    }

    return buf;
}
/*
* get token from query string
*
* Scans state->buf according to state->state and returns the kind of the
* next token:
*   OPR   - an operator; its character is stored in *val
*   OPEN  - '('
*   CLOSE - ')'
*   VAL   - an operand; *strval and *lenval describe it, and in the
*           WAITOPERAND states *weight receives its weight suffix
*   END   - end of input
*   ERR   - syntax error detected here
* Some syntax errors are reported directly with ereport(ERROR).
* state->count tracks the parenthesis nesting depth.
*/
static int4
gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
{
while (1)
{
switch (state->state)
{
case WAITFIRSTOPERAND:
case WAITOPERAND:
if (t_iseq(state->buf, '!'))
{
(state->buf)++; /* can safely ++, t_iseq guarantee
* that pg_mblen()==1 */
*val = (int4) '!';
state->state = WAITOPERAND;
return OPR;
}
else if (t_iseq(state->buf, '('))
{
state->count++;
(state->buf)++;
state->state = WAITOPERAND;
return OPEN;
}
else if (t_iseq(state->buf, ':'))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error at start of operand in tsearch query: \"%s\"",
state->buffer)));
}
else if (!t_isspace(state->buf))
{
/* let the tsvector lexeme parser read the operand */
state->valstate.prsbuf = state->buf;
if (gettoken_tsvector(&(state->valstate)))
{
*strval = state->valstate.word;
*lenval = state->valstate.curpos - state->valstate.word;
/* continue after the operand and its optional weight suffix */
state->buf = get_weight(state->valstate.prsbuf, weight);
state->state = WAITOPERATOR;
return VAL;
}
else if (state->state == WAITFIRSTOPERAND)
return END;
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("no operand in tsearch query: \"%s\"",
state->buffer)));
}
/* whitespace: skipped by the advance at the bottom of the loop */
break;
case WAITOPERATOR:
if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
{
state->state = WAITOPERAND;
*val = (int4) *(state->buf);
(state->buf)++;
return OPR;
}
else if (t_iseq(state->buf, ')'))
{
(state->buf)++;
state->count--;
/* a ')' without a matching '(' is an error */
return (state->count < 0) ? ERR : CLOSE;
}
else if (*(state->buf) == '\0')
/* end of input is only valid with all parentheses closed */
return (state->count) ? ERR : END;
else if (!t_isspace(state->buf))
return ERR;
break;
case WAITSINGLEOPERAND:
if (*(state->buf) == '\0')
return END;
/* hand out everything up to the terminating NUL as one operand */
*strval = state->buf;
*lenval = strlen(state->buf);
state->buf += strlen(state->buf);
state->count++;
return VAL;
default:
return ERR;
break;
}
/* step over one (possibly multibyte) character and try again */
state->buf += pg_mblen(state->buf);
}
return END;
}
/*
 * pushquery - add a node to the front of the parser's node list.
 *
 * Because every node is linked in at the head, the list ends up in the
 * reverse of the order in which the nodes were pushed.  Raises a syntax
 * error if 'distance' is not below MAXSTRPOS or 'lenval' is not below
 * MAXSTRLEN.
 */
void
pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
{
    ParseQueryNode *node = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));

    node->type = type;
    node->weight = weight;
    node->val = val;

    if (distance >= MAXSTRPOS)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("value is too big in tsearch query: \"%s\"",
                        state->buffer)));
    if (lenval >= MAXSTRLEN)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("operand is too long in tsearch query: \"%s\"",
                        state->buffer)));

    node->distance = distance;
    node->length = lenval;

    /* link in at the head of the list */
    node->next = state->str;
    state->str = node;
    state->num++;
}
/*
 * pushval_asis - push an operand exactly as written in the query text.
 *
 * Pushes a node whose value is the CRC32 of the operand, then appends the
 * operand plus a terminating NUL to the operand buffer, doubling the
 * buffer until the operand fits.  Raises a syntax error if 'lenval' is not
 * below MAXSTRLEN.
 */
void
pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
{
    pg_crc32    digest;
    int4        used;

    if (lenval >= MAXSTRLEN)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("word is too long in tsearch query: \"%s\"",
                        state->buffer)));

    INIT_CRC32(digest);
    COMP_CRC32(digest, strval, lenval);
    FIN_CRC32(digest);

    /* the node records where in the operand buffer the text will start */
    pushquery(state, type, *(int4 *) &digest,
              state->curop - state->op, lenval, weight);

    /* grow the operand buffer; the write offset survives the repalloc */
    used = state->curop - state->op;
    while (used + lenval + 1 >= state->lenop)
    {
        state->lenop *= 2;
        state->op = (char *) repalloc((void *) state->op, state->lenop);
        state->curop = state->op + used;
    }

    memcpy((void *) state->curop, (void *) strval, lenval);
    state->curop[lenval] = '\0';
    state->curop += lenval + 1;

    state->sumlen += lenval + 1;    /* operand plus its \0 */
}
/* capacity of makepol's per-call stack of pending operators */
#define STACKDEPTH 32
/*
* make polish notation of query
*
* Reads tokens with gettoken_query until END or a closing parenthesis.
* Operands are handed to 'pushval'; operators are emitted with pushquery,
* either at once or after being held on a local stack.  A parenthesized
* sub-expression is handled by a recursive call.  Returns END on success;
* a syntax error is reported with ereport(ERROR).
*/
static int4
makepol(TSQueryParserState * state, void (*pushval) (TSQueryParserState *, int, char *, int, int2))
{
int4 val = 0,
type;
int4 lenval = 0;
char *strval = NULL;
int4 stack[STACKDEPTH];
int4 lenstack = 0;
int2 weight = 0;
while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
{
switch (type)
{
case VAL:
pushval(state, VAL, strval, lenval, weight);
/* the operand completes every '&' and '!' on top of the stack */
while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
stack[lenstack - 1] == (int4) '!'))
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
}
break;
case OPR:
/* '|' is emitted at once when operators are pending, else it is stacked */
if (lenstack && val == (int4) '|')
pushquery(state, OPR, val, 0, 0, 0);
else
{
if (lenstack == STACKDEPTH) /* internal error */
elog(ERROR, "tsquery stack too small");
stack[lenstack] = val;
lenstack++;
}
break;
case OPEN:
/* parse the parenthesized sub-expression recursively */
if (makepol(state, pushval) == ERR)
return ERR;
/* the sub-expression acts as one operand for a pending '&' or '!' */
if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
stack[lenstack - 1] == (int4) '!'))
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
}
break;
case CLOSE:
/* end of this nesting level: flush whatever is still stacked */
while (lenstack)
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
};
return END;
break;
case ERR:
default:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsearch query: \"%s\"",
state->buffer)));
return ERR;
}
}
/* end of input: flush whatever is still stacked */
while (lenstack)
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
};
return END;
}
/*
 * findoprnd - fill in the 'left' field of every item of a query.
 *
 * 'ptr' is the item array in prefix order and *pos the item to start at;
 * on return *pos is just past the sub-expression that started there.
 * Operands get left = 0 and '!' gets left = 1.  For a binary operator,
 * 'left' is the distance from the operator to the second sub-expression
 * that follows it.
 */
static void
findoprnd(QueryItem * ptr, int4 *pos)
{
    int4        start = *pos;
    QueryItem  *cur = &ptr[start];

    (*pos)++;

    if (cur->type == VAL || cur->type == VALSTOP)
    {
        cur->left = 0;
        return;
    }

    if (cur->val == (int4) '!')
    {
        cur->left = 1;
        findoprnd(ptr, pos);
        return;
    }

    /* binary operator: walk the first sub-expression, note where it ends */
    findoprnd(ptr, pos);
    cur->left = *pos - start;
    findoprnd(ptr, pos);
}
/*
* input
*
* parse_tsquery - parse the text 'buf' into a TSQuery.
*
* 'pushval' is called for every operand found; 'cfg_id' is stored in the
* parser state for its use.  When 'isplain' is true the whole input is
* treated as a single operand instead of being parsed for operators.
*
* If parsing produces no items a NOTICE is raised and an empty query
* (size 0) is returned.
*/
TSQuery
parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
{
TSQueryParserState state;
int4 i;
TSQuery query;
int4 commonlen;
QueryItem *ptr;
ParseQueryNode *tmp;
int4 pos = 0;
/* init state */
state.buffer = buf;
state.buf = buf;
state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
state.count = 0;
state.num = 0;
state.str = NULL;
state.cfg_id = cfg_id;
/* init value parser's state */
state.valstate.oprisdelim = true;
state.valstate.len = 32;
state.valstate.word = (char *) palloc(state.valstate.len);
/* init list of operand */
state.sumlen = 0;
state.lenop = 64;
state.curop = state.op = (char *) palloc(state.lenop);
*(state.curop) = '\0';
/* parse query & make polish notation (postfix, but in reverse order) */
makepol(&state, pushval);
pfree(state.valstate.word);
if (!state.num)
{
ereport(NOTICE,
(errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
state.buffer)));
query = (TSQuery) palloc(HDRSIZETQ);
SET_VARSIZE(query, HDRSIZETQ);
query->size = 0;
return query;
}
/* make finish struct */
commonlen = COMPUTESIZE(state.num, state.sumlen);
query = (TSQuery) palloc(commonlen);
SET_VARSIZE(query, commonlen);
query->size = state.num;
ptr = GETQUERY(query);
/* set item in polish notation */
/* the node list is walked from its head and each node freed once copied */
for (i = 0; i < state.num; i++)
{
ptr[i].weight = state.str->weight;
ptr[i].type = state.str->type;
ptr[i].val = state.str->val;
ptr[i].distance = state.str->distance;
ptr[i].length = state.str->length;
tmp = state.str->next;
pfree(state.str);
state.str = tmp;
}
/* set user friendly-operand view */
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
pfree(state.op);
/* set left operand's position for every operator */
pos = 0;
findoprnd(ptr, &pos);
return query;
}
/*
* in without morphology
*/
Datum
tsqueryin(PG_FUNCTION_ARGS)
{
char *in = PG_GETARG_CSTRING(0);
pg_verifymbstr(in, strlen(in), false);
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
}
/*
* out function
*
* State of the infix printer (see infix): the item being printed and a
* growable output buffer.
*/
typedef struct
{
QueryItem *curpol; /* query item to print next */
char *buf; /* start of the output buffer */
char *cur; /* current write position inside buf */
char *op; /* base of the operand strings; items index it by 'distance' */
int4 buflen; /* allocated size of buf in bytes */
} INFIX;
/*
 * Make sure the output buffer of INFIX 'inf' can take 'addsize' more bytes
 * plus a terminator: the buffer is doubled with repalloc until it can, and
 * 'cur' is re-pointed into the reallocated buffer at the same offset.
 */
#define RESIZEBUF(inf,addsize) \
while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
{ \
int4 len = (inf)->cur - (inf)->buf; \
(inf)->buflen *= 2; \
(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
(inf)->cur = (inf)->buf + len; \
}
/*
* recursive walk on tree and print it in
* infix (human-readable) view
*
* Prints the sub-expression starting at in->curpol into the buffer of 'in'
* and leaves in->curpol just past it.  'first' suppresses the parentheses
* that are otherwise put around an '|' expression.
*/
static void
infix(INFIX * in, bool first)
{
if (in->curpol->type == VAL)
{
/* operand: print it in single quotes, followed by its weight letters */
char *op = in->op + in->curpol->distance;
int clen;
RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
*(in->cur) = '\'';
in->cur++;
while (*op)
{
/* a quote inside the operand is written twice */
if (t_iseq(op, '\''))
{
*(in->cur) = '\'';
in->cur++;
}
COPYCHAR(in->cur, op);
clen = pg_mblen(op);
op += clen;
in->cur += clen;
}
*(in->cur) = '\'';
in->cur++;
if (in->curpol->weight)
{
*(in->cur) = ':';
in->cur++;
if (in->curpol->weight & (1 << 3))
{
*(in->cur) = 'A';
in->cur++;
}
if (in->curpol->weight & (1 << 2))
{
*(in->cur) = 'B';
in->cur++;
}
if (in->curpol->weight & (1 << 1))
{
*(in->cur) = 'C';
in->cur++;
}
if (in->curpol->weight & 1)
{
*(in->cur) = 'D';
in->cur++;
}
}
*(in->cur) = '\0';
in->curpol++;
}
else if (in->curpol->val == (int4) '!')
{
/* negation: print '!' and parenthesize the operand if it is an operator */
bool isopr = false;
RESIZEBUF(in, 1);
*(in->cur) = '!';
in->cur++;
*(in->cur) = '\0';
in->curpol++;
if (in->curpol->type == OPR)
{
isopr = true;
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0');
}
infix(in, isopr);
if (isopr)
{
RESIZEBUF(in, 2);
sprintf(in->cur, " )");
in->cur = strchr(in->cur, '\0');
}
}
else
{
/*
 * binary operator: the item that follows it is printed into a scratch
 * buffer first, because it has to appear after the other operand
 */
int4 op = in->curpol->val;
INFIX nrm;
in->curpol++;
if (op == (int4) '|' && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0');
}
nrm.curpol = in->curpol;
nrm.op = in->op;
nrm.buflen = 16;
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
/* get right operand */
infix(&nrm, false);
/* get & print left operand */
in->curpol = nrm.curpol;
infix(in, false);
/* print operator & right operand */
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
sprintf(in->cur, " %c %s", op, nrm.buf);
in->cur = strchr(in->cur, '\0');
pfree(nrm.buf);
if (op == (int4) '|' && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, " )");
in->cur = strchr(in->cur, '\0');
}
}
}
/*
 * tsqueryout - text output function for tsquery.
 *
 * An empty query is printed as the empty string; anything else is printed
 * in infix form by infix().
 */
Datum
tsqueryout(PG_FUNCTION_ARGS)
{
    TSQuery     query = PG_GETARG_TSQUERY(0);
    INFIX       out;

    if (query->size == 0)
    {
        char       *empty = palloc(1);

        empty[0] = '\0';
        PG_RETURN_POINTER(empty);
    }

    /* start from a small buffer; infix() grows it on demand */
    out.buflen = 32;
    out.buf = (char *) palloc(sizeof(char) * out.buflen);
    out.cur = out.buf;
    out.cur[0] = '\0';
    out.curpol = GETQUERY(query);
    out.op = GETOPERAND(query);

    infix(&out, true);

    PG_FREE_IF_COPY(query, 0);
    PG_RETURN_CSTRING(out.buf);
}
/*
 * tsquerysend - binary output function for tsquery.
 *
 * Sends the item count, then type, weight, left, val and one further
 * 32-bit word for every item, and finally the operand bytes of every VAL
 * item in item order.
 */
Datum
tsquerysend(PG_FUNCTION_ARGS)
{
    TSQuery     query = PG_GETARG_TSQUERY(0);
    StringInfoData buf;
    QueryItem  *items = GETQUERY(query);
    int         n;

    pq_begintypsend(&buf);

    pq_sendint(&buf, query->size, sizeof(int32));

    for (n = 0; n < query->size; n++)
    {
        QueryItem  *cur = &items[n];
        int         packed;

        pq_sendint(&buf, cur->type, sizeof(cur->type));
        pq_sendint(&buf, cur->weight, sizeof(cur->weight));
        pq_sendint(&buf, cur->left, sizeof(cur->left));
        pq_sendint(&buf, cur->val, sizeof(cur->val));

        /*
         * We are sure that sizeof(WordEntry) == sizeof(int32), and about
         * layout of QueryItem: the word at offset HDRSIZEQI is sent as is.
         */
        packed = *(int32 *) (((char *) cur) + HDRSIZEQI);
        pq_sendint(&buf, packed, sizeof(packed));
    }

    for (n = 0; n < query->size; n++)
    {
        if (items[n].type == VAL)
            pq_sendbytes(&buf, GETOPERAND(query) + items[n].distance, items[n].length);
    }

    PG_FREE_IF_COPY(query, 0);

    PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
/*
 * tsqueryrecv
 *		Binary receive function for tsquery.
 *
 * Reads the item count, then the fixed-size part of every item, then the
 * operand bytes of every VAL item.  Operands are stored back to back, each
 * followed by a '\0', and item->distance is recomputed for the new layout.
 *
 * The message is untrusted input, so each item is checked while it is read:
 *	- the node type must be VAL or OPR;
 *	- an operator must be '!', '&' or '|' (anything else would later be
 *	  treated as a binary operator with an unchecked left offset);
 *	- a binary operator's left offset must be positive and stay inside the
 *	  item array;
 *	- an operator cannot be the last item, since its right operand is the
 *	  item that follows it.
 */
Datum
tsqueryrecv(PG_FUNCTION_ARGS)
{
	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
	TSQuery		query;
	int			i,
				size,
				tmp,
				len = HDRSIZETQ;
	QueryItem  *item;
	int			datalen = 0;
	char	   *ptr;

	size = pq_getmsgint(buf, sizeof(uint32));
	if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
		elog(ERROR, "invalid size of tsquery");

	len += sizeof(QueryItem) * size;
	query = (TSQuery) palloc(len);
	query->size = size;
	item = GETQUERY(query);

	for (i = 0; i < size; i++)
	{
		item->type = (int8) pq_getmsgint(buf, sizeof(int8));
		item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
		item->left = (int16) pq_getmsgint(buf, sizeof(int16));
		item->val = (int32) pq_getmsgint(buf, sizeof(int32));

		/* the remaining 32 bits of the item are transferred as one word */
		tmp = pq_getmsgint(buf, sizeof(int32));
		memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));

		/*
		 * Sanity checks
		 */
		if (item->type == VAL)
		{
			datalen += item->length + 1;		/* \0 */
		}
		else if (item->type == OPR)
		{
			if (item->val == '|' || item->val == '&')
			{
				if (item->left <= 0 || i + item->left >= size)
					elog(ERROR, "invalid pointer to left operand");
			}
			else if (item->val != '!')
				elog(ERROR, "unknown operator type in tsquery");

			if (i == size - 1)
				elog(ERROR, "invalid pointer to right operand");
		}
		else
			elog(ERROR, "unknown tsquery node type");

		item++;
	}

	/* make room for the operands now that their total length is known */
	query = (TSQuery) repalloc(query, len + datalen);

	item = GETQUERY(query);
	ptr = GETOPERAND(query);
	for (i = 0; i < size; i++)
	{
		if (item->type == VAL)
		{
			item->distance = ptr - GETOPERAND(query);
			memcpy(ptr,
				   pq_getmsgbytes(buf, item->length),
				   item->length);
			ptr += item->length;
			*ptr++ = '\0';
		}
		item++;
	}

	Assert(ptr - GETOPERAND(query) == datalen);

	SET_VARSIZE(query, len + datalen);

	/* this is a tsquery, so return it through the tsquery macro */
	PG_RETURN_TSQUERY(query);
}
/*
 * tsquerytree
 *		Debug function: show the query in the form that is used when
 *		non-leaf index pages are searched, i.e. after clean_NOT() has
 *		removed the NOT subtrees.
 *
 * Returns an empty text for an empty query, the single character 'T' when
 * clean_NOT() leaves nothing, and otherwise the infix text of the cleaned
 * item array (without a terminating '\0', as for any text datum).
 */
Datum
tsquerytree(PG_FUNCTION_ARGS)
{
	TSQuery		query = PG_GETARG_TSQUERY(0);
	INFIX		nrm;
	text	   *res;
	QueryItem  *q;
	int4		len;

	if (query->size == 0)
	{
		res = (text *) palloc(VARHDRSZ);
		SET_VARSIZE(res, VARHDRSZ);
		PG_FREE_IF_COPY(query, 0);
		PG_RETURN_POINTER(res);
	}

	q = clean_NOT(GETQUERY(query), &len);

	if (!q)
	{
		res = (text *) palloc(1 + VARHDRSZ);
		SET_VARSIZE(res, 1 + VARHDRSZ);
		*((char *) VARDATA(res)) = 'T';
	}
	else
	{
		int			textlen;

		nrm.curpol = q;
		nrm.buflen = 32;
		nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
		*(nrm.cur) = '\0';
		nrm.op = GETOPERAND(query);
		infix(&nrm, true);

		/* nrm.cur points at the end of the generated text */
		textlen = nrm.cur - nrm.buf;
		res = (text *) palloc(textlen + VARHDRSZ);
		SET_VARSIZE(res, textlen + VARHDRSZ);

		/* the length is known exactly, so a plain byte copy is what we want */
		memcpy(VARDATA(res), nrm.buf, textlen);

		pfree(nrm.buf);
		pfree(q);
	}

	PG_FREE_IF_COPY(query, 0);

	PG_RETURN_POINTER(res);
}

View File

@ -0,0 +1,261 @@
/*-------------------------------------------------------------------------
*
* tsquery_cleanup.c
* Cleanup query from NOT values and/or stopword
* Utility functions to correct work.
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
/*
 * Node of the temporary binary tree built by maketree() from the flat
 * QueryItem array.  The tree nodes are palloc'd; valnode points back into
 * the array the tree was built from.
 */
typedef struct NODE
{
/* second operand of a binary operator (item + item->left); NULL otherwise */
struct NODE *left;
/* operand stored right after an operator item (item + 1); NULL for values */
struct NODE *right;
/* the query item this node stands for */
QueryItem *valnode;
} NODE;
/*
 * maketree
 *		Turn the flat ("plain") item array starting at 'in' into a tree.
 *
 * For an operator item the right child is built from the item that
 * immediately follows it; every operator other than '!' also gets a left
 * child built from the item in->left positions ahead.  Non-operator items
 * become leaves.  The nodes only reference the items, they do not copy them.
 */
static NODE *
maketree(QueryItem * in)
{
	NODE	   *result = (NODE *) palloc(sizeof(NODE));

	result->valnode = in;
	result->left = NULL;
	result->right = NULL;

	if (in->type != OPR)
		return result;

	result->right = maketree(in + 1);
	if (in->val != (int4) '!')
		result->left = maketree(in + in->left);

	return result;
}
/*
 * Working state used by plaintree()/plainnode() while a NODE tree is written
 * back into a flat QueryItem array.
 */
typedef struct
{
QueryItem *ptr;		/* output array; repalloc'd by plainnode() when full */
int4 len;			/* number of items the array has room for */
int4 cur;			/* number of items written so far */
} PLAINTREE;
/*
 * plainnode
 *		Append 'node' and its subtree to the flat array in 'state', freeing
 *		each tree node once it has been written.
 *
 * The item is copied into the next free slot (doubling the array first when
 * it is full).  For '!' the left offset is set to 1 and the right subtree
 * follows.  For the other operators the right subtree is written first, the
 * left offset is then set to the distance from the operator's slot to the
 * next free slot, and the left subtree follows.
 */
static void
plainnode(PLAINTREE * state, NODE * node)
{
	QueryItem  *src = node->valnode;
	int4		slot;

	if (state->cur == state->len)
	{
		state->len *= 2;
		state->ptr = (QueryItem *) repalloc((void *) state->ptr,
											state->len * sizeof(QueryItem));
	}

	/* every kind of node occupies exactly one slot */
	slot = state->cur;
	memcpy((void *) &(state->ptr[slot]), (void *) src, sizeof(QueryItem));
	state->cur++;

	if (src->type != VAL)
	{
		if (src->val == (int4) '!')
		{
			state->ptr[slot].left = 1;
			plainnode(state, node->right);
		}
		else
		{
			plainnode(state, node->right);

			/*
			 * Index through state->ptr again: the recursion above may have
			 * repalloc'd the array.
			 */
			state->ptr[slot].left = state->cur - slot;
			plainnode(state, node->left);
		}
	}

	pfree(node);
}
/*
 * plaintree
 *		Flatten a NODE tree back into a QueryItem array.
 *
 * Stores the number of items written in *len and returns the array.  When
 * root is NULL, or its item is neither VAL nor OPR, nothing is allocated:
 * the result is NULL and *len is 0.
 */
static QueryItem *
plaintree(NODE * root, int4 *len)
{
	PLAINTREE	state;

	state.ptr = NULL;
	state.len = 16;
	state.cur = 0;

	if (root != NULL &&
		(root->valnode->type == VAL || root->valnode->type == OPR))
	{
		state.ptr = (QueryItem *) palloc(state.len * sizeof(QueryItem));
		plainnode(&state, root);
	}

	*len = state.cur;
	return state.ptr;
}
/*
 * freetree
 *		Release a NODE tree: both subtrees first, then the node itself.
 *		A NULL argument is accepted and ignored.
 */
static void
freetree(NODE * node)
{
	if (node == NULL)
		return;

	/* the recursive calls cope with missing children themselves */
	freetree(node->left);
	freetree(node->right);
	pfree(node);
}
/*
 * Remove the '!' operators from a tree.
 *
 * This is useful for debugging, and the resulting view is also the one used
 * when searching in an index.  A '!' operator is treated as always TRUE,
 * which is represented here by a NULL result.
 *
 * Returns the cleaned tree, or NULL when nothing is left:
 *	- a VAL node is returned unchanged;
 *	- a '!' node is freed together with its subtree;
 *	- a '|' node is freed as soon as one of its sides cleans to NULL;
 *	- any other operator collapses to whichever side survives, or to NULL
 *	  when neither does.
 */
static NODE *
clean_NOT_intree(NODE * node)
{
if (node->valnode->type == VAL)
return node;
if (node->valnode->val == (int4) '!')
{
freetree(node);
return NULL;
}
/* operator & or | */
if (node->valnode->val == (int4) '|')
{
/*
 * The right side is only cleaned when the left side survived; in either
 * failure case freetree() releases whatever is still attached to node.
 */
if ((node->left = clean_NOT_intree(node->left)) == NULL ||
(node->right = clean_NOT_intree(node->right)) == NULL)
{
freetree(node);
return NULL;
}
}
else
{
NODE *res = node;
node->left = clean_NOT_intree(node->left);
node->right = clean_NOT_intree(node->right);
if (node->left == NULL && node->right == NULL)
{
/* both children are already gone, so only the node itself is freed */
pfree(node);
res = NULL;
}
else if (node->left == NULL)
{
/* only the right side survived: it replaces this operator */
res = node->right;
pfree(node);
}
else if (node->right == NULL)
{
/* only the left side survived: it replaces this operator */
res = node->left;
pfree(node);
}
return res;
}
return node;
}
/*
 * clean_NOT
 *		Build a tree from the flat item array at 'ptr', prune it with
 *		clean_NOT_intree(), and flatten what remains.
 *
 * The number of items in the result is stored in *len.  The result is
 * whatever plaintree() produces for the pruned tree, which is NULL when the
 * pruning left nothing.
 */
QueryItem *
clean_NOT(QueryItem * ptr, int4 *len)
{
	NODE	   *tree = maketree(ptr);
	NODE	   *pruned = clean_NOT_intree(tree);

	return plaintree(pruned, len);
}
/*
 * Status codes reported through the 'result' argument of
 * clean_fakeval_intree().  V_UNKNOWN is the initial value and V_STOP is set
 * when a subtree has been removed.
 * NOTE(review): V_TRUE and V_FALSE are not used by the code visible in this
 * file section — confirm whether they are still needed.
 */
#ifdef V_UNKNOWN /* exists in Windows headers */
#undef V_UNKNOWN
#endif
#define V_UNKNOWN 0
#define V_TRUE 1
#define V_FALSE 2
#define V_STOP 3
/*
 * Remove from the query tree the values that are marked as stop words
 * (VALSTOP items).
 *
 * Returns the cleaned subtree, or NULL when the whole subtree was removed;
 * in the latter case *result is set to V_STOP.  *result is left untouched
 * when the subtree is kept.
 *	- VAL: returned as is;
 *	- VALSTOP: freed;
 *	- '!': removed when its operand is removed;
 *	- other operators: removed when both operands are removed, replaced by
 *	  the surviving operand when exactly one is removed.
 */
static NODE *
clean_fakeval_intree(NODE * node, char *result)
{
char lresult = V_UNKNOWN,
rresult = V_UNKNOWN;
if (node->valnode->type == VAL)
return node;
else if (node->valnode->type == VALSTOP)
{
pfree(node);
*result = V_STOP;
return NULL;
}
if (node->valnode->val == (int4) '!')
{
/* a '!' node only has a right operand */
node->right = clean_fakeval_intree(node->right, &rresult);
if (!node->right)
{
*result = V_STOP;
freetree(node);
return NULL;
}
}
else
{
NODE *res = node;
node->left = clean_fakeval_intree(node->left, &lresult);
node->right = clean_fakeval_intree(node->right, &rresult);
if (lresult == V_STOP && rresult == V_STOP)
{
/* both child pointers are NULL by now, so this frees just the node */
freetree(node);
*result = V_STOP;
return NULL;
}
else if (lresult == V_STOP)
{
/* left operand removed: the right operand replaces the operator */
res = node->right;
pfree(node);
}
else if (rresult == V_STOP)
{
/* right operand removed: the left operand replaces the operator */
res = node->left;
pfree(node);
}
return res;
}
return node;
}
/*
 * clean_fakeval
 *		Strip stop-word values from the flat item array at 'ptr'.
 *
 * The array is turned into a tree, cleaned by clean_fakeval_intree(), and
 * flattened again; the item count of the result goes into *len.  When the
 * cleaning reports anything but V_UNKNOWN for the root, a NOTICE is raised,
 * *len is set to 0 and NULL is returned.
 */
QueryItem *
clean_fakeval(QueryItem * ptr, int4 *len)
{
	char		status = V_UNKNOWN;
	NODE	   *tree = maketree(ptr);
	NODE	   *cleaned = clean_fakeval_intree(tree, &status);

	if (status == V_UNKNOWN)
		return plaintree(cleaned, len);

	elog(NOTICE, "query contains only stopword(s) or doesn't contain lexeme(s), ignored");
	*len = 0;
	return NULL;
}

View File

@ -0,0 +1,259 @@
/*-------------------------------------------------------------------------
*
* tsquery_gist.c
* GiST index support for tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_gist.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/skey.h"
#include "access/gist.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
/* Fetch the key of entry 'pos' of a GistEntryVector as a TSQuerySign pointer */
#define GETENTRY(vec,pos) ((TSQuerySign *) DatumGetPointer((vec)->vector[(pos)].key))
Datum
gtsquery_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey)
{
TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign));
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
*sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
gistentryinit(*retval, PointerGetDatum(sign),
entry->rel, entry->page,
entry->offset, FALSE);
}
PG_RETURN_POINTER(retval);
}
/*
 * gtsquery_decompress
 *		GiST decompress method for tsquery: hands its argument back as is.
 */
Datum
gtsquery_decompress(PG_FUNCTION_ARGS)
{
	Datum		entry = PG_GETARG_DATUM(0);

	PG_RETURN_DATUM(entry);
}
Datum
gtsquery_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
TSQuerySign *key = (TSQuerySign *) DatumGetPointer(entry->key);
TSQuery query = PG_GETARG_TSQUERY(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
TSQuerySign sq = makeTSQuerySign(query);
bool retval;
switch (strategy)
{
case RTContainsStrategyNumber:
if (GIST_LEAF(entry))
retval = (*key & sq) == sq;
else
retval = (*key & sq) != 0;
break;
case RTContainedByStrategyNumber:
if (GIST_LEAF(entry))
retval = (*key & sq) == *key;
else
retval = (*key & sq) != 0;
break;
default:
retval = FALSE;
}
PG_RETURN_BOOL(retval);
}
Datum
gtsquery_union(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
int *size = (int *) PG_GETARG_POINTER(1);
TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign));
int i;
memset(sign, 0, sizeof(TSQuerySign));
for (i = 0; i < entryvec->n; i++)
*sign |= *GETENTRY(entryvec, i);
*size = sizeof(TSQuerySign);
PG_RETURN_POINTER(sign);
}
Datum
gtsquery_same(PG_FUNCTION_ARGS)
{
TSQuerySign *a = (TSQuerySign *) PG_GETARG_POINTER(0);
TSQuerySign *b = (TSQuerySign *) PG_GETARG_POINTER(1);
PG_RETURN_POINTER(*a == *b);
}
/*
 * sizebitvec
 *		Count the bits set among the low TSQS_SIGLEN bits of a signature.
 */
static int
sizebitvec(TSQuerySign sign)
{
	int			nbits = 0;
	int			bit = 0;

	while (bit < TSQS_SIGLEN)
	{
		nbits += 0x01 & (sign >> bit);
		bit++;
	}

	return nbits;
}
/*
 * hemdist
 *		Distance between two signatures: the number of bit positions in
 *		which they differ.
 */
static int
hemdist(TSQuerySign a, TSQuerySign b)
{
	/* XOR leaves a 1 exactly where the two signatures disagree */
	return sizebitvec(a ^ b);
}
Datum
gtsquery_penalty(PG_FUNCTION_ARGS)
{
TSQuerySign *origval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
TSQuerySign *newval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *penalty = (float *) PG_GETARG_POINTER(2);
*penalty = hemdist(*origval, *newval);
PG_RETURN_POINTER(penalty);
}
/*
 * Sort record used by gtsquery_picksplit(): one per entry, ordered by cost
 * with comparecost().
 */
typedef struct
{
OffsetNumber pos;	/* offset of the entry in the entry vector */
int4 cost;			/* |distance to seed 1 - distance to seed 2| */
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
return 0;
else
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
}
/*
 * Balancing term used by gtsquery_picksplit(): minus the cube of (a - b),
 * scaled by c, as a double.  With a and b being the current sizes of the
 * left and right halves it is negative when the left half is larger and
 * positive when the right half is larger.
 */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
/*
 * gtsquery_picksplit
 *		GiST picksplit method for tsquery signatures.
 *
 * Picks as seeds the two entries whose signatures are furthest apart (by
 * hemdist), then distributes all entries between a left and a right group,
 * each described by the OR of its members' signatures.  The offset lists go
 * into v->spl_left / v->spl_right and the group signatures into
 * v->spl_ldatum / v->spl_rdatum.
 */
Datum
gtsquery_picksplit(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
/* the seed search below looks at offsets FirstOffsetNumber .. n - 2 */
OffsetNumber maxoff = entryvec->n - 2;
OffsetNumber k,
j;
TSQuerySign *datum_l,
*datum_r;
int4 size_alpha,
size_beta;
int4 size_waste,
waste = -1;
int4 nbytes;
OffsetNumber seed_1 = 0,
seed_2 = 0;
OffsetNumber *left,
*right;
SPLITCOST *costvector;
/* each list gets room for (maxoff + 2) offsets */
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
left = v->spl_left = (OffsetNumber *) palloc(nbytes);
right = v->spl_right = (OffsetNumber *) palloc(nbytes);
v->spl_nleft = v->spl_nright = 0;
/* find the pair of entries with the greatest distance between them */
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
{
size_waste = hemdist(*GETENTRY(entryvec, j), *GETENTRY(entryvec, k));
if (size_waste > waste)
{
waste = size_waste;
seed_1 = k;
seed_2 = j;
}
}
/* no pair was examined: fall back to the first two offsets */
if (seed_1 == 0 || seed_2 == 0)
{
seed_1 = 1;
seed_2 = 2;
}
/* each group signature starts out as a copy of its seed */
datum_l = (TSQuerySign *) palloc(sizeof(TSQuerySign));
*datum_l = *GETENTRY(entryvec, seed_1);
datum_r = (TSQuerySign *) palloc(sizeof(TSQuerySign));
*datum_r = *GETENTRY(entryvec, seed_2);
/* from here on the entry at offset n - 1 is included as well */
maxoff = OffsetNumberNext(maxoff);
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
/* cost of an entry: how differently it relates to the two seeds */
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
costvector[j - 1].pos = j;
size_alpha = hemdist(*GETENTRY(entryvec, seed_1), *GETENTRY(entryvec, j));
size_beta = hemdist(*GETENTRY(entryvec, seed_2), *GETENTRY(entryvec, j));
costvector[j - 1].cost = abs(size_alpha - size_beta);
}
/* process the entries in order of increasing cost */
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
for (k = 0; k < maxoff; k++)
{
j = costvector[k].pos;
/* the seeds always go to their own group */
if (j == seed_1)
{
*left++ = j;
v->spl_nleft++;
continue;
}
else if (j == seed_2)
{
*right++ = j;
v->spl_nright++;
continue;
}
/*
 * Compare the distance to each group's current signature; WISH_F adds a
 * term that depends on the difference between the two group sizes.
 */
size_alpha = hemdist(*datum_l, *GETENTRY(entryvec, j));
size_beta = hemdist(*datum_r, *GETENTRY(entryvec, j));
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
{
*datum_l |= *GETENTRY(entryvec, j);
*left++ = j;
v->spl_nleft++;
}
else
{
*datum_r |= *GETENTRY(entryvec, j);
*right++ = j;
v->spl_nright++;
}
}
/* store FirstOffsetNumber in the slot after the last entry of each list */
*right = *left = FirstOffsetNumber;
v->spl_ldatum = PointerGetDatum(datum_l);
v->spl_rdatum = PointerGetDatum(datum_r);
PG_RETURN_POINTER(v);
}

View File

@ -0,0 +1,289 @@
/*-------------------------------------------------------------------------
*
* tsquery_op.c
* Various operations with tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/pg_crc.h"
/*
 * tsquery_numnode
 *		Return the number of items (query->size) in a tsquery.
 */
Datum
tsquery_numnode(PG_FUNCTION_ARGS)
{
	TSQuery		query = PG_GETARG_TSQUERY(0);
	int			count;

	/* read the count before the (possibly copied) argument is released */
	count = query->size;
	PG_FREE_IF_COPY(query, 0);

	PG_RETURN_INT32(count);
}
/*
 * join_tsqueries
 *		Build an operator node with the trees of 'b' and 'a' as its two
 *		children (in that order).
 *
 * The operator code itself (valnode->val) is left zero for the caller to
 * fill in.  The new node owns its valnode (QTN_NEEDFREE); the children
 * reference the items and operands of the two input queries.
 */
static QTNode *
join_tsqueries(TSQuery a, TSQuery b)
{
	QTNode	   *joined = (QTNode *) palloc0(sizeof(QTNode));

	joined->flags |= QTN_NEEDFREE;

	joined->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
	joined->valnode->type = OPR;

	joined->nchild = 2;
	joined->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
	joined->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
	joined->child[1] = QT2QTN(GETQUERY(a), GETOPERAND(a));

	return joined;
}
/*
 * tsquery_and
 *		AND two tsqueries together.
 *
 * When either input is empty the other one is returned as is.  Otherwise
 * both trees are joined under a new '&' operator and the result is
 * converted back to a tsquery.
 */
Datum
tsquery_and(PG_FUNCTION_ARGS)
{
	TSQuery		a = PG_GETARG_TSQUERY_COPY(0);
	TSQuery		b = PG_GETARG_TSQUERY_COPY(1);
	QTNode	   *res;
	TSQuery		query;

	if (a->size == 0)
	{
		/* 'a' came from argument 0, so that is the index to compare with */
		PG_FREE_IF_COPY(a, 0);
		PG_RETURN_POINTER(b);
	}
	else if (b->size == 0)
	{
		PG_FREE_IF_COPY(b, 1);
		PG_RETURN_POINTER(a);
	}

	res = join_tsqueries(a, b);

	res->valnode->val = '&';

	query = QTN2QT(res);

	/* the tree references a and b, so release it before the inputs */
	QTNFree(res);
	PG_FREE_IF_COPY(a, 0);
	PG_FREE_IF_COPY(b, 1);

	PG_RETURN_TSQUERY(query);
}
/*
 * tsquery_or
 *		OR two tsqueries together.
 *
 * When either input is empty the other one is returned as is.  Otherwise
 * both trees are joined under a new '|' operator and the result is
 * converted back to a tsquery.
 */
Datum
tsquery_or(PG_FUNCTION_ARGS)
{
	TSQuery		a = PG_GETARG_TSQUERY_COPY(0);
	TSQuery		b = PG_GETARG_TSQUERY_COPY(1);
	QTNode	   *res;
	TSQuery		query;

	if (a->size == 0)
	{
		/* 'a' came from argument 0, so that is the index to compare with */
		PG_FREE_IF_COPY(a, 0);
		PG_RETURN_POINTER(b);
	}
	else if (b->size == 0)
	{
		PG_FREE_IF_COPY(b, 1);
		PG_RETURN_POINTER(a);
	}

	res = join_tsqueries(a, b);

	res->valnode->val = '|';

	query = QTN2QT(res);

	/* the tree references a and b, so release it before the inputs */
	QTNFree(res);
	PG_FREE_IF_COPY(a, 0);
	PG_FREE_IF_COPY(b, 1);

	PG_RETURN_TSQUERY(query);
}
/*
 * tsquery_not
 *		Negate a tsquery.
 *
 * An empty query is returned unchanged.  Otherwise the query's tree becomes
 * the single child of a new '!' operator, which is then converted back to a
 * tsquery.
 */
Datum
tsquery_not(PG_FUNCTION_ARGS)
{
	TSQuery		a = PG_GETARG_TSQUERY_COPY(0);
	QTNode	   *negation;
	TSQuery		result;

	if (a->size == 0)
		PG_RETURN_POINTER(a);

	negation = (QTNode *) palloc0(sizeof(QTNode));
	negation->flags |= QTN_NEEDFREE;

	negation->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
	negation->valnode->type = OPR;
	negation->valnode->val = '!';

	negation->nchild = 1;
	negation->child = (QTNode **) palloc0(sizeof(QTNode *));
	negation->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));

	result = QTN2QT(negation);

	QTNFree(negation);
	PG_FREE_IF_COPY(a, 0);

	PG_RETURN_POINTER(result);
}
/*
 * CompareTSQ
 *		Three-way comparison of two tsqueries.
 *
 * Queries are ordered first by number of items, then by total varlena size;
 * only when both match are the trees built and compared with
 * QTNodeCompare().
 */
static int
CompareTSQ(TSQuery a, TSQuery b)
{
	QTNode	   *atree;
	QTNode	   *btree;
	int			cmp;

	if (a->size != b->size)
		return (a->size < b->size) ? -1 : 1;

	if (VARSIZE(a) != VARSIZE(b))
		return (VARSIZE(a) < VARSIZE(b)) ? -1 : 1;

	atree = QT2QTN(GETQUERY(a), GETOPERAND(a));
	btree = QT2QTN(GETQUERY(b), GETOPERAND(b));
	cmp = QTNodeCompare(atree, btree);

	QTNFree(atree);
	QTNFree(btree);

	return cmp;
}
/*
 * tsquery_cmp
 *		Comparison support function for tsquery: returns the CompareTSQ()
 *		result for its two arguments.
 */
Datum
tsquery_cmp(PG_FUNCTION_ARGS)
{
	TSQuery		lhs = PG_GETARG_TSQUERY_COPY(0);
	TSQuery		rhs = PG_GETARG_TSQUERY_COPY(1);
	int			order;

	order = CompareTSQ(lhs, rhs);

	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_INT32(order);
}
/*
 * CMPFUNC generates a boolean comparison function named NAME.  The function
 * compares its two tsquery arguments with CompareTSQ(), stores the outcome
 * in a local variable 'res', and returns CONDITION, an expression written in
 * terms of 'res'.
 */
#define CMPFUNC( NAME, CONDITION ) \
Datum \
NAME(PG_FUNCTION_ARGS) { \
TSQuery a = PG_GETARG_TSQUERY_COPY(0); \
TSQuery b = PG_GETARG_TSQUERY_COPY(1); \
int res = CompareTSQ(a,b); \
\
PG_FREE_IF_COPY(a,0); \
PG_FREE_IF_COPY(b,1); \
\
PG_RETURN_BOOL( CONDITION ); \
}
/* the six comparison functions built from CompareTSQ() */
CMPFUNC(tsquery_lt, res < 0);
CMPFUNC(tsquery_le, res <= 0);
CMPFUNC(tsquery_eq, res == 0);
CMPFUNC(tsquery_ge, res >= 0);
CMPFUNC(tsquery_gt, res > 0);
CMPFUNC(tsquery_ne, res != 0);
/*
 * makeTSQuerySign
 *		Compute the signature of a tsquery: one bit per VAL item, selected
 *		by the item's val modulo TSQS_SIGLEN.  Operator items contribute
 *		nothing.
 *
 * val is a signed field, so it is converted to unsigned before the modulo;
 * otherwise a negative val would yield a negative (undefined) shift count.
 */
TSQuerySign
makeTSQuerySign(TSQuery a)
{
	int			i;
	QueryItem  *ptr = GETQUERY(a);
	TSQuerySign sign = 0;

	for (i = 0; i < a->size; i++)
	{
		if (ptr->type == VAL)
			sign |= ((TSQuerySign) 1) << (((unsigned int) ptr->val) % TSQS_SIGLEN);
		ptr++;
	}

	return sign;
}
Datum
tsq_mcontains(PG_FUNCTION_ARGS)
{
TSQuery query = PG_GETARG_TSQUERY(0);
TSQuery ex = PG_GETARG_TSQUERY(1);
TSQuerySign sq,
se;
int i,
j;
QueryItem *iq,
*ie;
if (query->size < ex->size)
{
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL(false);
}
sq = makeTSQuerySign(query);
se = makeTSQuerySign(ex);
if ((sq & se) != se)
{
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL(false);
}
ie = GETQUERY(ex);
for (i = 0; i < ex->size; i++)
{
iq = GETQUERY(query);
if (ie[i].type != VAL)
continue;
for (j = 0; j < query->size; j++)
if (iq[j].type == VAL && ie[i].val == iq[j].val)
{
j = query->size + 1;
break;
}
if (j == query->size)
{
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL(false);
}
}
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL(true);
}
/*
 * tsq_mcontained
 *		"Is contained by": tsq_mcontains() with the two arguments swapped.
 */
Datum
tsq_mcontained(PG_FUNCTION_ARGS)
{
	Datum		inner = PG_GETARG_DATUM(0);
	Datum		outer = PG_GETARG_DATUM(1);

	PG_RETURN_DATUM(DirectFunctionCall2(tsq_mcontains, outer, inner));
}

View File

@ -0,0 +1,524 @@
/*-------------------------------------------------------------------------
*
* tsquery_rewrite.c
* Utilities for reconstructing tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "executor/spi.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
/*
 * addone
 *		Advance an ascending index combination to its next value.
 *
 * counters[0..last] hold indexes; position 'last' must stay below 'total',
 * position last - 1 below total - 1, and so on.  The rightmost position
 * that can still be incremented is bumped and every position after it is
 * reset to its predecessor plus one.
 *
 * Returns 1 when a new combination was produced, 0 when even position 0
 * overflowed.  In the latter case the positions that were incremented along
 * the way are left incremented.
 */
static int
addone(int *counters, int last, int total)
{
	int			pos = last;
	int			limit = total;

	/* walk leftwards until an increment stays within its limit */
	for (;;)
	{
		counters[pos]++;
		if (counters[pos] < limit)
			break;
		if (pos == 0)
			return 0;
		pos--;
		limit--;
	}

	/* rebuild the positions to the right of the one that was bumped */
	for (pos++; pos <= last; pos++)
		counters[pos] = counters[pos - 1] + 1;

	return 1;
}
/*
 * findeq
 *		Try to match 'node' itself (not its descendants) against the
 *		pattern 'ex' and, on a match, replace the matched part with a copy
 *		of 'subs'.
 *
 * Returns the resulting node: 'node' itself when nothing matched or when
 * only some of its children were replaced, a copy of 'subs' when the whole
 * node matched, or NULL when the whole node matched and 'subs' is NULL.
 * *isfind is set to true whenever a match is made; it is never reset here.
 * Replacement nodes are flagged QTN_NOCHANGE, and nodes carrying that flag
 * are returned untouched.
 */
static QTNode *
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
{
/* quick rejection: signature bits, node type and value must all agree */
if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
return node;
if (node->flags & QTN_NOCHANGE)
return node;
if (node->valnode->type == OPR)
{
if (node->nchild == ex->nchild)
{
/* same number of children: the whole operator node must be equal */
if (QTNEq(node, ex))
{
QTNFree(node);
if (subs)
{
node = QTNCopy(subs);
node->flags |= QTN_NOCHANGE;
}
else
node = NULL;
*isfind = true;
}
}
else if (node->nchild > ex->nchild)
{
/*
 * More children than the pattern: look for a subset of ex->nchild
 * children which, under the same operator, equals the pattern.
 * counters[] holds the indexes of the subset being tried and addone()
 * steps to the next one.
 */
int *counters = (int *) palloc(sizeof(int) * node->nchild);
int i;
/* scratch node that borrows the selected children for the comparison */
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
memset(tnode, 0, sizeof(QTNode));
tnode->child = (QTNode **) palloc(sizeof(QTNode *) * ex->nchild);
tnode->nchild = ex->nchild;
tnode->valnode = (QueryItem *) palloc(sizeof(QueryItem));
*(tnode->valnode) = *(ex->valnode);
for (i = 0; i < ex->nchild; i++)
counters[i] = i;
do
{
tnode->sign = 0;
for (i = 0; i < ex->nchild; i++)
{
tnode->child[i] = node->child[counters[i]];
tnode->sign |= tnode->child[i]->sign;
}
if (QTNEq(tnode, ex))
{
int j = 0;
/* the scratch node is done with; tnode now becomes the replacement */
pfree(tnode->valnode);
pfree(tnode->child);
pfree(tnode);
if (subs)
{
tnode = QTNCopy(subs);
tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE;
}
else
tnode = NULL;
/*
 * The replacement takes the slot of the first matched child and the
 * other matched slots are cleared.
 * NOTE(review): the matched child nodes themselves are not freed here —
 * confirm whether that is intended.
 */
node->child[counters[0]] = tnode;
for (i = 1; i < ex->nchild; i++)
node->child[counters[i]] = NULL;
/* squeeze the NULL slots out of the child array */
for (i = 0; i < node->nchild; i++)
{
if (node->child[i])
{
node->child[j] = node->child[i];
j++;
}
}
node->nchild = j;
*isfind = true;
break;
}
} while (addone(counters, ex->nchild - 1, node->nchild));
/*
 * A tnode without QTN_NOCHANGE is still the scratch node (no subset
 * matched) and has to be released.  Otherwise a replacement was made
 * (tnode is the substitute or NULL) and the children are re-sorted.
 */
if (tnode && (tnode->flags & QTN_NOCHANGE) == 0)
{
pfree(tnode->valnode);
pfree(tnode->child);
pfree(tnode);
}
else
QTNSort(node);
pfree(counters);
}
}
else if (QTNEq(node, ex))
{
/* non-operator node equal to the pattern: replace it outright */
QTNFree(node);
if (subs)
{
node = QTNCopy(subs);
node->flags |= QTN_NOCHANGE;
}
else
{
node = NULL;
}
*isfind = true;
}
return node;
}
/*
 * dofindsubquery
 *		Apply findeq() to 'root' and then, recursively, to every child of
 *		the result.
 *
 * The recursion stops at a node that became NULL, at a node flagged
 * QTN_NOCHANGE, and at anything that is not an operator.  Each child slot
 * is overwritten with what the recursive call returns, which may be NULL.
 */
static QTNode *
dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
{
	QTNode	   *node = findeq(root, ex, subs, isfind);
	int			k;

	if (node == NULL)
		return node;
	if ((node->flags & QTN_NOCHANGE) != 0)
		return node;
	if (node->valnode->type != OPR)
		return node;

	for (k = 0; k < node->nchild; k++)
		node->child[k] = dofindsubquery(node->child[k], ex, subs, isfind);

	return node;
}
/*
 * dropvoidsubtree
 *		Tidy up an operator node whose child array may contain NULL slots.
 *
 * The non-NULL children are packed to the front of the array.  Then:
 *	- an operator left with no children at all is freed and NULL is
 *	  returned (whatever the operator: a childless '&' or '|' cannot be
 *	  converted back to a tsquery any more than a childless '!' can);
 *	- an '&' or '|' left with exactly one child is replaced by that child;
 *	- a '!' with its one child is kept, because replacing it by its operand
 *	  would drop the negation.
 * Non-operator nodes and NULL are returned unchanged.  Only 'root' itself
 * is examined; the children are not visited.
 */
static QTNode *
dropvoidsubtree(QTNode * root)
{
	if (!root)
		return NULL;

	if (root->valnode->type == OPR)
	{
		int			i,
					j = 0;

		/* pack the surviving children to the front */
		for (i = 0; i < root->nchild; i++)
		{
			if (root->child[i])
			{
				root->child[j] = root->child[i];
				j++;
			}
		}

		root->nchild = j;

		if (root->nchild == 0)
		{
			QTNFree(root);
			root = NULL;
		}
		else if (root->nchild == 1 && root->valnode->val != (int4) '!')
		{
			QTNode	   *nroot = root->child[0];

			pfree(root);
			root = nroot;
		}
	}

	return root;
}
/*
 * findsubquery
 *		Replace the occurrences of 'ex' in the tree 'root' by 'subs'.
 *
 * When matches were removed rather than replaced (subs is NULL) the root is
 * additionally passed through dropvoidsubtree().  Whether anything matched
 * is reported through *isfind when the caller supplies that pointer.
 * Returns the new root, which may be NULL.
 */
static QTNode *
findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
{
	bool		found = false;
	QTNode	   *result;

	result = dofindsubquery(root, ex, subs, &found);

	if (found && subs == NULL)
		result = dropvoidsubtree(result);

	if (isfind != NULL)
		*isfind = found;

	return result;
}
/*
 * ts_rewrite_accum
 *		Transition function that accumulates rewrites into a tsquery.
 *
 * Argument 0 is the accumulated tsquery (or NULL on the first call);
 * argument 1 is a one-dimensional array of exactly three tsqueries:
 * [0] the query to start from, [1] the subquery to look for, and [2] its
 * substitute.  The accumulated value is allocated in the aggregate's memory
 * context.
 */
Datum
ts_rewrite_accum(PG_FUNCTION_ARGS)
{
TSQuery acc;
ArrayType *qa;
TSQuery q;
QTNode *qex = NULL,
*subs = NULL,
*acctree = NULL;
bool isfind = false;
Datum *elemsp;
int nelemsp;
MemoryContext aggcontext;
MemoryContext oldcontext;
/* NOTE(review): fcinfo->context is assumed to be an AggState — no check is made here */
aggcontext = ((AggState *) fcinfo->context)->aggcontext;
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
{
/* no state yet: start from an empty tsquery */
acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ);
SET_VARSIZE(acc, HDRSIZETQ);
acc->size = 0;
}
else
acc = PG_GETARG_TSQUERY(0);
/* a NULL array leaves the state unchanged */
if (PG_ARGISNULL(1) || PG_GETARG_POINTER(1) == NULL)
PG_RETURN_TSQUERY(acc);
else
qa = PG_GETARG_ARRAYTYPE_P_COPY(1);
if (ARR_NDIM(qa) != 1)
elog(ERROR, "array must be one-dimensional, not %d dimensions",
ARR_NDIM(qa));
if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
elog(ERROR, "array should have only three elements");
if (ARR_ELEMTYPE(qa) != TSQUERYOID)
elog(ERROR, "array should contain tsquery type");
deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
/* element 0: the query to start from; nothing to do when it is empty */
q = DatumGetTSQuery(elemsp[0]);
if (q->size == 0)
{
pfree(elemsp);
PG_RETURN_POINTER(acc);
}
if (!acc->size)
{
/*
 * NOTE(review): an accumulator with no items but a size above the bare
 * header is returned as is; presumably this marks a state that must not
 * be re-seeded — confirm.
 */
if (VARSIZE(acc) > HDRSIZETQ)
{
pfree(elemsp);
PG_RETURN_POINTER(acc);
}
else
acctree = QT2QTN(GETQUERY(q), GETOPERAND(q));
}
else
acctree = QT2QTN(GETQUERY(acc), GETOPERAND(acc));
QTNTernary(acctree);
QTNSort(acctree);
/* element 1: the subquery to look for */
q = DatumGetTSQuery(elemsp[1]);
if (q->size == 0)
{
/* NOTE(review): acctree is not released on this path */
pfree(elemsp);
PG_RETURN_POINTER(acc);
}
qex = QT2QTN(GETQUERY(q), GETOPERAND(q));
QTNTernary(qex);
QTNSort(qex);
/* element 2: the substitute; an empty one leaves subs NULL (removal) */
q = DatumGetTSQuery(elemsp[2]);
if (q->size)
subs = QT2QTN(GETQUERY(q), GETOPERAND(q));
acctree = findsubquery(acctree, qex, subs, &isfind);
/* rebuild the state when something was replaced or it was still empty */
if (isfind || !acc->size)
{
/* pfree( acc ); do not pfree(p), because nodeAgg.c will */
if (acctree)
{
QTNBinary(acctree);
/* the new state value is built in the aggregate context */
oldcontext = MemoryContextSwitchTo(aggcontext);
acc = QTN2QT(acctree);
MemoryContextSwitchTo(oldcontext);
}
else
{
acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ);
SET_VARSIZE(acc, HDRSIZETQ);
acc->size = 0;
}
}
pfree(elemsp);
QTNFree(qex);
QTNFree(subs);
QTNFree(acctree);
PG_RETURN_TSQUERY(acc);
}
/*
 * ts_rewrite_finish
 *		Final function for the rewrite aggregate: return the accumulated
 *		tsquery as a fresh palloc'd copy, or an empty tsquery when there is
 *		no accumulated value (NULL argument) or it has no items.
 *
 * The argument is only fetched once it is known not to be NULL, because
 * fetching it detoasts (and therefore dereferences) the pointer.
 */
Datum
ts_rewrite_finish(PG_FUNCTION_ARGS)
{
	TSQuery		acc = NULL;
	TSQuery		rewrited;

	if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
		acc = PG_GETARG_TSQUERY(0);

	if (acc == NULL || acc->size == 0)
	{
		rewrited = (TSQuery) palloc(HDRSIZETQ);
		SET_VARSIZE(rewrited, HDRSIZETQ);
		rewrited->size = 0;
	}
	else
	{
		rewrited = (TSQuery) palloc(VARSIZE(acc));
		memcpy(rewrited, acc, VARSIZE(acc));
		pfree(acc);
	}

	PG_RETURN_POINTER(rewrited);
}
/*
 * tsquery_rewrite
 *		Rewrite a tsquery using rules produced by an SQL command.
 *
 * Argument 0 is the query to rewrite and argument 1 is the text of an SQL
 * command that must return two tsquery columns: the subquery to look for
 * and its substitute.  The command is run through an SPI cursor and its
 * rows are applied to the query tree one after another, in batches of 100.
 * Returns the rewritten query, or an empty tsquery when nothing is left.
 */
Datum
tsquery_rewrite(PG_FUNCTION_ARGS)
{
TSQuery query = PG_GETARG_TSQUERY_COPY(0);
text *in = PG_GETARG_TEXT_P(1);
TSQuery rewrited = query;
/* the context current at entry, remembered for use while SPI is connected */
MemoryContext outercontext = CurrentMemoryContext;
MemoryContext oldcontext;
QTNode *tree;
char *buf;
void *plan;
Portal portal;
bool isnull;
int i;
/* an empty query cannot be rewritten */
if (query->size == 0)
{
PG_FREE_IF_COPY(in, 1);
PG_RETURN_POINTER(rewrited);
}
tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
QTNTernary(tree);
QTNSort(tree);
buf = TextPGetCString(in);
SPI_connect();
if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
elog(ERROR, "SPI_prepare(\"%s\") failed", buf);
if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf);
SPI_cursor_fetch(portal, true, 100);
/* the command must produce exactly two tsquery columns */
if (SPI_tuptable->tupdesc->natts != 2 ||
SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID ||
SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ts_rewrite query must return two tsquery columns")));
/* stop early once the tree has been rewritten away entirely */
while (SPI_processed > 0 && tree)
{
for (i = 0; i < SPI_processed && tree; i++)
{
Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
Datum sdata;
/* rows with a NULL in either column are skipped */
if (isnull)
continue;
sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
if (!isnull)
{
TSQuery qtex = DatumGetTSQuery(qdata);
TSQuery qtsubs = DatumGetTSQuery(sdata);
QTNode *qex,
*qsubs = NULL;
/* an empty search query matches nothing: free any detoasted copies */
if (qtex->size == 0)
{
if (qtex != (TSQuery) DatumGetPointer(qdata))
pfree(qtex);
if (qtsubs != (TSQuery) DatumGetPointer(sdata))
pfree(qtsubs);
continue;
}
qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex));
QTNTernary(qex);
QTNSort(qex);
if (qtsubs->size)
qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs));
/*
 * Run the substitution with the entry-time context current, so that
 * whatever findsubquery() allocates for the tree lives there.
 */
oldcontext = MemoryContextSwitchTo(outercontext);
tree = findsubquery(tree, qex, qsubs, NULL);
MemoryContextSwitchTo(oldcontext);
QTNFree(qex);
if (qtex != (TSQuery) DatumGetPointer(qdata))
pfree(qtex);
QTNFree(qsubs);
if (qtsubs != (TSQuery) DatumGetPointer(sdata))
pfree(qtsubs);
}
}
/* done with this batch; fetch the next one */
SPI_freetuptable(SPI_tuptable);
SPI_cursor_fetch(portal, true, 100);
}
SPI_freetuptable(SPI_tuptable);
SPI_cursor_close(portal);
SPI_freeplan(plan);
SPI_finish();
if (tree)
{
/* convert the tree back to a tsquery; the input copy is released */
QTNBinary(tree);
rewrited = QTN2QT(tree);
QTNFree(tree);
PG_FREE_IF_COPY(query, 0);
}
else
{
/* nothing left: rewrited still points at the input copy, reset it to empty */
SET_VARSIZE(rewrited, HDRSIZETQ);
rewrited->size = 0;
}
pfree(buf);
PG_FREE_IF_COPY(in, 1);
PG_RETURN_POINTER(rewrited);
}
/*
 * tsquery_rewrite_query
 *		Replace the occurrences of 'ex' in 'query' by 'subst'.
 *
 * If the query or the pattern is empty, the (copied) query is returned
 * untouched.  An empty substitute removes the matches.  When the rewrite
 * leaves nothing, the query copy is reset to an empty tsquery and returned;
 * otherwise a new tsquery is built from the rewritten tree and the copy is
 * released.
 */
Datum
tsquery_rewrite_query(PG_FUNCTION_ARGS)
{
	TSQuery		query = PG_GETARG_TSQUERY_COPY(0);
	TSQuery		ex = PG_GETARG_TSQUERY(1);
	TSQuery		subst = PG_GETARG_TSQUERY(2);
	TSQuery		result = query;

	if (query->size != 0 && ex->size != 0)
	{
		QTNode	   *qtree;
		QTNode	   *extree;
		QTNode	   *substtree = NULL;

		qtree = QT2QTN(GETQUERY(query), GETOPERAND(query));
		QTNTernary(qtree);
		QTNSort(qtree);

		extree = QT2QTN(GETQUERY(ex), GETOPERAND(ex));
		QTNTernary(extree);
		QTNSort(extree);

		if (subst->size)
			substtree = QT2QTN(GETQUERY(subst), GETOPERAND(subst));

		qtree = findsubquery(qtree, extree, substtree, NULL);

		QTNFree(extree);
		QTNFree(substtree);

		if (qtree == NULL)
		{
			/* everything was rewritten away: reuse the copy as an empty query */
			SET_VARSIZE(result, HDRSIZETQ);
			result->size = 0;
		}
		else
		{
			QTNBinary(qtree);
			result = QTN2QT(qtree);
			QTNFree(qtree);
			PG_FREE_IF_COPY(query, 0);
		}
	}

	PG_FREE_IF_COPY(ex, 1);
	PG_FREE_IF_COPY(subst, 2);

	PG_RETURN_POINTER(result);
}

View File

@ -0,0 +1,317 @@
/*-------------------------------------------------------------------------
*
* tsquery_util.c
* Utilities for tsquery datatype
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
/*
 * QT2QTN
 *		Build a QTNode tree from the flat item array starting at 'in'.
 *
 * The nodes reference the items (valnode) and, for values, the operand text
 * inside 'operand'; nothing is copied.  An operator node gets the item that
 * follows it as child[0] and, unless it is '!', the item in->left positions
 * ahead as child[1].  A node's sign is the OR of its children's signs; a
 * value's sign is a single bit selected by val modulo 32.  When 'operand'
 * is NULL, value nodes are left with no word and a zero sign.
 *
 * val is a signed field, so it is converted to unsigned before the modulo
 * (a negative remainder would be an undefined shift count), and the shift
 * is done on an unsigned 32-bit one so that bit 31 is well defined too.
 */
QTNode *
QT2QTN(QueryItem * in, char *operand)
{
	QTNode	   *node = (QTNode *) palloc0(sizeof(QTNode));

	node->valnode = in;

	if (in->type == OPR)
	{
		node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
		node->child[0] = QT2QTN(in + 1, operand);
		node->sign = node->child[0]->sign;
		if (in->val == (int4) '!')
			node->nchild = 1;
		else
		{
			node->nchild = 2;
			node->child[1] = QT2QTN(in + in->left, operand);
			node->sign |= node->child[1]->sign;
		}
	}
	else if (operand)
	{
		node->word = operand + in->distance;
		node->sign = ((uint32) 1) << (((unsigned int) in->val) % 32);
	}

	return node;
}
/*
 * QTNFree
 *		Release a QTNode tree.  NULL is accepted and ignored.
 *
 * For each node this frees:
 *	- the word, when the node is a value that owns it (QTN_WORDFREE);
 *	- the children of an operator, recursively (NULL slots are fine);
 *	- the valnode, when the node owns it (QTN_NEEDFREE), whether or not the
 *	  node has a child array, so that copied value nodes do not leak theirs;
 *	- the child array and the node itself.
 * valnode is checked for NULL before it is looked at.
 */
void
QTNFree(QTNode * in)
{
	if (!in)
		return;

	if (in->valnode)
	{
		if (in->valnode->type == VAL && in->word &&
			(in->flags & QTN_WORDFREE) != 0)
			pfree(in->word);

		if (in->valnode->type == OPR && in->child)
		{
			int			i;

			for (i = 0; i < in->nchild; i++)
				QTNFree(in->child[i]);
		}

		if (in->flags & QTN_NEEDFREE)
			pfree(in->valnode);
	}

	if (in->child)
		pfree(in->child);

	pfree(in);
}
/*
 * QTNodeCompare
 *		Three-way comparison of two QTNode trees.
 *
 * Nodes are compared by item type, then by val; in both cases the node with
 * the larger field sorts first (-1).  Values with equal val are compared by
 * length (longer first) and then by strncmp of their words.  Operators are
 * compared by number of children (more first) and then child by child.
 */
int
QTNodeCompare(QTNode * an, QTNode * bn)
{
	int			k;

	if (an->valnode->type != bn->valnode->type)
		return (an->valnode->type > bn->valnode->type) ? -1 : 1;

	if (an->valnode->val != bn->valnode->val)
		return (an->valnode->val > bn->valnode->val) ? -1 : 1;

	if (an->valnode->type == VAL)
	{
		if (an->valnode->length != bn->valnode->length)
			return (an->valnode->length > bn->valnode->length) ? -1 : 1;

		return strncmp(an->word, bn->word, an->valnode->length);
	}

	if (an->nchild != bn->nchild)
		return (an->nchild > bn->nchild) ? -1 : 1;

	for (k = 0; k < an->nchild; k++)
	{
		int			cmp = QTNodeCompare(an->child[k], bn->child[k]);

		if (cmp != 0)
			return cmp;
	}

	return 0;
}
static int
cmpQTN(const void *a, const void *b)
{
return QTNodeCompare(*(QTNode **) a, *(QTNode **) b);
}
/*
 * QTNSort
 *		Sort the children of every operator node in the tree, bottom-up,
 *		using the QTNodeCompare() ordering.  Non-operator nodes are left
 *		alone.
 */
void
QTNSort(QTNode * in)
{
	int			k;

	if (in->valnode->type != OPR)
		return;

	/* the subtrees are put in order before they are compared */
	k = 0;
	while (k < in->nchild)
	{
		QTNSort(in->child[k]);
		k++;
	}

	if (in->nchild > 1)
		qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
}
/*
 * QTNEq
 *		Are two QTNode trees equal?
 *
 * The signs are checked first: they can only survive an AND with each other
 * unchanged if they are identical.  Only then is the full comparison done.
 */
bool
QTNEq(QTNode * a, QTNode * b)
{
	uint32		common = a->sign & b->sign;

	if (common != a->sign || common != b->sign)
		return false;

	if (QTNodeCompare(a, b) == 0)
		return true;

	return false;
}
/*
 * QTNTernary
 *		Flatten nested operators: a child that carries the same operator as
 *		its parent is replaced, in the parent's child array, by that
 *		child's own children.  Applied bottom-up, so an operator node may
 *		end up with more than two children.
 */
void
QTNTernary(QTNode * in)
{
int i;
if (in->valnode->type != OPR)
return;
/* flatten the subtrees first */
for (i = 0; i < in->nchild; i++)
QTNTernary(in->child[i]);
for (i = 0; i < in->nchild; i++)
{
if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
{
QTNode *cc = in->child[i];
int oldnchild = in->nchild;
/* slot i is replaced by cc's children: net growth is cc->nchild - 1 */
in->nchild += cc->nchild - 1;
in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *));
/* shift the entries after slot i to make room, unless i was the last */
if (i + 1 != oldnchild)
memmove(in->child + i + cc->nchild, in->child + i + 1,
(oldnchild - i - 1) * sizeof(QTNode *));
memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
/* skip the entries just spliced in; the loop's i++ completes the step */
i += cc->nchild - 1;
/* NOTE(review): only the node is freed, not cc->child or cc->valnode */
pfree(cc);
}
}
}
/*
 * QTNBinary
 *		Turn every operator node with more than two children back into a
 *		chain of two-child operators.
 *
 * While a node has more than two children, its first two are moved under a
 * new node with the same type and operator (which owns its valnode,
 * QTN_NEEDFREE); the new node becomes child 0 and the last child moves into
 * slot 1.
 */
void
QTNBinary(QTNode * in)
{
	int			k;

	if (in->valnode->type != OPR)
		return;

	for (k = 0; k < in->nchild; k++)
		QTNBinary(in->child[k]);

	while (in->nchild > 2)
	{
		QTNode	   *pair = (QTNode *) palloc0(sizeof(QTNode));

		pair->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
		pair->valnode->type = in->valnode->type;
		pair->valnode->val = in->valnode->val;
		pair->flags = QTN_NEEDFREE;

		pair->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
		pair->nchild = 2;
		pair->child[0] = in->child[0];
		pair->child[1] = in->child[1];
		pair->sign = pair->child[0]->sign | pair->child[1]->sign;

		in->child[0] = pair;
		in->child[1] = in->child[in->nchild - 1];
		in->nchild--;
	}
}
/*
 * cntsize
 *		Add to *nnode the number of nodes in the tree and to *sumlen the
 *		operand space it needs (length + 1 for every non-operator node).
 */
static void
cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
{
	int			k;

	*nnode += 1;

	if (in->valnode->type != OPR)
	{
		*sumlen += in->valnode->length + 1;
		return;
	}

	for (k = 0; k < in->nchild; k++)
		cntsize(in->child[k], sumlen, nnode);
}
/*
 * Output cursor used by QTN2QT()/fillQT() while a QTNode tree is written
 * into a tsquery.
 */
typedef struct
{
QueryItem *curitem;		/* next item slot to fill */
char *operand;			/* start of the operand area */
char *curoperand;		/* next free byte in the operand area */
} QTN2QTState;
/*
 * fillQT
 *		Write the tree 'in' into the item array and operand area described
 *		by 'state'.
 *
 * Each node's item is copied into the next slot.  For a value the word is
 * appended to the operand area with a trailing '\0' and the item's distance
 * is set to the word's offset there.  For an operator child[0] follows
 * immediately; when there is a second child the operator's left field is
 * set to the number of slots between the operator and that child.
 */
static void
fillQT(QTN2QTState * state, QTNode * in)
{
	QueryItem  *slot = state->curitem;

	*slot = *(in->valnode);

	if (in->valnode->type == VAL)
	{
		int			wordlen = in->valnode->length;

		memcpy(state->curoperand, in->word, wordlen);
		slot->distance = state->curoperand - state->operand;
		state->curoperand[wordlen] = '\0';
		state->curoperand += wordlen + 1;
		state->curitem++;
		return;
	}

	/* the tree must have been made binary before it gets here */
	Assert(in->nchild <= 2);

	state->curitem++;
	fillQT(state, in->child[0]);

	if (in->nchild == 2)
	{
		slot->left = state->curitem - slot;
		fillQT(state, in->child[1]);
	}
}
/*
 * QTN2QT
 *		Convert a QTNode tree into a freshly palloc'd tsquery.
 *
 * cntsize() supplies the node count and the operand space needed, from
 * which the total size is computed; fillQT() then writes the items and the
 * operands.
 */
TSQuery
QTN2QT(QTNode *in)
{
	int			nitems = 0;
	int			operandlen = 0;
	int			total;
	TSQuery		result;
	QTN2QTState cursor;

	cntsize(in, &operandlen, &nitems);
	total = COMPUTESIZE(nitems, operandlen);

	result = (TSQuery) palloc(total);
	SET_VARSIZE(result, total);
	result->size = nitems;

	cursor.curitem = GETQUERY(result);
	cursor.operand = GETOPERAND(result);
	cursor.curoperand = cursor.operand;

	fillQT(&cursor, in);

	return result;
}
/*
 * QTNCopy
 *		Deep-copy a QTNode tree.
 *
 * Every copied node gets its own valnode (QTN_NEEDFREE).  A value node also
 * gets its own NUL-terminated copy of the word (QTN_WORDFREE); any other
 * node gets a new child array holding copies of its children.  All other
 * fields, including the remaining flags, are carried over from the source.
 */
QTNode *
QTNCopy(QTNode *in)
{
	QTNode	   *copy = (QTNode *) palloc(sizeof(QTNode));

	*copy = *in;

	copy->valnode = (QueryItem *) palloc(sizeof(QueryItem));
	*(copy->valnode) = *(in->valnode);
	copy->flags |= QTN_NEEDFREE;

	if (in->valnode->type != VAL)
	{
		int			k;

		copy->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild);
		for (k = 0; k < in->nchild; k++)
			copy->child[k] = QTNCopy(in->child[k]);
	}
	else
	{
		int			wordlen = in->valnode->length;

		copy->word = palloc(wordlen + 1);
		memcpy(copy->word, in->word, wordlen);
		copy->word[wordlen] = '\0';
		copy->flags |= QTN_WORDFREE;
	}

	return copy;
}

View File

@ -0,0 +1,804 @@
/*-------------------------------------------------------------------------
*
* tsrank.c
* rank tsvector by tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/array.h"
/*
 * Default weights, indexed by the weight class stored in a WordEntryPos
 * (0..3).  getWeights() returns this table when no weight array is given.
 */
static float weights[] = {0.1, 0.2, 0.4, 1.0};
/* Weight of position "wep", looked up in the caller's local array "w" */
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
/* Bit flags selecting which normalizations are applied to a computed rank */
#define RANK_NO_NORM 0x00
#define RANK_NORM_LOGLENGTH 0x01	/* divide by a logarithm of (cnt_length + 1) */
#define RANK_NORM_LENGTH 0x02	/* divide by cnt_length of the tsvector */
#define RANK_NORM_EXTDIST 0x04	/* only examined by calc_rank_cd */
#define RANK_NORM_UNIQ 0x08	/* divide by the tsvector's size field */
#define RANK_NORM_LOGUNIQ 0x10	/* divide by log2 of (size field + 1) */
#define DEF_NORM_METHOD RANK_NO_NORM
/* Forward declarations: calc_rank_and falls back to calc_rank_or */
static float calc_rank_or(float *w, TSVector t, TSQuery q);
static float calc_rank_and(float *w, TSVector t, TSQuery q);
/*
 * Weight of a word collocation as a function of the distance "w" between
 * the two words.  Distances above 100 yield a fixed, tiny weight.
 */
static float4
word_distance(int4 w)
{
	double		exponent;

	if (w > 100)
		return 1e-30;

	exponent = ((float4) w) / 1.5 - 2;
	return 1.0 / (1.005 + 0.05 * exp(exponent));
}
/*
 * Length of tsvector "t" measured in positions: each entry contributes its
 * POSDATALEN, or 1 when that is zero.
 */
static int
cnt_length(TSVector t)
{
	WordEntry  *entry;
	WordEntry  *stop = (WordEntry *) STRPTR(t);
	int			total = 0;

	for (entry = ARRPTR(t); entry < stop; entry++)
	{
		int			npos = POSDATALEN(t, entry);

		total += (npos == 0) ? 1 : npos;
	}

	return total;
}
/*
 * Compare the lexeme of tsvector entry "ptr" (text stored in "eval") with
 * the operand of query item "item" (text stored in "qval").  Shorter strings
 * sort first; strings of equal length are ordered by strncmp.
 */
static int4
WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
{
	if (ptr->len != item->length)
		return (ptr->len > item->length) ? 1 : -1;

	return strncmp(eval + ptr->pos,
				   qval + item->distance,
				   item->length);
}
/*
 * Binary-search the entry array of "t" for the lexeme named by query item
 * "item" of query "q".  Returns the matching entry, or NULL if none matches.
 */
static WordEntry *
find_wordentry(TSVector t, TSQuery q, QueryItem * item)
{
	WordEntry  *lo = ARRPTR(t);
	WordEntry  *hi = (WordEntry *) STRPTR(t);

	/* the match, if any, always lies in the half-open range [lo, hi) */
	while (lo < hi)
	{
		WordEntry  *mid = lo + (hi - lo) / 2;
		int			cmp = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), mid, item);

		if (cmp < 0)
			lo = mid + 1;
		else if (cmp > 0)
			hi = mid;
		else
			return mid;
	}

	return NULL;
}
static int
compareQueryItem(const void *a, const void *b, void *arg)
{
char *operand = (char *) arg;
if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
return strncmp(operand + (*(QueryItem **) a)->distance,
operand + (*(QueryItem **) b)->distance,
(*(QueryItem **) b)->length);
return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
}
/*
 * Build a palloc'd array of pointers to the VAL items among the *size query
 * items starting at "item", sort it with compareQueryItem and drop
 * duplicates.  On return *size holds the number of pointers kept.
 */
static QueryItem **
SortAndUniqItems(char *operand, QueryItem * item, int *size)
{
	QueryItem **res;
	QueryItem **dst;
	QueryItem **src;
	int			remaining = *size;
	int			count;

	res = (QueryItem **) palloc(sizeof(QueryItem *) * *size);

	/* collect the value items, skipping everything else */
	dst = res;
	while (remaining--)
	{
		if (item->type == VAL)
			*dst++ = item;
		item++;
	}

	count = dst - res;
	*size = count;
	if (count < 2)
		return res;

	qsort_arg(res, count, sizeof(QueryItem **), compareQueryItem, (void *) operand);

	/* compact in place: dst is the last distinct element kept so far */
	dst = res;
	for (src = res + 1; src - res < count; src++)
	{
		if (compareQueryItem((void *) src, (void *) dst, (void *) operand) != 0)
			*++dst = *src;
	}

	*size = dst + 1 - res;
	return res;
}
/*
 * Stand-in position list used for tsvector entries that carry no position
 * data.  The ranking functions overwrite slot 0 at run time with the element
 * count (lengthof(POSNULL) - 1) and read the single dummy position from
 * slot 1; calc_rank_and additionally sets that position to MAXENTRYPOS - 1.
 */
static WordEntryPos POSNULL[] = {
0,
0
};
/*
 * Rank of tsvector "t" against query "q", based on how close together the
 * distinct query lexemes occur.  "w" is the weight table used by wpos().
 *
 * If the query has fewer than two distinct value items the work is handed to
 * calc_rank_or.  Otherwise, for every pair of distinct query lexemes found
 * in t, every pair of their positions contributes
 * sqrt(weight1 * weight2 * word_distance(dist)), and contributions are
 * combined as 1 - (1 - res) * (1 - curw).
 *
 * Returns -1.0 when no pair contributed anything.
 */
static float
calc_rank_and(float *w, TSVector t, TSQuery q)
{
uint16 **pos;
int i,
k,
l,
p;
WordEntry *entry;
WordEntryPos *post,
*ct;
int4 dimt,
lenct,
dist;
float res = -1.0;
QueryItem **item;
int size = q->size;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
if (size < 2)
{
pfree(item);
return calc_rank_or(w, t, q);
}
/* pos[i] is the position list of the i-th distinct lexeme, NULL if absent */
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
memset(pos, 0, sizeof(uint16 *) * q->size);
/* prepare the dummy position list used for entries without positions */
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
for (i = 0; i < size; i++)
{
entry = find_wordentry(t, q, item[i]);
if (!entry)
continue;
if (entry->haspos)
pos[i] = (uint16 *) _POSDATAPTR(t, entry);
else
pos[i] = (uint16 *) POSNULL;
/* first uint16 is the position count, the positions follow it */
dimt = *(uint16 *) (pos[i]);
post = (WordEntryPos *) (pos[i] + 1);
/* pair lexeme i with every earlier lexeme k that was found */
for (k = 0; k < i; k++)
{
if (!pos[k])
continue;
lenct = *(uint16 *) (pos[k]);
ct = (WordEntryPos *) (pos[k] + 1);
for (l = 0; l < dimt; l++)
{
for (p = 0; p < lenct; p++)
{
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
/* a zero distance only counts when one side is the dummy list */
if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
{
float curw;
if (!dist)
dist = MAXENTRYPOS;
curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
}
}
}
}
}
pfree(pos);
pfree(item);
return res;
}
/*
 * Rank of tsvector "t" against query "q", computed per distinct query
 * lexeme from the weights of its occurrences.  "w" is the weight table used
 * by wpos().
 *
 * Each lexeme found in t contributes a weighted sum over its positions (see
 * the comment inside the loop).  The total is divided by the number of
 * distinct value items in the query, whether or not they were found.
 */
static float
calc_rank_or(float *w, TSVector t, TSQuery q)
{
WordEntry *entry;
WordEntryPos *post;
int4 dimt,
j,
i;
float res = 0.0;
QueryItem **item;
int size = q->size;
/* prepare the dummy position list used for entries without positions */
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
for (i = 0; i < size; i++)
{
float resj,
wjm;
int4 jm;
entry = find_wordentry(t, q, item[i]);
if (!entry)
continue;
if (entry->haspos)
{
dimt = POSDATALEN(t, entry);
post = POSDATAPTR(t, entry);
}
else
{
dimt = *(uint16 *) POSNULL;
post = POSNULL + 1;
}
/* resj: sum of weight / rank^2; wjm, jm: largest weight and its index */
resj = 0.0;
wjm = -1.0;
jm = 0;
for (j = 0; j < dimt; j++)
{
resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
if (wpos(post[j]) > wjm)
{
wjm = wpos(post[j]);
jm = j;
}
}
/*
 * limit(sum(1/i^2), i -> inf) = pi^2/6, which is the constant divided by
 * below.  resj = sum(w_i / i^2) for i = 1 .. number of occurrences.
 * The weights should be sorted in descending order; for now they are not
 * sorted, and instead the maximum weight is moved to the first rank:
 * its own term is removed from the sum and it is added back undivided.
 * This should be corrected.  -- Oleg Bartunov
 */
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
}
if (size > 0)
res = res / size;
pfree(item);
return res;
}
/*
 * Compute the rank of tsvector "t" for query "q" using weight table "w",
 * then apply the normalizations selected by the RANK_NORM_* bits in
 * "method".
 *
 * Returns 0 if either the tsvector or the query is empty.  calc_rank_and is
 * used when the first query item is a non-value item whose val is '&',
 * otherwise calc_rank_or.  A negative raw rank is replaced by 1e-20.
 */
static float
calc_rank(float *w, TSVector t, TSQuery q, int4 method)
{
	QueryItem  *root = GETQUERY(q);
	float		rank;

	if (!t->size || !q->size)
		return 0.0;

	if (root->type != VAL && root->val == (int4) '&')
		rank = calc_rank_and(w, t, q);
	else
		rank = calc_rank_or(w, t, q);

	if (rank < 0)
		rank = 1e-20;

	if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
		rank /= log((double) (cnt_length(t) + 1)) / log(2.0);

	if (method & RANK_NORM_LENGTH)
	{
		int			doclen = cnt_length(t);

		if (doclen > 0)
			rank /= (float) doclen;
	}

	if ((method & RANK_NORM_UNIQ) && t->size > 0)
		rank /= (float) (t->size);

	if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
		rank /= log((double) (t->size + 1)) / log(2.0);

	return rank;
}
/*
 * Turn a user-supplied weight array into the weight table used for ranking.
 *
 * With a NULL array the default table is returned.  Otherwise the array must
 * be one-dimensional, hold at least lengthof(weights) elements and contain
 * no nulls.  Negative elements are replaced by the corresponding default;
 * any resulting weight above 1.0 is rejected.
 *
 * The result for a non-NULL array lives in a static buffer that is
 * overwritten by the next call.
 */
static float *
getWeights(ArrayType *win)
{
	static float ws[lengthof(weights)];
	float4	   *arrdata;
	int			i;

	if (win == NULL)
		return weights;

	if (ARR_NDIM(win) != 1)
		ereport(ERROR,
				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
				 errmsg("array of weight must be one-dimensional")));

	if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
		ereport(ERROR,
				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
				 errmsg("array of weight is too short")));

	if (ARR_HASNULL(win))
		ereport(ERROR,
				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
				 errmsg("array of weight must not contain nulls")));

	arrdata = (float4 *) ARR_DATA_PTR(win);
	for (i = 0; i < lengthof(weights); i++)
	{
		float4		given = arrdata[i];

		/* a negative element means "use the default for this class" */
		ws[i] = (given >= 0) ? given : weights[i];
		if (ws[i] > 1.0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("weight out of range")));
	}

	return ws;
}
/*
 * Rank entry point taking (weight array, tsvector, tsquery, normalization
 * flags); returns the calc_rank result as float4.
 */
Datum
ts_rank_wttf(PG_FUNCTION_ARGS)
{
	ArrayType  *weightArr;
	TSVector	vector;
	TSQuery		tsq;
	int			normFlags;
	float		rank;

	weightArr = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	vector = PG_GETARG_TSVECTOR(1);
	tsq = PG_GETARG_TSQUERY(2);
	normFlags = PG_GETARG_INT32(3);

	rank = calc_rank(getWeights(weightArr), vector, tsq, normFlags);

	PG_FREE_IF_COPY(weightArr, 0);
	PG_FREE_IF_COPY(vector, 1);
	PG_FREE_IF_COPY(tsq, 2);
	PG_RETURN_FLOAT4(rank);
}
/*
 * Rank entry point taking (weight array, tsvector, tsquery); uses
 * DEF_NORM_METHOD for normalization.
 */
Datum
ts_rank_wtt(PG_FUNCTION_ARGS)
{
	ArrayType  *weightArr;
	TSVector	vector;
	TSQuery		tsq;
	float		rank;

	weightArr = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	vector = PG_GETARG_TSVECTOR(1);
	tsq = PG_GETARG_TSQUERY(2);

	rank = calc_rank(getWeights(weightArr), vector, tsq, DEF_NORM_METHOD);

	PG_FREE_IF_COPY(weightArr, 0);
	PG_FREE_IF_COPY(vector, 1);
	PG_FREE_IF_COPY(tsq, 2);
	PG_RETURN_FLOAT4(rank);
}
/*
 * Rank entry point taking (tsvector, tsquery, normalization flags); uses
 * the default weight table.
 */
Datum
ts_rank_ttf(PG_FUNCTION_ARGS)
{
	TSVector	vector;
	TSQuery		tsq;
	int			normFlags;
	float		rank;

	vector = PG_GETARG_TSVECTOR(0);
	tsq = PG_GETARG_TSQUERY(1);
	normFlags = PG_GETARG_INT32(2);

	rank = calc_rank(getWeights(NULL), vector, tsq, normFlags);

	PG_FREE_IF_COPY(vector, 0);
	PG_FREE_IF_COPY(tsq, 1);
	PG_RETURN_FLOAT4(rank);
}
/*
 * Rank entry point taking (tsvector, tsquery); uses the default weight
 * table and DEF_NORM_METHOD.
 */
Datum
ts_rank_tt(PG_FUNCTION_ARGS)
{
	TSVector	vector;
	TSQuery		tsq;
	float		rank;

	vector = PG_GETARG_TSVECTOR(0);
	tsq = PG_GETARG_TSQUERY(1);

	rank = calc_rank(getWeights(NULL), vector, tsq, DEF_NORM_METHOD);

	PG_FREE_IF_COPY(vector, 0);
	PG_FREE_IF_COPY(tsq, 1);
	PG_RETURN_FLOAT4(rank);
}
/*
 * One occurrence of a query lexeme in the document, as built by get_docrep.
 * The array of these is sorted by "pos" before cover search.
 */
typedef struct
{
QueryItem **item;		/* query items matching the lexeme at this position */
int16 nitem;			/* number of entries in item[] */
bool needfree;			/* calc_rank_cd pfrees item[] when this is set */
uint8 wclass;			/* weight class of this position */
int32 pos;				/* position of the occurrence */
} DocRepresentation;
static int
compareDocR(const void *a, const void *b)
{
if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
return 0;
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
}
/*
 * Callback handed to TS_execute by Cover: a query item counts as satisfied
 * exactly when its istrue flag is set.  "checkval" is not used.
 */
static bool
checkcondition_QueryItem(void *checkval, QueryItem * val)
{
	bool		satisfied = (bool) (val->istrue);

	return satisfied;
}
/*
 * Clear the istrue flag of every VAL item in "query".
 */
static void
reset_istrue_flag(TSQuery query)
{
	QueryItem  *cur = GETQUERY(query);
	QueryItem  *stop = cur + query->size;

	for (; cur < stop; cur++)
	{
		if (cur->type == VAL)
			cur->istrue = 0;
	}
}
/*
 * Search state and result of Cover(): one "cover", i.e. a span of document
 * positions for which the query evaluates to true.
 */
typedef struct
{
int pos;				/* index into the doc array where the next search starts */
int p;					/* position of the cover's lower bound */
int q;					/* position of the cover's upper bound */
DocRepresentation *begin;	/* doc entry at the lower bound */
DocRepresentation *end;		/* doc entry at the upper bound */
} Extention;
/*
 * Find the next cover in "doc" (an array of "len" entries sorted by
 * position), starting the search at index ext->pos.
 *
 * Scanning upward from ext->pos, items are marked true until the query is
 * satisfied; that entry is the upper bound (ext->q / ext->end).  Scanning
 * back down from there with freshly reset flags, the entry at which the
 * query is satisfied again is the lower bound (ext->p / ext->begin).
 *
 * Returns true with *ext filled in and ext->pos advanced past the lower
 * bound, or false when no further cover exists.
 *
 * A failed attempt retries from the next start index.  This is done with a
 * loop rather than by calling Cover() again, so stack usage does not grow
 * with the number of retries.
 */
static bool
Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
{
	for (;;)
	{
		DocRepresentation *ptr;
		int			lastpos = ext->pos;
		int			i;
		bool		found = false;

		reset_istrue_flag(query);

		ext->p = 0x7fffffff;
		ext->q = 0;
		ptr = doc + ext->pos;

		/* find upper bound of cover from current position, move up */
		while (ptr - doc < len)
		{
			for (i = 0; i < ptr->nitem; i++)
				ptr->item[i]->istrue = 1;
			if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
			{
				if (ptr->pos > ext->q)
				{
					ext->q = ptr->pos;
					ext->end = ptr;
					lastpos = ptr - doc;
					found = true;
				}
				break;
			}
			ptr++;
		}

		if (!found)
			return false;

		reset_istrue_flag(query);

		ptr = doc + lastpos;

		/* find lower bound of cover from the upper bound found, move down */
		while (ptr >= doc + ext->pos)
		{
			for (i = 0; i < ptr->nitem; i++)
				ptr->item[i]->istrue = 1;
			if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
			{
				if (ptr->pos < ext->p)
				{
					ext->begin = ptr;
					ext->p = ptr->pos;
				}
				break;
			}
			ptr--;
		}

		if (ext->p <= ext->q)
		{
			/*
			 * set position for the next try to the lexeme after the
			 * beginning of the cover just found
			 */
			ext->pos = (ptr - doc) + 1;
			return true;
		}

		/* no usable cover from this start index; retry from the next one */
		ext->pos++;
	}
}
/*
 * Build the document representation used for cover-density ranking: one
 * DocRepresentation per occurrence in "txt" of a lexeme named in "query",
 * sorted by position.  *doclen receives the number of entries.
 *
 * Query items naming the same lexeme are handled once: the first such item
 * allocates an array of all matching items and marks them istrue so later
 * duplicates are skipped.  All occurrences of one lexeme share that array;
 * only the entry created for the first occurrence has needfree set, so the
 * caller's cleanup loop frees each array exactly once.
 *
 * Returns NULL (with *doclen == 0) if no query lexeme occurs in txt.
 */
static DocRepresentation *
get_docrep(TSVector txt, TSQuery query, int *doclen)
{
	QueryItem  *item = GETQUERY(query);
	WordEntry  *entry;
	WordEntryPos *post;
	int4		dimt,
				j,
				i;
	int			len = query->size * 4,
				cur = 0;
	DocRepresentation *doc;
	char	   *operand;

	/* prepare the dummy position list used for entries without positions */
	*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
	doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
	operand = GETOPERAND(query);
	reset_istrue_flag(query);

	for (i = 0; i < query->size; i++)
	{
		/* skip operators and lexemes already handled via a duplicate */
		if (item[i].type != VAL || item[i].istrue)
			continue;

		entry = find_wordentry(txt, query, &(item[i]));
		if (!entry)
			continue;

		if (entry->haspos)
		{
			dimt = POSDATALEN(txt, entry);
			post = POSDATAPTR(txt, entry);
		}
		else
		{
			dimt = *(uint16 *) POSNULL;
			post = POSNULL + 1;
		}

		/* make room for dimt more entries */
		while (cur + dimt >= len)
		{
			len *= 2;
			doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
		}

		for (j = 0; j < dimt; j++)
		{
			if (j == 0)
			{
				QueryItem  *kptr,
						   *iptr = item + i;
				int			k;

				/* this entry owns the item array allocated just below */
				doc[cur].needfree = true;
				doc[cur].nitem = 0;
				doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size);

				for (k = 0; k < query->size; k++)
				{
					kptr = item + k;
					if (k == i ||
						(item[k].type == VAL &&
						 compareQueryItem(&kptr, &iptr, operand) == 0))
					{
						doc[cur].item[doc[cur].nitem] = item + k;
						doc[cur].nitem++;
						kptr->istrue = 1;
					}
				}
			}
			else
			{
				/* later occurrences borrow the first occurrence's array */
				doc[cur].needfree = false;
				doc[cur].nitem = doc[cur - 1].nitem;
				doc[cur].item = doc[cur - 1].item;
			}
			doc[cur].pos = WEP_GETPOS(post[j]);
			doc[cur].wclass = WEP_GETWEIGHT(post[j]);
			cur++;
		}
	}

	*doclen = cur;

	if (cur > 0)
	{
		if (cur > 1)
			qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
		return doc;
	}

	pfree(doc);
	return NULL;
}
/*
 * Cover-density rank of tsvector "txt" for "query".
 *
 * "arrdata" is the weight table (negative entries fall back to the
 * defaults; values above 1.0 are rejected).  Every cover reported by
 * Cover() contributes Cpos / (1 + nNoise), where Cpos is the number of doc
 * entries in the cover divided by the sum of their inverse weights and
 * nNoise estimates the positions inside the cover that are not doc entries.
 * The total is then normalized according to the RANK_NORM_* bits in
 * "method".
 *
 * Returns 0 if no query lexeme occurs in txt.  Cover() modifies the istrue
 * flags of the query's items.
 */
static float4
calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method)
{
DocRepresentation *doc;
int len,
i,
doclen = 0;
Extention ext;
double Wdoc = 0.0;
double invws[lengthof(weights)];
double SumDist = 0.0,
PrevExtPos = 0.0,
CurExtPos = 0.0;
int NExtent = 0;
/* validate the weights and precompute their inverses */
for (i = 0; i < lengthof(weights); i++)
{
invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
if (invws[i] > 1.0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("weight out of range")));
invws[i] = 1.0 / invws[i];
}
doc = get_docrep(txt, query, &doclen);
if (!doc)
return 0.0;
MemSet(&ext, 0, sizeof(Extention));
while (Cover(doc, doclen, query, &ext))
{
double Cpos = 0.0;
double InvSum = 0.0;
int nNoise;
DocRepresentation *ptr = ext.begin;
while (ptr <= ext.end)
{
InvSum += invws[ptr->wclass];
ptr++;
}
Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
/*
 * If the document is big enough, ext.q may be equal to ext.p due to the
 * limit on positional information. In that case we approximate the number
 * of noise words as half of the cover's length.
 */
nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
if (nNoise < 0)
nNoise = (ext.end - ext.begin) / 2;
Wdoc += Cpos / ((double) (1 + nNoise));
/* midpoint of this cover, used for the distance between covers */
CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by
* zero in a case of
multiple lexize */ )
SumDist += 1.0 / (CurExtPos - PrevExtPos);
PrevExtPos = CurExtPos;
NExtent++;
}
if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
Wdoc /= log((double) (cnt_length(txt) + 1));
if (method & RANK_NORM_LENGTH)
{
len = cnt_length(txt);
if (len > 0)
Wdoc /= (double) len;
}
if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
Wdoc /= ((double) NExtent) / SumDist;
if ((method & RANK_NORM_UNIQ) && txt->size > 0)
Wdoc /= (double) (txt->size);
if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
Wdoc /= log((double) (txt->size + 1)) / log(2.0);
/* release the item arrays owned by doc entries, then the doc array */
for (i = 0; i < doclen; i++)
if (doc[i].needfree)
pfree(doc[i].item);
pfree(doc);
return (float4) Wdoc;
}
/*
 * Cover-density rank entry point taking (weight array, tsvector, tsquery,
 * normalization flags).  The query is fetched as a copy because
 * calc_rank_cd modifies its items.
 */
Datum
ts_rankcd_wttf(PG_FUNCTION_ARGS)
{
	ArrayType  *weightArr;
	TSVector	vector;
	TSQuery		tsq;
	int			normFlags;
	float		rank;

	weightArr = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	vector = PG_GETARG_TSVECTOR(1);
	tsq = PG_GETARG_TSQUERY_COPY(2);
	normFlags = PG_GETARG_INT32(3);

	rank = calc_rank_cd(getWeights(weightArr), vector, tsq, normFlags);

	PG_FREE_IF_COPY(weightArr, 0);
	PG_FREE_IF_COPY(vector, 1);
	PG_FREE_IF_COPY(tsq, 2);
	PG_RETURN_FLOAT4(rank);
}
/*
 * Cover-density rank entry point taking (weight array, tsvector, tsquery);
 * uses DEF_NORM_METHOD.  The query is fetched as a copy because
 * calc_rank_cd modifies its items.
 */
Datum
ts_rankcd_wtt(PG_FUNCTION_ARGS)
{
	ArrayType  *weightArr;
	TSVector	vector;
	TSQuery		tsq;
	float		rank;

	weightArr = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	vector = PG_GETARG_TSVECTOR(1);
	tsq = PG_GETARG_TSQUERY_COPY(2);

	rank = calc_rank_cd(getWeights(weightArr), vector, tsq, DEF_NORM_METHOD);

	PG_FREE_IF_COPY(weightArr, 0);
	PG_FREE_IF_COPY(vector, 1);
	PG_FREE_IF_COPY(tsq, 2);
	PG_RETURN_FLOAT4(rank);
}
/*
 * Cover-density rank entry point taking (tsvector, tsquery, normalization
 * flags); uses the default weight table.  The query is fetched as a copy
 * because calc_rank_cd modifies its items.
 */
Datum
ts_rankcd_ttf(PG_FUNCTION_ARGS)
{
	TSVector	vector;
	TSQuery		tsq;
	int			normFlags;
	float		rank;

	vector = PG_GETARG_TSVECTOR(0);
	tsq = PG_GETARG_TSQUERY_COPY(1);
	normFlags = PG_GETARG_INT32(2);

	rank = calc_rank_cd(getWeights(NULL), vector, tsq, normFlags);

	PG_FREE_IF_COPY(vector, 0);
	PG_FREE_IF_COPY(tsq, 1);
	PG_RETURN_FLOAT4(rank);
}
/*
 * Cover-density rank entry point taking (tsvector, tsquery); uses the
 * default weight table and DEF_NORM_METHOD.  The query is fetched as a copy
 * because calc_rank_cd modifies its items.
 */
Datum
ts_rankcd_tt(PG_FUNCTION_ARGS)
{
	TSVector	vector;
	TSQuery		tsq;
	float		rank;

	vector = PG_GETARG_TSVECTOR(0);
	tsq = PG_GETARG_TSQUERY_COPY(1);

	rank = calc_rank_cd(getWeights(NULL), vector, tsq, DEF_NORM_METHOD);

	PG_FREE_IF_COPY(vector, 0);
	PG_FREE_IF_COPY(tsq, 1);
	PG_RETURN_FLOAT4(rank);
}

View File

@ -0,0 +1,683 @@
/*-------------------------------------------------------------------------
*
* tsvector.c
* I/O functions for tsvector
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "libpq/pqformat.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
/*
 * qsort comparator ordering WordEntryPos values by ascending position; the
 * weight part is ignored.
 */
static int
comparePos(const void *a, const void *b)
{
	int			pa = WEP_GETPOS(*(WordEntryPos *) a);
	int			pb = WEP_GETPOS(*(WordEntryPos *) b);

	if (pa > pb)
		return 1;
	if (pa < pb)
		return -1;
	return 0;
}
/*
 * Sort the "l" positions in "a" and remove duplicates in place, returning
 * the number of positions kept.  When two entries share a position the
 * larger weight wins.  Processing stops early once MAXNUMPOS entries have
 * been kept or a kept position equals MAXENTRYPOS - 1.
 */
static int
uniquePos(WordEntryPos * a, int4 l)
{
	WordEntryPos *src;
	WordEntryPos *dst;

	if (l == 1)
		return l;

	dst = a;
	qsort((void *) a, l, sizeof(WordEntryPos), comparePos);

	for (src = a + 1; src - a < l; src++)
	{
		if (WEP_GETPOS(*src) == WEP_GETPOS(*dst))
		{
			/* same position: keep the heavier weight */
			if (WEP_GETWEIGHT(*src) > WEP_GETWEIGHT(*dst))
				WEP_SETWEIGHT(*dst, WEP_GETWEIGHT(*src));
			continue;
		}

		dst++;
		*dst = *src;
		if (dst - a >= MAXNUMPOS - 1 || WEP_GETPOS(*dst) == MAXENTRYPOS - 1)
			break;
	}

	return dst + 1 - a;
}
static int
compareentry(const void *a, const void *b, void *arg)
{
char *BufferStr = (char *) arg;
if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
{
return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos],
&BufferStr[((WordEntryIN *) b)->entry.pos],
((WordEntryIN *) a)->entry.len);
}
return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
}
/*
 * Sort the "l" parsed entries in "a" (lexeme text lives in "buf"), merge
 * entries with identical lexemes and deduplicate each entry's position
 * list.  Returns the number of entries kept.
 *
 * *outbuflen is set to the exact number of bytes the kept entries need in
 * a tsvector's data area: SHORTALIGN(lexeme length) per entry, plus
 * (number of positions + 1) WordEntryPos slots for entries with positions
 * (the extra slot holds the position count).  The value passed in is
 * ignored; previously it was only overwritten in one code path and
 * otherwise left as is or added to, and the count slot was not included
 * when more than one entry was given.
 *
 * "l" must be at least 1.
 */
static int
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{
	WordEntryIN *ptr,
			   *res;
	int4		buflen = 0;

	if (l > 1)
		qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);

	res = a;
	ptr = a + 1;
	while (ptr - a < l)
	{
		if (!(ptr->entry.len == res->entry.len &&
			  strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
		{
			/* "res" is complete: finalize its positions and count its space */
			if (res->entry.haspos)
			{
				*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
				buflen += (*(uint16 *) (res->pos) + 1) * sizeof(WordEntryPos);
			}
			buflen += SHORTALIGN(res->entry.len);

			/* start a new result entry from "ptr" */
			res++;
			if (res != ptr)
				memcpy(res, ptr, sizeof(WordEntryIN));
		}
		else if (ptr->entry.haspos)
		{
			/* same lexeme: fold ptr's positions into res */
			if (res->entry.haspos)
			{
				int4		len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);

				res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
				memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
					   &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
				*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
				pfree(ptr->pos);
			}
			else
			{
				/* res had no positions yet: adopt ptr's list */
				res->entry.haspos = 1;
				res->pos = ptr->pos;
			}
		}
		ptr++;
	}

	/* finalize and count the last result entry */
	if (res->entry.haspos)
	{
		*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
		buflen += (*(uint16 *) (res->pos) + 1) * sizeof(WordEntryPos);
	}
	buflen += SHORTALIGN(res->entry.len);

	*outbuflen = buflen;
	return res + 1 - a;
}
/*
 * Compare two WordEntry items whose lexeme text lives in "buf": by lexeme
 * length first, then by strncmp of the lexeme bytes.
 *
 * The fields are compared directly rather than by passing the pointers to
 * compareentry, which interprets its arguments as WordEntryIN.
 */
static int
WordEntryCMP(WordEntry * a, WordEntry * b, char *buf)
{
	if (a->len == b->len)
		return strncmp(&buf[a->pos], &buf[b->pos], a->len);

	return (a->len > b->len) ? 1 : -1;
}
/* States of the gettoken_tsvector parser */
#define WAITWORD 1		/* before a lexeme, skipping whitespace */
#define WAITENDWORD 2	/* inside an unquoted lexeme */
#define WAITNEXTCHAR 3	/* just saw a backslash; next character is literal */
#define WAITENDCMPLX 4	/* inside a quoted lexeme */
#define WAITPOSINFO 5	/* after a quoted lexeme; ':' starts position info */
#define INPOSINFO 6		/* expecting the first digit of a position */
#define WAITPOSDELIM 7	/* after a position: digits, weight letter, ',' or end */
#define WAITCHARCMPLX 8	/* saw a quote inside a quoted lexeme */
/*
 * Double the word buffer when fewer than one maximum-length character's
 * worth of bytes remain, keeping curpos at the same offset.  Expects a
 * variable named "state" in scope.
 */
#define RESIZEPRSBUF \
do { \
if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
{ \
int4 clen = state->curpos - state->word; \
state->len *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->len ); \
state->curpos = state->word + clen; \
} \
} while (0)
/*
 * Read the next lexeme, with optional position information, from
 * state->prsbuf.
 *
 * On success returns true: the NUL-terminated lexeme is in state->word and
 * state->curpos points at its terminator.  If positions were given,
 * state->alen is nonzero and state->pos holds the count in slot 0 followed
 * by the positions; otherwise state->alen is 0.  Returns false when the
 * input is exhausted before a lexeme starts.  Syntax problems are reported
 * with ereport(ERROR).
 *
 * When state->oprisdelim is set, operator characters (ISOPERATOR) end an
 * unquoted lexeme, and a ':' or closing quote returns the lexeme without
 * parsing position information.
 */
bool
gettoken_tsvector(TSVectorParseState *state)
{
int4 oldstate = 0;
state->curpos = state->word;
state->state = WAITWORD;
state->alen = 0;
while (1)
{
if (state->state == WAITWORD)
{
/* looking for the start of a lexeme */
if (*(state->prsbuf) == '\0')
return false;
else if (t_iseq(state->prsbuf, '\''))
state->state = WAITENDCMPLX;
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else if (!t_isspace(state->prsbuf))
{
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDWORD;
}
}
else if (state->state == WAITNEXTCHAR)
{
/* character following a backslash: copy it, then resume oldstate */
if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character")));
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = oldstate;
}
}
else if (state->state == WAITENDWORD)
{
/* inside an unquoted lexeme */
if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
{
RESIZEPRSBUF;
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(state->curpos) = '\0';
return true;
}
else if (t_iseq(state->prsbuf, ':'))
{
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(state->curpos) = '\0';
if (state->oprisdelim)
return true;
else
state->state = INPOSINFO;
}
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITENDCMPLX)
{
/* inside a quoted lexeme */
if (t_iseq(state->prsbuf, '\''))
{
state->state = WAITCHARCMPLX;
}
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
}
else if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITCHARCMPLX)
{
/* a second quote is a literal quote; anything else ends the lexeme */
if (t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDCMPLX;
}
else
{
RESIZEPRSBUF;
*(state->curpos) = '\0';
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
if (state->oprisdelim)
{
/* state->prsbuf+=pg_mblen(state->prsbuf); */
return true;
}
else
state->state = WAITPOSINFO;
continue; /* recheck current character */
}
}
else if (state->state == WAITPOSINFO)
{
/* after a quoted lexeme: ':' introduces positions, else we are done */
if (t_iseq(state->prsbuf, ':'))
state->state = INPOSINFO;
else
return true;
}
else if (state->state == INPOSINFO)
{
/* expecting the first digit of a position number */
if (t_isdigit(state->prsbuf))
{
if (state->alen == 0)
{
state->alen = 4;
state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
*(uint16 *) (state->pos) = 0;
}
else if (*(uint16 *) (state->pos) + 1 >= state->alen)
{
state->alen *= 2;
state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
}
/* slot 0 holds the count; the new position goes in the last slot */
(*(uint16 *) (state->pos))++;
WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
state->state = WAITPOSDELIM;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else if (state->state == WAITPOSDELIM)
{
/*
 * after a position: remaining digits are skipped, a weight letter may be
 * given once, ',' starts the next position, whitespace or end of input
 * finishes the token
 */
if (t_iseq(state->prsbuf, ','))
state->state = INPOSINFO;
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
}
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
}
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
}
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
}
else if (t_isspace(state->prsbuf) ||
*(state->prsbuf) == '\0')
return true;
else if (!t_isdigit(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else /* internal error */
elog(ERROR, "internal error in gettoken_tsvector");
/* get next char */
state->prsbuf += pg_mblen(state->prsbuf);
}
return false;
}
Datum
tsvectorin(PG_FUNCTION_ARGS)
{
char *buf = PG_GETARG_CSTRING(0);
TSVectorParseState state;
WordEntryIN *arr;
WordEntry *inarr;
int4 len = 0,
totallen = 64;
TSVector in;
char *tmpbuf,
*cur;
int4 i,
buflen = 256;
pg_verifymbstr(buf, strlen(buf), false);
state.prsbuf = buf;
state.len = 32;
state.word = (char *) palloc(state.len);
state.oprisdelim = false;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
cur = tmpbuf = (char *) palloc(buflen);
while (gettoken_tsvector(&state))
{
/*
* Realloc buffers if it's needed
*/
if (len >= totallen)
{
totallen *= 2;
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
}
while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
{
int4 dist = cur - tmpbuf;
buflen *= 2;
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
cur = tmpbuf + dist;
}
if (state.curpos - state.word >= MAXSTRLEN)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long (%d bytes, max %d bytes)",
state.curpos - state.word, MAXSTRLEN)));
arr[len].entry.len = state.curpos - state.word;
if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("position value too large")));
arr[len].entry.pos = cur - tmpbuf;
memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
cur += arr[len].entry.len;
if (state.alen)
{
arr[len].entry.haspos = 1;
arr[len].pos = state.pos;
}
else
arr[len].entry.haspos = 0;
len++;
}
pfree(state.word);
if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen);
else
buflen = 0;
totallen = CALCDATASIZE(len, buflen);
in = (TSVector) palloc0(totallen);
SET_VARSIZE(in, totallen);
in->size = len;
cur = STRPTR(in);
inarr = ARRPTR(in);
for (i = 0; i < len; i++)
{
memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
arr[i].entry.pos = cur - STRPTR(in);
cur += SHORTALIGN(arr[i].entry.len);
if (arr[i].entry.haspos)
{
memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
pfree(arr[i].pos);
}
inarr[i] = arr[i].entry;
}
PG_RETURN_TSVECTOR(in);
}
/*
 * Output function for tsvector: produce the text form of the value.
 *
 * Each lexeme is written in single quotes; entries are separated by one
 * space.  Position data follows as ":pos[,pos...]", each position with a
 * weight letter A, B or C appended for weights 3, 2 and 1 (weight 0 gets
 * no letter).
 *
 * Inside a lexeme both the single quote and the backslash are doubled.  The
 * input parser treats a backslash as an escape character, so an undoubled
 * backslash would not read back as the same lexeme.
 */
Datum
tsvectorout(PG_FUNCTION_ARGS)
{
	TSVector	out = PG_GETARG_TSVECTOR(0);
	char	   *outbuf;
	int4		i,
				lenbuf = 0,
				pp;
	WordEntry  *ptr = ARRPTR(out);
	char	   *curbegin,
			   *curin,
			   *curout;

	/* upper bound on the output size; doubling a character fits in the 2x */
	lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
	for (i = 0; i < out->size; i++)
	{
		lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
		if (ptr[i].haspos)
			lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
	}

	curout = outbuf = (char *) palloc(lenbuf);
	for (i = 0; i < out->size; i++)
	{
		curbegin = curin = STRPTR(out) + ptr->pos;
		if (i != 0)
			*curout++ = ' ';
		*curout++ = '\'';
		while (curin - curbegin < ptr->len)
		{
			int			len = pg_mblen(curin);

			/* double the characters the input parser treats specially */
			if (t_iseq(curin, '\''))
				*curout++ = '\'';
			else if (t_iseq(curin, '\\'))
				*curout++ = '\\';

			while (len--)
				*curout++ = *curin++;
		}

		*curout++ = '\'';
		if ((pp = POSDATALEN(out, ptr)) != 0)
		{
			WordEntryPos *wptr;

			*curout++ = ':';
			wptr = POSDATAPTR(out, ptr);
			while (pp)
			{
				curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
				switch (WEP_GETWEIGHT(*wptr))
				{
					case 3:
						*curout++ = 'A';
						break;
					case 2:
						*curout++ = 'B';
						break;
					case 1:
						*curout++ = 'C';
						break;
					case 0:
					default:
						break;
				}

				if (pp > 1)
					*curout++ = ',';
				pp--;
				wptr++;
			}
		}
		ptr++;
	}

	*curout = '\0';
	PG_FREE_IF_COPY(out, 0);
	PG_RETURN_CSTRING(outbuf);
}
/*
 * Binary send function for tsvector.
 *
 * The message holds the entry count as an int32 and then, per entry: the
 * WordEntry itself as an int32, the lexeme bytes, and -- if the entry has
 * positions -- the position count followed by each position, all sent with
 * sizeof(WordEntryPos) bytes.
 */
Datum
tsvectorsend(PG_FUNCTION_ARGS)
{
	TSVector	vec = PG_GETARG_TSVECTOR(0);
	StringInfoData buf;
	WordEntry  *weptr;
	int			i;

	pq_begintypsend(&buf);
	pq_sendint(&buf, vec->size, sizeof(int32));

	weptr = ARRPTR(vec);
	for (i = 0; i < vec->size; i++, weptr++)
	{
		WordEntryPos *wepptr;
		int			npos;
		int			j;

		/*
		 * This relies on sizeof(WordEntry) == sizeof(int32)
		 */
		pq_sendint(&buf, *(int32 *) weptr, sizeof(int32));
		pq_sendbytes(&buf, STRPTR(vec) + weptr->pos, weptr->len);

		if (!weptr->haspos)
			continue;

		wepptr = POSDATAPTR(vec, weptr);
		npos = POSDATALEN(vec, weptr);
		pq_sendint(&buf, npos, sizeof(WordEntryPos));
		for (j = 0; j < npos; j++)
			pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
	}

	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
/*
 * Binary receive function for tsvector; reads the format written by
 * tsvectorsend.
 *
 * The message is rejected if the entry count is out of range, if lexemes
 * are not in strictly ascending WordEntryCMP order, if an entry announces
 * more than MAXNUMPOS positions, or if an entry's positions are not
 * strictly ascending.
 *
 * The running data-area size (datalen) is advanced by the space each entry
 * really occupies: SHORTALIGN(lexeme length), plus (npos + 1) WordEntryPos
 * slots when positions are present.  The position part used to be counted
 * in units of sizeof(WordEntry), which made the stored VARSIZE larger than
 * the data actually written.
 *
 * NOTE(review): each entry's "pos" offset is taken from the message as is
 * and used as the copy destination; the buffer-growth checks are based on
 * datalen only.  Presumably pos should be validated against datalen --
 * confirm before relying on this with untrusted input.
 */
Datum
tsvectorrecv(PG_FUNCTION_ARGS)
{
	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
	TSVector	vec;
	int			i,
				size,
				len = DATAHDRSIZE;
	WordEntry  *weptr;
	int			datalen = 0;

	size = pq_getmsgint(buf, sizeof(uint32));
	if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
		elog(ERROR, "invalid size of tsvector");

	/* start with twice the header-plus-entry-array size; grown on demand */
	len += sizeof(WordEntry) * size;
	len *= 2;
	vec = (TSVector) palloc0(len);
	vec->size = size;

	weptr = ARRPTR(vec);
	for (i = 0; i < size; i++)
	{
		int			tmp;

		weptr = ARRPTR(vec) + i;

		/*
		 * We are sure that sizeof(WordEntry) == sizeof(int32)
		 */
		tmp = pq_getmsgint(buf, sizeof(int32));
		*weptr = *(WordEntry *) & tmp;

		/* make room for the lexeme; weptr must be refreshed after repalloc */
		while (CALCDATASIZE(size, datalen + SHORTALIGN(weptr->len)) >= len)
		{
			len *= 2;
			vec = (TSVector) repalloc(vec, len);
			weptr = ARRPTR(vec) + i;
		}

		memcpy(STRPTR(vec) + weptr->pos,
			   pq_getmsgbytes(buf, weptr->len),
			   weptr->len);
		datalen += SHORTALIGN(weptr->len);

		if (i > 0 && WordEntryCMP(weptr, weptr - 1, STRPTR(vec)) <= 0)
			elog(ERROR, "lexemes are unordered");

		if (weptr->haspos)
		{
			uint16		j,
						npos;
			WordEntryPos *wepptr;

			npos = (uint16) pq_getmsgint(buf, sizeof(int16));
			if (npos > MAXNUMPOS)
				elog(ERROR, "unexpected number of positions");

			/* make room for the count slot plus npos positions */
			while (CALCDATASIZE(size, datalen + (npos + 1) * sizeof(WordEntryPos)) >= len)
			{
				len *= 2;
				vec = (TSVector) repalloc(vec, len);
				weptr = ARRPTR(vec) + i;
			}

			memcpy(_POSDATAPTR(vec, weptr), &npos, sizeof(int16));
			wepptr = POSDATAPTR(vec, weptr);
			for (j = 0; j < npos; j++)
			{
				wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(int16));
				if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
					elog(ERROR, "position information is unordered");
			}

			/* count slot + positions, each one WordEntryPos wide */
			datalen += (npos + 1) * sizeof(WordEntryPos);
		}
	}

	SET_VARSIZE(vec, CALCDATASIZE(vec->size, datalen));

	PG_RETURN_TSVECTOR(vec);
}

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More