2656167f6e
FossilOrigin-Name: 96ea600440de05ee663e71c3f0d0de2c64108bf9
223 lines
5.5 KiB
Tcl
223 lines
5.5 KiB
Tcl
#
# 2014 Jun 09
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#   static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#   static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep3(char *aBuf, int *pnBuf);
#   static int fts5PorterStep4(char *aBuf, int *pnBuf);
#

# Rule table for the generated function fts5PorterStep1B2().
#
# Each rule is a Tcl list of the form:
#
#   { suffix condition replacement ?flag? }
#
#   suffix       Text that must appear at the end of the buffer.
#   condition    Name of a C function called as condition(aBuf, nBuf-N),
#                where N is the suffix length; empty for "no condition".
#   replacement  Text that replaces the suffix (may be empty).
#   flag         Optional. If true, the generated code sets the C
#                function's return value to 1 when the rule fires.
#
# The list must contain rules only: every element is turned into an
# "if" statement by create_step_function.
set O(Step1B2) {
  { "at" "" "ate" 1 }
  { "bl" "" "ble" 1 }
  { "iz" "" "ize" 1 }
}

# Rule table for the generated function fts5PorterStep1B().
#
# Rule format: { suffix condition replacement ?flag? }. The fourth element
# selects whether the generated code sets its return value to 1 when the
# rule fires (here: only for "ed" and "ing").
#
# Rules sharing the same second-to-last suffix character are tested in the
# order listed, so "eed" is tried before "ed".
set O(Step1B) {
  { "eed" fts5Porter_MGt0  "ee" 0 }
  { "ed"  fts5Porter_Vowel ""   1 }
  { "ing" fts5Porter_Vowel ""   1 }
}

# Rule table for the generated function fts5PorterStep2().
#
# Rule format: { suffix condition replacement }. No rule carries the
# optional fourth element, so the generated function always returns 0.
#
# Order matters: rules that share the same second-to-last suffix character
# are emitted as an if/else-if chain in the order listed here, so a longer
# suffix must precede any shorter suffix it ends with (e.g. "ational" before
# "tional", "ization" before "ation").
set O(Step2) {
  { "ational" fts5Porter_MGt0 "ate"  }
  { "tional"  fts5Porter_MGt0 "tion" }
  { "enci"    fts5Porter_MGt0 "ence" }
  { "anci"    fts5Porter_MGt0 "ance" }
  { "izer"    fts5Porter_MGt0 "ize"  }
  { "logi"    fts5Porter_MGt0 "log"  }
  { "bli"     fts5Porter_MGt0 "ble"  }
  { "alli"    fts5Porter_MGt0 "al"   }
  { "entli"   fts5Porter_MGt0 "ent"  }
  { "eli"     fts5Porter_MGt0 "e"    }
  { "ousli"   fts5Porter_MGt0 "ous"  }
  { "ization" fts5Porter_MGt0 "ize"  }
  { "ation"   fts5Porter_MGt0 "ate"  }
  { "ator"    fts5Porter_MGt0 "ate"  }
  { "alism"   fts5Porter_MGt0 "al"   }
  { "iveness" fts5Porter_MGt0 "ive"  }
  { "fulness" fts5Porter_MGt0 "ful"  }
  { "ousness" fts5Porter_MGt0 "ous"  }
  { "aliti"   fts5Porter_MGt0 "al"   }
  { "iviti"   fts5Porter_MGt0 "ive"  }
  { "biliti"  fts5Porter_MGt0 "ble"  }
}

# Rule table for the generated function fts5PorterStep3().
#
# Rule format: { suffix condition replacement }. An empty replacement means
# the suffix is simply removed. No rule carries the optional fourth element,
# so the generated function always returns 0.
set O(Step3) {
  { "icate" fts5Porter_MGt0 "ic" }
  { "ative" fts5Porter_MGt0 ""   }
  { "alize" fts5Porter_MGt0 "al" }
  { "iciti" fts5Porter_MGt0 "ic" }
  { "ical"  fts5Porter_MGt0 "ic" }
  { "ful"   fts5Porter_MGt0 ""   }
  { "ness"  fts5Porter_MGt0 ""   }
}

# Rule table for the generated function fts5PorterStep4().
#
# Rule format: { suffix condition replacement }. Every replacement is empty,
# so a rule that fires only shortens the buffer. No rule carries the optional
# fourth element, so the generated function always returns 0.
#
# Order matters within a group of rules sharing the same second-to-last
# suffix character: "ement" must precede "ment", which must precede "ent".
set O(Step4) {
  { "al"    fts5Porter_MGt1             "" }
  { "ance"  fts5Porter_MGt1             "" }
  { "ence"  fts5Porter_MGt1             "" }
  { "er"    fts5Porter_MGt1             "" }
  { "ic"    fts5Porter_MGt1             "" }
  { "able"  fts5Porter_MGt1             "" }
  { "ible"  fts5Porter_MGt1             "" }
  { "ant"   fts5Porter_MGt1             "" }
  { "ement" fts5Porter_MGt1             "" }
  { "ment"  fts5Porter_MGt1             "" }
  { "ent"   fts5Porter_MGt1             "" }
  { "ion"   fts5Porter_MGt1_and_S_or_T  "" }
  { "ou"    fts5Porter_MGt1             "" }
  { "ism"   fts5Porter_MGt1             "" }
  { "ate"   fts5Porter_MGt1             "" }
  { "iti"   fts5Porter_MGt1             "" }
  { "ous"   fts5Porter_MGt1             "" }
  { "ive"   fts5Porter_MGt1             "" }
  { "ize"   fts5Porter_MGt1             "" }
}

# Compare two rules by the second-to-last character of their suffix
# (element 0 of each rule).
#
# Returns the result of [string compare] on those two characters: -1, 0 or 1.
# A suffix shorter than two characters contributes an empty string.
#
# NOTE(review): nothing in this script calls sort_cb; it has the shape of an
# [lsort -command] callback - confirm whether it is still needed.
proc sort_cb {lhs rhs} {
  set L [string index [lindex $lhs 0] end-1]
  set R [string index [lindex $rhs 0] end-1]
  return [string compare $L $R]
}

# Generate the C function fts5Porter<name>() and write it to stdout.
#
#   name   Suffix of the generated C function name.
#   data   List of rules, each of the form
#            { suffix condition replacement ?flag? }
#          (condition may be empty; flag is optional).
#
# The generated function switches on the second-to-last byte of the buffer
# (aBuf[nBuf-2]) and, inside each case, tests the rules whose suffix has that
# byte in its second-to-last position as an if/else-if chain, in the order
# the rules appear in $data. Once a suffix matches, no later rule is tried,
# even if the rule's condition function then rejects the stem.
#
# The generated switch reads aBuf[nBuf-2] unconditionally, so callers of the
# generated C code must pass a buffer of at least two bytes.
proc create_step_function {name data} {

  # Skeleton of the generated C function.
  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  # One "case" per distinct second-to-last suffix character.
  set T(case) {
    case '${k}':
      ${ifstmts}
      break;
  }

  # "if" templates, indexed as T(if_<bCond>_<bMemcpy>_<bRet>):
  #   bCond    the rule has a condition function
  #   bMemcpy  the rule has a non-empty replacement
  #   bRet     the rule sets the return value to 1
  # All eight combinations are defined so that any well-formed rule can be
  # expanded.
  set T(if_0_0_0) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
      }
  }
  set T(if_1_0_0) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
        }
      }
  }
  set T(if_0_1_0) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
  }
  set T(if_1_1_0) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
        }
      }
  }
  set T(if_0_0_1) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
        ret = 1;
      }
  }
  set T(if_1_0_1) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
          ret = 1;
        }
      }
  }
  set T(if_0_1_1) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
  }
  set T(if_1_1_1) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
          ret = 1;
        }
      }
  }

  # Group the rules by the second-to-last character of their suffix,
  # keeping the relative order of the rules within each group.
  foreach I $data {
    set k [string index [lindex $I 0] end-1]
    lappend aCase($k) $I
  }

  set switchbody ""
  foreach k [lsort [array names aCase]] {
    set ifstmts [list]
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]    ;# Suffix text for this rule
      set xCond   [lindex $I 1]    ;# Condition callback (or "")
      set zRep    [lindex $I 2]    ;# Replacement text for rule

      set n    [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments substituted into the templates above. The suffix is only
      # considered when the buffer is strictly longer than the suffix.
      set match  "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond   "${xCond}(aBuf, nBuf-$n)"

      set bMemcpy [expr {$nRep>0}]
      set bCond   [expr {$xCond ne ""}]
      set bRet    [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    # Chain the statements of this case together as if/else-if.
    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }

  puts [subst -nocommands $T(function)]
}

# Script body: write the opening banner, one generated C function per entry
# of the O() array, then the closing banner, all to stdout.
puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]

# [array names] does not return the names in any guaranteed order; sort them
# so that the generated functions are always emitted in the same order.
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}

puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]