spell(1), from OpenBSD.

This code came originally from v7/32v and thence from 4.4BSD. It was
freed by Caldera. Todd Miller cleaned up and ANSIfied the code, and
then changed it to use the mmap/binary search algorithm for looking up
words in the dictionary that look(1) uses, replacing the hash based
lookups which were faster but broken by the size of the current
dictionary.

I've done a teeny bit of additional cleanup and replaced Todd's ksh
spell(1) script with a /bin/sh script, and re-structured the code to
follow the bsd makefile way, with one executable per directory.

I also added a TODO list recommending a bunch of kinds of cleanup.

The code is, frankly, awful. It was fine in the 1970s, a time of much
more limited resources and tastes, but the world has moved on a bunch
since then. The reason for pulling this in at all is that it will make
it much easier to check in-tree documentation for spelling errors
automatically.
This commit is contained in:
perry 2005-06-29 21:06:12 +00:00
parent cd4e76b8f7
commit fcc96823f6
14 changed files with 4845 additions and 0 deletions

5
usr.bin/spell/Makefile Normal file
View File

@ -0,0 +1,5 @@
# $NetBSD: Makefile,v 1.1 2005/06/29 21:06:12 perry Exp $
# Top-level makefile for spell(1): it only descends into the two
# subdirectories listed in SUBDIR, each of which has its own Makefile.
SUBDIR= spell spellprog
.include <bsd.subdir.mk>

View File

@ -0,0 +1,7 @@
# $NetBSD: Makefile.inc,v 1.1 2005/06/29 21:06:12 perry Exp $
# Shared defaults for the spell subdirectories.  Every assignment uses
# `?=', so a value already set by the including makefile or on the
# command line takes precedence.
# WARNS defaults to 0 here.
WARNS?= 0
WFORMAT?= 2
BINDIR?=/usr/bin
# The parent Makefile.inc is deliberately not pulled in (include disabled).
#.include "../Makefile.inc"

23
usr.bin/spell/TODO Normal file
View File

@ -0,0 +1,23 @@
$NetBSD: TODO,v 1.1 2005/06/29 21:06:12 perry Exp $
Things to do with spell(1)
a) This code needs to be made WARNSable. Right now it isn't.
b) Make sure that we are very compatible with the SVID and Solaris
versions of the command (since those are the best models we have,
there being no SuS spec.)
c) Perhaps the last BSD version of /usr/dict/words that was 32V
derived should be merged in to the modern /usr/dict/words file. It
has a bunch of useful additions over the original 32V file etc.
d) The code should be commented and cleaned. The code style is very
1970s.
e) The way this works is bad. All the rules of how to deal with
spelling, suffixes, etc, are hard-coded in. That made sense in v7,
but it doesn't any more.
f) The word search algorithm might be made better. millert turned it
into a binary search from the original hash mechanism.
g) Also, the general mode that this program works in is VERY 1970s --
it just spits out a list of words that are misspelled without
context. By merging spell and spellprog, and doing a wee bit of
hacking, it should be possible to do things like complaining about
the line number that things are misspelled on etc.

View File

@ -0,0 +1,19 @@
# $NetBSD: Makefile,v 1.1 2005/06/29 21:06:12 perry Exp $
# Installs the spell shell script, its manual page, and the word lists.
MAN= spell.1
SCRIPTS=spell.sh
BINDIR= /usr/bin
# Word lists whose file names carry a "special." prefix in the source tree.
SPECIALFILES= special.math special.netbsd
# All data files to install; the plain lists go to FILESDIR.
FILES= american british stop ${SPECIALFILES}
FILESMODE= ${NONBINMODE}
FILESDIR= /usr/share/dict
# For each special list, override the per-file name and directory: the
# "special." prefix is stripped from the name (:S/special.//) and the
# directory is set to /usr/share/dict/special.
.for F in ${SPECIALFILES}
FILESNAME_${F}= ${F:S/special.//}
FILESDIR_${F}= /usr/share/dict/special
.endfor
.include <bsd.prog.mk>

View File

@ -0,0 +1,353 @@
acknowledgment
aggrandize
aluminize
aluminum
amor
amorous
amphitheater
analog
analyze
anemia
anemic
anesthesia
anesthetic
anesthetize
anglicize
antagonize
apologize
appareled
appareling
appetize
arbor
archeology
Archeopteryx
ardor
arithmetize
armor
armory
axiomatize
baptize
barreled
barreling
behavior
behavioral
behoove
belabor
beveled
beveler
beveling
canceled
canceler
canceling
candor
carburetor
catalog
catalyze
catechize
categorize
cauterize
center
centimeter
channeled
channeler
channeling
chiseled
chiseler
chiseling
clamor
clamorous
clangor
cognizant
cognize
color
colorable
colorful
corbeled
corbeling
counseled
counseling
crystallize
cudgeled
cudgeler
cudgeling
decentralize
decolonize
decriminalize
defense
dehumanize
deionize
demagnetize
demeanor
demineralize
demoralize
demythologize
depersonalize
depolarize
desensitize
destabilize
desynchronize
detribalize
dialog
dialyze
diarrhea
dichotomize
discolor
disemboweled
disemboweling
disfavor
disheveled
disheveler
disheveling
dishonor
dishonorable
disorganize
doweled
doweler
doweling
dramatize
dueled
dueler
dueling
duelist
economize
ecumenical
edema
emphasize
enameled
enameling
enamor
encyclopedia
endeavor
energize
eon
epicenter
epilog
esophagus
esthetic
eulogize
favor
favorable
favorite
fervor
fiber
flavor
fraternize
fueled
fueler
fueling
funneled
funneler
funneling
furor
galvanize
gaveled
gaveler
gaveling
glamorize
gram
graveled
graveling
gray
graybeard
graywacke
groveled
groveler
groveling
gynecology
harbor
harmonize
hemoglobin
hemolytic
hemophilia
hemophiliac
hemorrhage
hemorrhoid
hemosiderin
hiccup
hiccupped
hiccupping
homeopathy
homogenize
homolog
honor
honorable
humor
hydrolyze
hypnotize
hypostatize
hypothesize
jail
jeweled
jeweler
jeweling
judgment
kilogram
kinesthesis
kinesthetic
labeled
labeler
labeling
labor
laborite
legitimize
leukemia
leveled
leveler
leveling
libeled
libeler
libeling
license
liter
logorrhea
louver
luster
marveled
marveler
marveling
mechanize
medieval
memorize
mesmerize
metallize
milligram
milliliter
millimeter
modeled
modeler
modeling
nanogram
nanometer
naught
neighbor
neighborhood
notarize
ocher
odor
offense
optimize
orientation
ostracize
pajama
paleontology
pallor
paneled
paneling
paralleled
paralleling
paralyze
parametrize
parceled
parceler
parceling
parenthesize
parlor
pasteurize
peptize
photolyze
photosynthesize
picogram
plagiarize
platinize
program
prolog
proselytize
psychoanalyze
pulverize
pummeled
pummeler
pummeling
pyorrhea
pyrolyze
quantize
quarreled
quarreler
quarreling
radiosterilize
rancor
raveled
raveler
raveling
realize
recognize
reconnoiter
reveled
reveler
reveling
rigor
rumor
saber
saltpeter
savior
savor
savory
scepter
schematize
scrutinize
sensitize
sepulcher
shoveled
shoveler
shoveling
shriveled
shriveling
siphon
sniveled
sniveler
sniveling
soliloquize
specialty
specter
spirochete
splendor
squirreled
squirreling
stigmatize
succor
summarize
swiveled
swiveling
symmetrize
sympathize
synchronize
synthesize
systematize
tantalize
tasseled
tasseling
temporize
theater
theatergoer
theatergoing
theorize
tinseled
tinseling
titer
topologize
toweled
toweling
trammeled
traumatize
traveled
traveler
traveling
travelog
tricolor
tumor
tunneled
tunneler
tunneling
tyrannize
valor
vapor
varicolored
vigor
vulcanize
wagon
watercolor
weaseled
weaseling
whiskey
woolen
yodeled
yodeling

353
usr.bin/spell/spell/british Normal file
View File

@ -0,0 +1,353 @@
aeon
aerodrome
aeroplane
aggrandise
alarum
aluminise
aluminium
amour
amourous
amphitheatre
anaemia
anaemic
anaesthesia
anaesthetic
anaesthetise
analyse
anglicise
antagonise
apologise
apparelled
apparelling
appetise
arbour
archaeology
Archaeopteryx
ardour
arithmetise
armour
armoury
axiomatise
baptise
barrelled
barrelling
behaviour
behavioural
behove
belabour
bevelled
beveller
bevelling
cancelled
canceller
cancelling
candour
carburettor
catalyse
catechise
categorise
cauterise
centimetre
centre
channelled
channeller
channelling
cheque
chiselled
chiseller
chiselling
clamour
clamourous
clangour
clew
cognisant
cognise
colour
colourable
colourful
connexion
corbelled
corbelling
counselled
counselling
crystallise
cudgelled
cudgeller
cudgelling
decentralise
decolonise
decriminalise
defence
deflexion
dehumanise
deionise
demagnetise
demeanour
demineralise
demoralise
demythologise
depersonalise
depolarise
desensitise
destabilise
desynchronise
detribalise
dialyse
diarrhoea
dichotomise
discolour
disembowelled
disembowelling
disfavour
dishevelled
disheveller
dishevelling
dishonour
dishonourable
disorganise
dowelled
doweller
dowelling
dramatise
duelled
dueller
duelling
duellist
economise
emphasise
enamelled
enamelling
enamour
encyclopaedia
endeavour
energise
epicentre
eulogise
favour
favourable
favourite
fervour
fibre
flavour
fraternise
fuelled
fueller
fuelling
funnelled
funneller
funnelling
furore
fuze
galvanise
gaol
gavelled
gaveller
gavelling
glamourise
gramme
gravelled
gravelling
greybeard
greywacke
grovelled
groveller
grovelling
gynaecology
haemoglobin
haemolytic
haemophilia
haemophiliac
haemorrhage
haemorrhoid
haemosiderin
harbour
harmonise
hiccough
homoeopathy
homogenise
honour
honourable
humour
hydrolyse
hypnotise
hypostatise
hypothesise
inflexion
jewelled
jeweller
jewelling
kilogramme
kinaesthesis
kinaesthetic
labelled
labeller
labelling
labour
labourite
legitimise
leukaemia
levelled
leveller
levelling
libelled
libeller
libelling
licence
litre
logorrhoea
louvre
lustre
marvelled
marveller
marvelling
mechanise
mediaeval
memorise
mesmerise
metallise
metre
milligramme
millilitre
millimetre
modelled
modeller
modelling
nanogramme
nanometre
neighbour
neighbourhood
notarise
ochre
odour
oecumenical
oedema
oesophagus
offence
optimise
orientate
ostracise
pallour
panelled
panelling
parallelled
parallelling
paralyse
parametrise
parcelled
parceller
parcelling
parenthesise
parlour
pasteurise
peptise
photolyse
photosynthesise
picogramme
plagiarise
platinise
practise
programme
proselytise
psychoanalyse
pulverise
pummelled
pummeller
pummelling
pyjama
pyorrhoea
pyrolyse
quantise
quarrelled
quarreller
quarrelling
radiosterilise
rancour
ravelled
raveller
ravelling
realise
recognise
reconnoitre
reflexion
revelled
reveller
revelling
rigour
rumour
sabre
saltpetre
saviour
savour
savoury
sceptre
schematise
scrutinise
sensitise
sepulchre
shew
shovelled
shoveller
shovelling
shrivelled
shrivelling
snivelled
sniveller
snivelling
soliloquise
speciality
spectre
splendour
squirrelled
squirrelling
stigmatise
succour
summarise
swede
swivelled
swivelling
symmetrise
sympathise
synchronise
synthesise
syphon
systematise
tantalise
tasselled
tasselling
temporise
theatre
theatregoer
theatregoing
theorise
tinselled
tinselling
titre
topologise
towelled
towelling
trammelled
traumatise
travelled
traveller
travelling
tricolour
tumour
tunnelled
tunneller
tunnelling
tyrannise
tyre
valour
vapour
varicoloured
vigour
vulcanise
waggon
watercolour
weaselled
weaselling
whilst
whisky
woollen
yodelled
yodelling

View File

@ -0,0 +1,248 @@
Ab
abelian
Abhyankar
adele
adic
a.e
Aequationes
Alaoglu
Amitsur
A.M.S
AMS
Apostol
Arcata
artin
artinian
ary
Arzela
Ascoli
Atiyah
Auslander
automorphic
Azumaya
Baer
Baire
barycenter
Beltrami
bialgebra
bicommutant
bicontinuous
bifunctor
biinvariant
bimodule
Birkhauser
Birkhoff
bivariant
Bolyai
Borel
Bott
Brouwer
Brouwerian
Bures
Cartan
cartesian
Cartier
Chern
Chevalley
Choleski
Civita
Clebsch
clopen
coaction
coaddition
coalgebra
coassociative
cobordant
cobordism
cochain
cocommutative
cocomplete
coderivative
codiagonal
codimension
cofinal
cofinite
cogenerate
cogroup
cohomotopy
coideal
coidentity
coimage
coinfinite
coinverse
cokernel
colimit
comaximal
commutant
comodule
Comp
complexify
componentwise
composable
composita
compositum
comultiplication
contractible
copower
coprime
coprojection
coreflective
cosemigroup
counit
counital
Courant
Coxeter
Dedekind
Dekker
DeRham
determinantal
Dieudonne
Doklady
Dordrecht
eigenring
Eilenberg
epimorph
epimorphic
equicontinuity
equicontinuous
equivariant
Erdos
Esztergom
Fary
Feferman
Feynman
finitary
Formanek
Fraenkel
Frechet
Freyd
Frobenius
Fubini
Fundamenta
Gel'fand
GL
Goldie
Golod
Grothendieck
Halmos
Hensel
Herstein
Hironaka
Hochschild
Hochster
holomorphic
homotopic
Hopf
Hurewicz
IAS
idele
IHES
indiscrete
infinitary
involutive
Jategaonkar
Jonsson
Kac
Kaehler
Kan
Kaplansky
Kato
Kesthely
Kirillov
Kishinev
Knuth
Kolmogorov
Kostant
Krein
Krull
Kurosh
Kutta
Lawvere
Lefschetz
Levitzki
Liouville
Mal'cev
Martindale
Mathematicae
Mazur
meromorphic
metabelian
metaplectic
Milman
Milnor
mod
monodromy
monoidal
monomorphic
Morita
MOS
MSRI
nabla
Nagata
Nikodym
Noether
noetherian
nullstellensatz
Oberwolfach
Passman
pathwise
Plancherel
plethysm
Pontrjagin
prenex
Procesi
profinite
Prufer
PSL
PSU
quasiinverse
quasiinvertible
quasimultiplication
quasivariety
quaternion
quaternionic
Reidel
riemannian
Sard
Seidenberg
Seifert
Serre
sesquilinear
Sethian
sfield
Shafarevich
Shelah
simplices
SL
Sobolev
socle
Spivak
Steenrod
Steinitz
Stiefel
straightedge
tensorial
Thurston
Tietze
torsionfree
trinomial
tripleable
Tychonoff
ultrafiltral
umbral
unimodular
Urysohn
Vandermonde
Varadarajan
Verma
Waerden
Weil
Weyl
w.r.t
Yoneda
Zariski
Zassenhaus
Zermelo
zeroary
ZF
ZFC

File diff suppressed because it is too large Load Diff

264
usr.bin/spell/spell/spell.1 Normal file
View File

@ -0,0 +1,264 @@
.\" $NetBSD: spell.1,v 1.1 2005/06/29 21:06:12 perry Exp $
.\"
.\" derived from: OpenBSD: spell.1,v 1.6 2003/06/10 09:12:11 jmc Exp
.\"
.\" Copyright (C) 1985, 1993, 1994
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" Copyright (C) Caldera International Inc. 2001-2002.
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code and documentation must retain the above
.\" copyright notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\" must display the following acknowledgement:
.\" This product includes software developed or owned by Caldera
.\" International, Inc.
.\" 4. Neither the name of Caldera International, Inc. nor the names of other
.\" contributors may be used to endorse or promote products derived from
.\" this software without specific prior written permission.
.\"
.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
.\" IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
.\" INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.\" @(#)spell.1 8.2 (Berkeley) 4/18/94
.\"
.Dd April 18, 1994
.Dt SPELL 1
.Os
.Sh NAME
.Nm spell
.Nd find spelling errors
.Sh SYNOPSIS
.Nm spell
.Op Fl biltvx
.Op Fl d Ar list
.Op Fl h Ar spellhist
.Oo
.Fl m
.Ar m | s | e | a | l
.Oc
.Op Fl s Ar stop
.Op + Ns Ar extra_list
.Op Ar
.Sh DESCRIPTION
.Nm spell
collects words from the named documents and looks them up in a spelling list.
Words that neither occur among nor are derivable (by applying certain
inflections, prefixes or suffixes) from words in the spelling list
are printed on the standard output.
.Pp
If no files are named, words are collected from the standard input.
.Nm
ignores most
.Xr troff 1 ,
.Xr tbl 1 ,
.Xr eqn 1
and
.Xr pic 1
constructions.
Copies of all output may be accumulated in the history file,
if one is specified.
.Pp
By default,
.Nm
(like
.Xr deroff 1 )
follows chains of included files (`.so' and `.nx' commands).
.Pp
The default spelling list is based on Webster's Second International
dictionary and should be fairly complete.
Words that appear in the
.Dq stop list
are immediately flagged as misspellings, regardless of whether or not
they exist in one of the word lists.
This helps filter out misspellings (e.g. thier=thy\-y+ier)
that would otherwise pass.
Additionally, the
.Pa british
file is also used as a stop list unless the
.Fl b
option is specified.
.Pp
Site administrators may add words to the local word list,
.Pa /usr/local/share/dict/words
or the local stop list,
.Pa /usr/local/share/dict/stop .
.Pp
All word (and stop) lists must be sorted in lexicographical order
with case folded.
The simplest way to achieve this is to use
.Dq sort -df .
If the word files are incorrectly sorted,
.Nm
will not be able to operate correctly.
.Pp
The options are as follows:
.Bl -tag -width Ds
.It Fl b
Check British spelling.
Besides preferring
.Em centre , colour , speciality , travelled ,
etc., this option insists upon
.Fl ise
in words like
.Em standardise ,
Fowler and the OED to the contrary notwithstanding.
In this mode, American variants of words are added to the stop list.
.It Fl i
Instruct
.Xr deroff 1
to ignore `.so' and `.nx' commands.
.It Fl l
Use
.Xr delatex 1
instead of
.Xr deroff 1
if it is present on the system.
.It Fl t
Use
.Xr detex 1
instead of
.Xr deroff 1
if it is present on the system.
.It Fl v
Print all words not literally in the spelling list in addition to
plausible derivations from spelling list words.
.It Fl x
Print every plausible stem, prefixed with `='.
.It Fl d Ar word_list
Use the specified word list instead of the default system word list.
The word list must be sorted as specified above.
.It Fl h Ar spellhist
Store misspelled words in the specified history file.
The output of
.Li who -m
is appended to the history file after the list of misspelled words.
.It Fl m
Enable support for common
.Xr troff 1
macro packages; this option is passed verbatim to
.Xr deroff 1 .
The
.Fl m
option takes the following arguments:
.Bl -tag -width Ds
.It a
recognize
.Xr man 7
macros.
.It e
recognize
.Xr me 7
macros.
.It m
recognize
.Xr mm 7
macros.
.It s
recognize
.Xr ms 7
macros.
.It l
recognize
.Xr mm 7
macros and delete
.Xr mm 7
lists.
.El
.It Fl s Ar stop_list
Use the specified stop list instead of the default system stop list.
The stop list must be sorted as specified above.
.It + Ns Ar extra_list
Use
.Ar extra_list
in addition to the default word list.
The extra word list must be sorted as specified above.
.El
.Sh FILES
.Bl -tag -width /usr/local/share/dict/wordsxx -compact
.It Pa /usr/share/dict/words
Default spelling list
.It Pa /usr/share/dict/american
American spelling of certain words
.It Pa /usr/share/dict/british
British spelling of certain words
.It Pa /usr/share/dict/stop
Default stop list.
.It Pa /usr/local/share/dict/words
Local spelling list (optional)
.It Pa /usr/local/share/dict/stop
Local stop list (optional)
.It Pa /usr/libexec/spellprog
Binary executed by the shell script
.Pa /usr/bin/spell .
.El
.Sh SEE ALSO
.Xr deroff 1 ,
.Xr look 1 ,
.Xr sed 1 ,
.Xr sort 1 ,
.Xr tee 1 ,
.Xr troff 1
.Sh HISTORY
The
.Nm spell
command appeared in
.At v6 .
.Pp
Unlike historic versions, the
.Ox
.Nm
command does not use hashed word files.
Instead, it uses lexicographically sorted files and the same technique as
.Xr look 1 .
.Sh BUGS
The spelling list lacks many technical terms; new installations will
probably wish to monitor the output for several months to gather local
additions.
.Pp
British spelling was done by an American.
.Pp
In
.Fl x
mode it would be nicer if the stems were grouped with the appropriate word.

View File

@ -0,0 +1,120 @@
#!/bin/sh
#
# $NetBSD: spell.sh,v 1.1 2005/06/29 21:06:12 perry Exp $
#
# Taken from:
# OpenBSD: spell.ksh,v 1.8 2004/02/02 22:36:50 fgsch Exp
#
# Converted to sh from ksh by Perry E. Metzger
#
# Copyright (c) 2001, 2003 Todd C. Miller <Todd.Miller@courtesan.com>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# Sponsored in part by the Defense Advanced Research Projects
# Agency (DARPA) and Air Force Research Laboratory, Air Force
# Materiel Command, USAF, under agreement number F39502-99-1-0512.
#
# Locations of the helper binary and of the system/local word and stop lists.
SPELLPROG=/usr/libexec/spellprog
DICT=/usr/share/dict/words
LOCAL_DICT=/usr/local/share/dict/words
STOP=/usr/share/dict/stop
LOCAL_STOP=/usr/local/share/dict/stop
AMERICAN=/usr/share/dict/american
BRITISH=/usr/share/dict/british

# Default to American spelling: the American list is passed along with the
# dictionaries and the British list along with the stop lists (-b swaps them).
# NOTE(review): LANG is also the name of the locale environment variable; if
# the caller exported it, this assignment changes it for every child process.
# Left as-is to avoid altering the locale the children currently see.
LANG=$AMERICAN
STOP_LANG=$BRITISH
EXTRA=
FLAGS=
DEROFF="deroff -w"
HISTFILE=
TMP=`mktemp /tmp/spell.XXXXXXXX` || exit 1
VTMP=
USAGE="usage: spell [-biltvx] [-d list] [-h spellhist] [-m a | e | l | m | s] [-s stop] [+extra_list] [file ...]"

# Clean up the temporary files on exit.  The command is single-quoted so
# that $VTMP is expanded when the trap fires (it is only created later, by
# -v), and it does not call exit so the script's real exit status survives.
# Signals are turned into a failing exit, which in turn runs the exit trap.
trap 'rm -f "$TMP" ${VTMP:+"$VTMP"}' 0
trap 'exit 1' 1 2 15

# Use local word/stop lists if they exist
if [ -f $LOCAL_DICT ]; then
	DICT="$DICT $LOCAL_DICT"
fi
if [ -f $LOCAL_STOP ]; then
	STOP="$STOP $LOCAL_STOP"
fi

# Parse the option flags; "+extra_list" operands are handled afterwards.
while getopts "biltvxd:h:m:s:" c; do
	case $c in
	b)	# British spelling: swap the roles of the two language lists.
		LANG=$BRITISH
		STOP_LANG=$AMERICAN
		FLAGS="$FLAGS -b"
		;;
	i)	DEROFF="$DEROFF -i"
		;;
	l)	DEROFF="delatex"
		;;
	m)	DEROFF="$DEROFF -m $OPTARG"
		;;
	t)	DEROFF="detex"
		;;
	v)	# -v output is collected in a second temporary file.
		VTMP=`mktemp /tmp/spell.XXXXXXXX` || {
			rm -f "$TMP"
			exit 1
		}
		FLAGS="$FLAGS -v -o $VTMP"
		;;
	x)	FLAGS="$FLAGS -x"
		;;
	d)	# An explicit dictionary replaces the defaults, language list included.
		DICT="$OPTARG"
		LANG=
		;;
	s)	# An explicit stop list replaces the defaults, language list included.
		STOP="$OPTARG"
		STOP_LANG=
		LOCAL_STOP=
		;;
	h)	HISTFILE="$OPTARG"
		;;
	*)	echo "$USAGE" 1>&2
		exit 1
		;;
	esac
done
shift $(( $OPTIND - 1 ))

# Leading "+file" operands name extra word lists; strip the '+' and collect.
while test $# -ne 0; do
	case "$1" in
	+*)	EXTRA="$EXTRA ${1#+}"
		shift
		;;
	*)	break
		;;
	esac
done

# Run the checking pipeline on the files given as arguments (standard input
# when there are none).  The file operands are passed as "$@" so that names
# containing whitespace or glob characters reach the filter intact.
# The first spellprog pass is given the stop lists and writes to $TMP via -o;
# the second is given the dictionaries; the final sort merges the second
# pass's output with $TMP.
check_words()
{
	$DEROFF "$@" | sort -u | $SPELLPROG -o "$TMP" $STOP $STOP_LANG | \
	    $SPELLPROG $FLAGS $DICT $LANG $EXTRA | sort -u -k1f - "$TMP"
}

# Any parameters left are files to be checked, pass them to deroff
if [ -n "$HISTFILE" ]; then
	# Also append the results, followed by the output of who -m, to the
	# history file.
	check_words "$@" | tee -a "$HISTFILE"
	who -m >> "$HISTFILE"
else
	check_words "$@"
fi

# With -v, print what the second spellprog pass wrote to $VTMP.
if [ -n "$VTMP" ]; then
	sort -u -k2f -k1 "$VTMP"
fi
exit 0

968
usr.bin/spell/spell/stop Normal file
View File

@ -0,0 +1,968 @@
abator
abeted
abeter
abeting
abuted
abuter
abuting
accessable
accompanyist
acquaince
acquiter
acquiting
acter
addendums
adly
admirality
admitable
admited
admiter
admiting
ahly
allotable
alloted
alloter
alloting
amly
andless
animadvertion
anly
annulable
annuled
annuler
annuling
ans
areless
Argentinan
arguement
arised
asly
assesser
Athenan
ation
ative
atly
ats
auditer
avered
averer
avering
avertion
awared
axises
axly
baned
baning
beared
beated
bedimed
bedimer
bediming
befited
befiter
befiting
befoged
befoger
befoging
begeted
begeter
begeting
begined
beginer
begining
beholded
beless
besetable
beseted
beseter
beseting
bespeaked
bestired
bestirer
bestiring
betted
bidded
bies
binded
bited
blader
bleeded
blowed
breaked
breeded
bringed
bursted
buyed
byly
Canadan
Carolinan
casted
catched
certainity
checksumable
checksumed
checksuming
choosed
clinged
collectable
collecter
comed
commitable
commited
commiter
commiting
compelable
compeled
compeler
compeling
compositer
compositon
compressable
compresser
conducter
coner
conferable
confered
conferer
confering
coning
connecter
conquerer
consigner
constricter
constructable
constructer
contemptable
contracter
contributer
controlable
controled
controler
controling
convertable
convertion
convokable
corpuses
correcter
corrigendums
corrodable
corruptable
counseler
crediter
creeped
crotchity
cruelity
crystalise
crystalize
curiousity
currance
currancy
curriculas
cutted
datas
datums
dealed
debared
debarer
debaring
debator
debter
debuged
debuger
debuging
decontroled
decontroler
decontroling
deductable
defecter
deferable
defered
deferer
defering
defication
deflator
deflecter
degased
degaser
degasing
degumed
degumer
deguming
demitable
demited
demiter
demiting
demured
demuring
denyable
depositer
depressable
depresser
desolator
destructer
detecter
deterable
detered
deterer
detering
determinancy
detracter
digestable
dimed
dimest
directer
discernable
discomfited
discomfiter
discomfiting
disintered
disinterer
disintering
dispelable
dispeled
dispeler
dispeling
dispence
divertion
doly
doned
doner
doning
drawed
drinked
drived
Dudly
duely
dus
dutyable
eated
eavesdroped
eavesdroper
eavesdroping
editer
effluvias
effluviums
ehly
ehs
ejecter
electer
elly
embedable
embeded
embeder
embeding
emitable
emited
emiter
emiting
emly
enaction
enbalm
enbank
enbark
enbattle
enbay
enbed
enbit
enblaze
enblazon
enbody
enboss
enbow
enbowel
enbrace
enbrittle
enbroil
encant
encur
endebted
enend
enflame
enform
engin
enirate
enit
enly
enpanel
enpathetic
enplace
enplane
enploy
enpower
enpress
enpurple
enroach
enroad
entend
enterity
entier
entirity
entone
envoice
envyable
equilibriums
equipable
equiped
equiper
equiping
erodable
errer
esophaguses
estoped
estoper
estoping
etly
ets
evokable
exasperator
exceled
exceler
exceling
exegesises
exhaustable
exhibiter
expection
expelable
expeled
expeler
expeling
expositer
expressable
extendable
extolable
extoled
extoler
extoling
extracter
extremums
extrovertion
facter
fadded
fadding
fallable
falled
feeded
feeled
fifity
fighted
finded
flamable
flexable
flinged
flirtion
Floridan
fluter
forbeared
forbided
foreage
forebade
forebid
forebidden
forebidding
forebore
foreborn
foreborne
foreensic
foreest
foreever
forefend
forefit
foregave
foreget
foregettable
foregetting
foregive
foregiven
foregot
foregotten
foremat
foremate
forematted
forematting
foremica
foresake
foreswear
foreswore
foresworn
foretress
foretune
forevery
foreward
forgetable
forgeted
forless
forsaked
fraility
freezed
froliced
frolicing
frustrator
gayity
generousity
genesises
genuses
getless
getted
Ghanan
giddaped
giddaper
giddaping
gived
goly
gos
gotless
governer
grammer
grimer
grimest
grinded
growed
guility
gullable
hadless
haly
hasless
haveless
heared
hely
hesitator
holded
huggest
huging
humbility
hurted
ifless
ifly
iis
impartion
impelable
impeled
impeler
impeling
importion
impressable
impugnity
incured
incurer
incuring
Indochinan
inducter
infered
inferer
infering
inflamable
inflator
ingestable
inheriter
inless
inly
inspecter
instructer
intence
interable
intercepter
intered
interer
intering
interruptable
intimator
inventer
invertable
invertion
invester
invokable
isless
isly
itly
ivly
ivs
ixes
ixly
jurer
juter
keeped
kiloohm
knowed
laly
lammest
lapeled
layed
lended
letted
licenser
loaned
locomoter
loging
loly
losed
loyality
luminousity
mader
madest
mading
maked
maly
maner
maning
manumitable
manumited
manumiter
manumiting
mared
meaned
meeted
mely
metafor
metafore
mies
mily
monstrousity
muchless
multiplexer
myly
nely
neurosises
nicity
noding
noless
noly
novelity
nuly
objecter
occured
occurer
occuring
offsetable
offseted
offseter
offseting
ofless
ofly
ofs
ohly
ons
oppresser
opuses
orless
outin
outof
outthat
outthe
outto
overthe
overto
ows
oxes
oxly
pairity
paly
paner
paniced
panicing
paralysises
parenthesises
paster
payed
pennance
pepperment
perceptable
perfectable
permitable
permited
permiter
permiting
pervertion
phenomenas
phenomenons
photosynthesises
piity
pipper
pityable
placator
plumer
plummer
plyable
pompousity
porousity
possesser
postprocesser
predicter
preempter
prefered
preferer
prefering
prerequite
presense
presuably
presure
processer
professer
programable
programed
programer
programing
projecter
propagandaist
propeled
propeler
propeling
properity
prosecuter
prospecter
protecter
publical
purveyer
quitable
quiter
quiting
rackity
ratter
readed
reah
realter
rean
reas
reat
reax
rebat
rebe
rebeled
rebeler
rebeling
rebutable
rebuted
rebuter
rebuting
reby
recal
recapable
recaped
recaper
recaping
recloth
recommitable
recommited
recommiter
recommiting
recured
recurer
recuring
redacter
reden
redish
redu
reem
reen
reet
refered
referer
refering
reflecter
rego
regretable
regreted
regreter
regreting
reguard
reha
rehe
rehi
reho
reif
reii
reis
reit
reiv
reix
rela
religiousity
relo
rema
remad
remaned
remaner
remaning
reme
remedyable
remi
remitable
remited
remiter
remiting
remy
rended
renoun
renu
reof
reoh
reon
reor
reow
reox
repa
repelable
repeled
repeler
repeling
repi
rere
rerout
rerunable
reruned
reruner
reruning
resa
reshiped
reshiper
reshiping
resistable
reso
reti
reto
reup
revertion
revi
revisor
revokable
rewe
rewok
rexi
reye
rickity
rided
rised
rocketed
rocketer
rocketing
royality
runable
runed
runned
saging
saly
saturator
sayed
scrupulousity
sculpter
secter
seeked
selecter
selled
senation
senative
sended
senser
setted
shaked
shedded
shelfs
shily
shooted
shrinked
shutted
siner
sining
sinked
siply
sitted
sixity
slayed
sleeped
slided
slightlyless
slinged
slinked
smited
soliciter
soly
somewhatless
sovereignity
speaked
spearment
spended
spinable
spiner
spining
spinned
spiting
splitted
spreaded
stagged
standed
stealed
sticked
stinked
stratas
strictlyless
strided
striked
submitable
submited
submiter
submiting
subtlity
successer
suggestable
suiter
suntaned
suntaning
suppressable
surity
surveyer
suspenser
suzerainity
sweared
sweeped
swinged
synopsises
synthesises
taked
teached
teching
telled
theless
thesises
thier
thinked
thrombosises
throwed
thrusted
tiner
tining
toly
tracter
traiter
transfered
transferer
transfering
transgresser
transmitable
transmited
transmiter
transmiting
transportion
trivias
triviums
truely
typeseter
typeseting
unactivate
unadequacy
unattention
unboard
unbreed
uncant
uncapacity
uncompletion
uncorporate
uncrease
uncredulity
unculpable
uncur
uncurred
uncurrer
uncurring
undebt
undeed
undefinite
undelicate
undent
undenture
undermind
underthat
underthe
underthis
underwhich
undices
undignity
undiscriminate
undisposition
undoor
unduct
undwell
unefficacy
unequity
unfamous
unfelicity
unfest
unfield
unfiltrate
unfinity
unflame
unflammable
unflow
unfluence
unflux
unformant
unformation
unfuse
unfusion
ungather
ungrate
ungratitude
unhabitant
unhabitation
unhale
unhere
unholding
unhumane
unhumanity
unjure
unjury
unnumerable
unoperable
unput
unquest
unscribe
unscription
unsect
unside
unspire
unstall
unstance
unstead
untact
untake
untemperance
untend
untestate
untill
untolerant
untuition
unvade
unvalidate
unvent
unverse
unversion
unvertebrate
unviolate
unvocate
unward
unwieldly
uply
upseter
upseting
usly
usses
varyable
vendable
Viennan
virtuousity
viscousity
visiter
warer
wasless
weared
weeped
wely
wereless
wharfs
whitter
whitting
wifes
winable
winned
withless
wringed
writed
yiper

View File

@ -0,0 +1,8 @@
# $NetBSD: Makefile,v 1.1 2005/06/29 21:06:12 perry Exp $
# Builds the spellprog helper from spellprog.c and look.c.  It has no manual
# page of its own (NOMAN) and is installed in /usr/libexec rather than a
# directory for user commands.
PROG= spellprog
NOMAN= 1
SRCS= spellprog.c look.c
BINDIR= /usr/libexec
.include <bsd.prog.mk>

View File

@ -0,0 +1,183 @@
/* $NetBSD: look.c,v 1.1 2005/06/29 21:06:12 perry Exp $ */
/* derived from: OpenBSD: look.c,v 1.3 2003/06/03 02:56:16 millert Exp */
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* David Hitz of Auspex Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
#if 0
static const char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95";
#endif
static const char rcsid[] = "$NetBSD: look.c,v 1.1 2005/06/29 21:06:12 perry Exp $";
#endif /* not lint */
#include <sys/types.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <err.h>
u_char *binary_search(u_char *, u_char *, u_char *);
u_char *linear_search(u_char *, u_char *, u_char *);
int compare(u_char *, u_char *, u_char *);
int look(u_char *, u_char *, u_char *);
/*
 * Look up "string" in the sorted word list held in memory between
 * "front" (inclusive) and "back" (exclusive).
 *
 * "string" is folded to lower case in place before searching, so the
 * caller's buffer is modified.  Returns 1 if a line equal to the folded
 * string exists, 0 otherwise.
 */
int
look(u_char *string, u_char *front, u_char *back)
{
	u_char *s;

	/*
	 * Convert string to lower case before searching.  tolower() is the
	 * ISO C interface and leaves non-upper-case characters untouched,
	 * so no isupper() guard is needed.
	 */
	for (s = string; *s != '\0'; s++)
		*s = tolower(*s);

	/* Narrow the range first, then scan the few remaining lines. */
	front = binary_search(string, front, back);
	front = linear_search(string, front, back);

	return (front != NULL);
}
/*
* Binary search for "string" in memory between "front" and "back".
*
* This routine is expected to return a pointer to the start of a line at
* *or before* the first word matching "string". Relaxing the constraint
* this way simplifies the algorithm.
*
* Invariants:
* front points to the beginning of a line at or before the first
* matching string.
*
* back points to the beginning of a line at or after the first
* matching line.
*
* Base of the Invariants.
* front = NULL;
* back = EOF;
*
* Advancing the Invariants:
*
* p = first newline after halfway point from front to back.
*
* If the string at "p" is not greater than the string to match,
* p is the new front. Otherwise it is the new back.
*
* Termination:
*
* The definition of the routine allows it to return at any point,
* since front is always at or before the line to print.
*
* In fact, it returns when the chosen "p" equals "back". This
* implies that there exists a string that is at least half as long as
* (back - front), which in turn implies that a linear search will
* be no more expensive than the cost of simply printing a string or two.
*
* Trying to continue with binary search at this point would be
* more trouble than it's worth.
*/
/*
 * Advance "p" to the start of the next line: just past the next newline,
 * or to "back" if no newline remains before it.
 *
 * "p" is modified in place and evaluated more than once, so it must be a
 * plain lvalue without side effects.  The do/while wrapper makes the
 * expansion a single statement, so "SKIP_PAST_NEWLINE(p, back);" is safe
 * as the body of an if/else.
 */
#define	SKIP_PAST_NEWLINE(p, back)					\
	do {								\
		while ((p) < (back) && *(p)++ != '\n')			\
			continue;					\
	} while (0)
/*
 * Narrow [front, back) by bisection and return a line start that is at
 * or before the first line matching "string".
 */
u_char *
binary_search(u_char *string, u_char *front, u_char *back)
{
	u_char *mid;

	for (;;) {
		/* Probe the first line start after the midpoint. */
		mid = front + (back - front) / 2;
		SKIP_PAST_NEWLINE(mid, back);

		/*
		 * Stop once the probe reaches "back".  The extra range
		 * check keeps us from looping forever if the file
		 * changes underneath us.
		 */
		if (mid >= back || back <= front)
			break;

		if (compare(string, mid, back) > 0)
			front = mid;
		else
			back = mid;
	}
	return (front);
}
/*
* Find the first line that matches string, linearly searching from front
* to back.
*
* Return NULL for no such line.
*
* This routine assumes:
*
* o front points at the first character in a line.
* o front is before or at the first line to be printed.
*/
u_char *
linear_search(u_char *string, u_char *front, u_char *back)
{
	int cmp;

	for (; front < back; ) {
		cmp = compare(string, front, back);
		if (cmp < 0)
			return (NULL);	/* passed where it would be */
		if (cmp == 0)
			return (front);	/* found it */
		/* Current line sorts before string: move to the next one. */
		SKIP_PAST_NEWLINE(front, back);
	}
	return (NULL);
}
/*
 * Compare the NUL-terminated key "s1" with the line starting at "s2".
 *
 * The line ends at the first newline or at "back", whichever comes
 * first; its characters are folded to lower case for the comparison.
 * Returns zero if the key equals the whole line, a negative value if the
 * key sorts before the line and a positive value if it sorts after.
 */
int
compare(u_char *s1, u_char *s2, u_char *back)
{
	int ch;

	/* Note that s1 is already lower case (see look()). */
	for (;; ++s1, ++s2) {
		/*
		 * Test the bound before dereferencing: "back" is one past
		 * the end of the data and must never be read.
		 */
		if (s2 == back || *s2 == '\n')
			ch = '\0';
		else
			ch = tolower(*s2);
		if (*s1 != ch)
			return (*s1 - ch);
		if (ch == '\0')
			return (0);
	}
}

View File

@ -0,0 +1,812 @@
/* $NetBSD: spellprog.c,v 1.1 2005/06/29 21:06:12 perry Exp $ */
/* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)spell.h 8.1 (Berkeley) 6/6/93
*/
/*
* Copyright (C) Caldera International Inc. 2001-2002.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code and documentation must retain the above
* copyright notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed or owned by Caldera
* International, Inc.
* 4. Neither the name of Caldera International, Inc. nor the names of other
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
* INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
* INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1991, 1993\n\
The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
#ifndef lint
#if 0
static const char sccsid[] = "@(#)spell.c 8.1 (Berkeley) 6/6/93";
#else
#endif
static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $";
#endif /* not lint */
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Base derivation level: suffix() adds DLEV to the level it is given
 * before indexing deriv[], and s() and es() compare their level against
 * it to decide whether they may still be applied.
 */
#define DLEV 2
int an(char *, char *, char *, int);
int bility(char *, char *, char *, int);
int es(char *, char *, char *, int);
int dict(char *, char *);
int i_to_y(char *, char *, char *, int);
int ily(char *, char *, char *, int);
int ize(char *, char *, char *, int);
int metry(char *, char *, char *, int);
int monosyl(char *, char *);
int ncy(char *, char *, char *, int);
int nop(void);
int trypref(char *, char *, int);
int tryword(char *, char *, int);
int s(char *, char *, char *, int);
int strip(char *, char *, char *, int);
int suffix(char *, int);
int tion(char *, char *, char *, int);
int vowel(int);
int y_to_e(char *, char *, char *, int);
int CCe(char *, char *, char *, int);
int VCe(char *, char *, char *, int);
char *lookuppref(char **, char *);
char *skipv(char *);
char *estrdup(const char *);
void ise(void);
void print_word(FILE *);
void ztos(char *);
__dead void usage(void);
/* from look.c */
int look(unsigned char *, unsigned char *, unsigned char *);
/*
 * Suffix rules, tried in table order by suffix().
 *
 * suf	the suffix spelled backwards; suffix() matches it against the
 *	word from its last character towards the front.
 * p1	handler called first, as (*p1)(ep - n1, d1, a1, lev + 1).
 * n1	number of trailing characters removed before calling p1.
 * d1	derivation text the handlers use when they rewrite the stem
 *	(e.g. "-e+ing"); a1 is the text used for a plain strip ("+ing").
 * p2, n2, d2, a2
 *	optional second rule, tried with the same conventions when the
 *	first one fails; p2 is NULL when there is none.
 *
 * The table ends with an entry whose suf is NULL.  ise() rewrites the
 * 'z' entries in place, so the table must stay writable.
 */
struct suftab {
	char *suf;
	int (*p1)();	/* XXX - variable args */
	int n1;
	char *d1;
	char *a1;
	int (*p2)();	/* XXX - variable args */
	int n2;
	char *d2;
	char *a2;
} suftab[] = {
	{"ssen", ily, 4, "-y+iness", "+ness" },
	{"ssel", ily, 4, "-y+i+less", "+less" },
	{"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" },
	{"s'", s, 2, "", "+'s"},
	{"s", s, 1, "", "+s"},
	{"ecn", ncy, 1, "", "-t+ce"},
	{"ycn", ncy, 1, "", "-cy+t"},
	{"ytilb", nop, 0, "", ""},
	{"ytilib", bility, 5, "-le+ility", ""},
	{"elbaif", i_to_y, 4, "-y+iable", ""},
	{"elba", CCe, 4, "-e+able", "+able"},
	{"yti", CCe, 3, "-e+ity", "+ity"},
	{"ylb", y_to_e, 1, "-e+y", ""},
	{"yl", ily, 2, "-y+ily", "+ly"},
	{"laci", strip, 2, "", "+al"},
	{"latnem", strip, 2, "", "+al"},
	{"lanoi", strip, 2, "", "+al"},
	{"tnem", strip, 4, "", "+ment"},
	{"gni", CCe, 3, "-e+ing", "+ing"},
	{"reta", nop, 0, "", ""},
	{"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"},
	{"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"},
	{"citsi", strip, 2, "", "+ic"},
	{"cihparg", i_to_y, 1, "-y+ic", ""},
	{"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"},
	{"cirtem", i_to_y, 1, "-y+ic", ""},
	{"yrtem", metry, 0, "-ry+er", ""},
	{"cigol", i_to_y, 1, "-y+ic", ""},
	{"tsigol", i_to_y, 2, "-y+ist", ""},
	{"tsi", VCe, 3, "-e+ist", "+ist"},
	/* The plain-strip text used to read "+ist", a copy of the line above. */
	{"msi", VCe, 3, "-e+ism", "+ism"},
	{"noitacif", i_to_y, 6, "-y+ication", ""},
	{"noitazi", ize, 5, "-e+ation", ""},
	{"rota", tion, 2, "-e+or", ""},
	{"noit", tion, 3, "-e+ion", "+ion"},
	{"naino", an, 3, "", "+ian"},
	{"na", an, 1, "", "+n"},
	{"evit", tion, 3, "-e+ive", "+ive"},
	{"ezi", CCe, 3, "-e+ize", "+ize"},
	{"pihs", strip, 4, "", "+ship"},
	{"dooh", ily, 4, "-y+hood", "+hood"},
	{"ekil", strip, 4, "", "+like"},
	{ NULL }
};
/*
 * Prefixes that lookuppref() will strip from the front of a word.
 * Entries are tried in order and compared against the lower-cased word;
 * the list is terminated by NULL.
 */
char *preftab[] = {
"anti",
"bio",
"dis",
"electro",
"en",
"fore",
"hyper",
"intra",
"inter",
"iso",
"kilo",
"magneto",
"meta",
"micro",
"milli",
"mis",
"mono",
"multi",
"non",
"out",
"over",
"photo",
"poly",
"pre",
"pseudo",
"re",
"semi",
"stereo",
"sub",
"super",
"thermo",
"ultra",
"under", /* must precede un */
"un",
NULL
};
/*
 * One entry per word list named on the command line.  main() maps each
 * file read-only and terminates the array with an entry whose fd is -1.
 */
struct wlist {
int fd; /* descriptor from open(); -1 marks the end of the array */
unsigned char *front; /* start of the mapped file */
unsigned char *back; /* front + file size, i.e. one past the last byte */
} *wlists;
int vflag; /* incremented for each -v option */
int xflag; /* incremented for each -x option */
char word[LINE_MAX]; /* working copy of the current word; edited during lookups */
char original[LINE_MAX]; /* current word as read, NUL-terminated; this is what gets printed */
char *deriv[40]; /* derivation strings indexed by level, set by trypref(), suffix() and tryword() */
char affix[40]; /* derivation text collected by tryword() when -v is in effect */
/*
* The spellprog utility accepts a newline-delimited list of words
* on stdin. For arguments it expects the path to a word list and
* the path to a file in which to store found words.
*
* In normal usage, spell is called twice. The first time it is
* called with a stop list to flag commonly misspelled words. The
* remaining words are then passed to spell again, this time with
* the dictionary file as the first (non-flag) argument.
*
* Unlike historic versions of spellprog, this one does not use
* hashed files. Instead it simply requires that files be sorted
* lexicographically and uses the same algorithm as the look utility.
*
* Note that spellprog should be called via the spell shell script
* and is not meant to be invoked directly by the user.
*/
/*
 * Read the next newline-terminated word from stdin into word[].
 *
 * Returns a pointer to the byte just past the word, where the newline
 * has been stored, or NULL once EOF is reached (a final line without a
 * newline is discarded).  A line that does not fit in word[] is
 * reported and skipped in its entirety, so nothing is ever written
 * beyond the end of the buffer.
 */
static char *
read_word(void)
{
	char *ep;
	int ch;

	ep = word;
	while ((ch = getchar()) != EOF) {
		if (ch == '\n') {
			*ep = '\n';
			return (ep);
		}
		if ((size_t)(ep - word) == sizeof(word) - 1) {
			*ep = '\0';
			warnx("word too long (%s)", word);
			/* Slurp until EOL, but do not spin on EOF. */
			while ((ch = getchar()) != '\n') {
				if (ch == EOF)
					return (NULL);
			}
			ep = word;	/* start over with the next line */
			continue;
		}
		*ep++ = ch;
	}
	return (NULL);
}

/*
 * Parse the options, map every word list named on the command line and
 * then check the words read from stdin, one per line.  A word that is
 * found, either directly or through prefix/suffix derivation, is written
 * to the -o file if one was given; every other word goes to stdout.
 */
int
main(int argc, char **argv)
{
	char *ep, *cp, *dp;
	char *outfile;
	int ch, fold, i;
	struct stat sb;
	FILE *file, *found;

	setlocale(LC_ALL, "");

	outfile = NULL;
	while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
		switch (ch) {
		case 'b':
			/* Use British dictionary and convert ize -> ise. */
			ise();
			break;
		case 'o':
			outfile = optarg;
			break;
		case 'v':
			/* Also write derivations to "found" file. */
			vflag++;
			break;
		case 'x':
			/* Print plausible stems to stdout. */
			xflag++;
			break;
		default:
			usage();
		}
	}
	argc -= optind;
	argv += optind;
	if (argc < 1)
		usage();

	/* Open and mmap the word/stop lists. */
	if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL)
		err(1, "malloc");
	for (i = 0; argc--; i++) {
		wlists[i].fd = open(argv[i], O_RDONLY, 0);
		if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
			err(1, "%s", argv[i]);
		if (sb.st_size > SIZE_T_MAX)
			errx(1, "%s: %s", argv[i], strerror(EFBIG));
		wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
		    MAP_PRIVATE, wlists[i].fd, (off_t)0);
		if (wlists[i].front == MAP_FAILED)
			err(1, "%s", argv[i]);
		wlists[i].back = wlists[i].front + sb.st_size;
	}
	wlists[i].fd = -1;	/* end-of-array marker used by dict() */

	/* Open file where found words are to be saved. */
	if (outfile == NULL)
		found = NULL;
	else if ((found = fopen(outfile, "w")) == NULL)
		err(1, "cannot open %s", outfile);

	/* Each pass handles one word; the result is printed on the way round. */
	for (;; print_word(file)) {
		affix[0] = '\0';
		file = found;
		if ((ep = read_word()) == NULL)
			break;

		/* Keep an unmodified, NUL-terminated copy for printing. */
		for (cp = word, dp = original; cp < ep; )
			*dp++ = *cp++;
		*dp = '\0';

		fold = 0;
		for (cp = word; cp < ep; cp++)
			if (islower((unsigned char)*cp))
				goto lcase;
		/* No lower-case letters: try the word exactly as given... */
		if (trypref(ep, ".", 0))
			continue;
		/* ...then with everything after the first character folded. */
		++fold;
		for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
			*dp = tolower((unsigned char)*cp);
lcase:
		if (trypref(ep, ".", 0) || suffix(ep, 0))
			continue;
		if (isupper((unsigned char)word[0])) {
			/*
			 * Rebuild word[] from the saved copy (the lookups
			 * may have edited it) and retry with a lower-case
			 * first letter.
			 */
			for (cp = original, dp = word; (*dp = *cp++); dp++) {
				if (fold)
					*dp = tolower((unsigned char)*dp);
			}
			word[0] = tolower((unsigned char)word[0]);
			goto lcase;
		}
		/* Not found anywhere: report it on stdout. */
		file = stdout;
	}

	if (found != NULL)
		fclose(found);
	exit(0);
}
/*
 * Write the current word (as originally read) to "f"; a NULL stream
 * means there is nowhere to write and nothing is done.  With -v, a
 * non-empty derivation that does not start with '.' is printed first,
 * separated from the word by a tab.
 */
void
print_word(FILE *f)
{
	int show_affix;

	if (f == NULL)
		return;

	show_affix = vflag && affix[0] != '\0' && affix[0] != '.';
	if (show_affix)
		fprintf(f, "%s\t%s\n", affix, original);
	else
		fprintf(f, "%s\n", original);
}
/*
 * Find the first entry in suftab whose suffix matches the end of the
 * word (which ends at "ep") and call the function associated with that
 * suffix: p1 first and, if that fails, p2 when the entry has one.
 *
 * Returns non-zero if one of the handlers recognised the word, 0 if no
 * suffix matched, the handlers failed, or the part of the word in front
 * of the matched suffix contains no vowel.
 */
int
suffix(char *ep, int lev)
{
	struct suftab *t;
	char *cp, *sp;

	lev += DLEV;
	deriv[lev] = deriv[lev-1] = NULL;
	for (t = suftab; (sp = t->suf) != NULL; t++) {
		/* suf is stored reversed, so walk the word backwards. */
		cp = ep;
		while (*sp != '\0') {
			/* A suffix longer than the word cannot match. */
			if (cp <= word || *--cp != *sp++)
				goto next;
		}
		/* Look for a vowel in front of the suffix. */
		for (sp = cp; sp > word && !vowel(sp[-1]); sp--)
			continue;
		if (sp == word)
			return (0);
		if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1))
			return (1);
		if (t->p2 != NULL) {
			deriv[lev] = deriv[lev+1] = NULL;
			return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev));
		}
		return (0);
next:		;
	}
	return (0);
}
/*
 * Handler for suffix table entries that must never match; always
 * reports failure.
 */
int
nop(void)
{
	return 0;
}
/*
 * Try the word ending at "ep" as it stands (recording "a" as the
 * derivation) and, failing that, try to remove a further suffix.
 * "d" is unused.  Returns 1 on success, 0 otherwise.
 */
int
strip(char *ep, char *d, char *a, int lev)
{
	if (trypref(ep, a, lev))
		return (1);
	return (suffix(ep, lev) != 0);
}
/*
 * Handler for a trailing "s".  Refuses when the level is beyond
 * DLEV + 1 or when the stem itself would end in "ss"; otherwise the
 * word is handed to strip().
 */
int
s(char *ep, char *d, char *a, int lev)
{
	if (lev > DLEV + 1 || (*ep == 's' && ep[-1] == 's'))
		return (0);
	return (strip(ep, d, a, lev));
}
/*
 * Handler for "-an"/"-ian": only accepted when the word starts with an
 * upper-case letter (must be proper name).
 */
int
an(char *ep, char *d, char *a, int lev)
{
	return (isupper((unsigned char)*word) ? trypref(ep, a, lev) : 0);
}
/*
 * Handler that overwrites the character at "ep" with 'e' and tries the
 * resulting stem, using "d" as the derivation text.  The overwritten
 * character is not restored.
 */
int
ize(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'e';
	return (strip(ep + 1, "", d, lev));
}
/*
 * Replace the character at "ep" with 'e' and try the stem that now ends
 * just after it, using "d" as the derivation text.  The original
 * character is put back if the attempt fails.  Returns 1 on success.
 */
int
y_to_e(char *ep, char *d, char *a, int lev)
{
	char saved = *ep;

	*ep = 'e';
	if (!strip(ep + 1, "", d, lev)) {
		*ep = saved;
		return (0);
	}
	return (1);
}
/*
 * Handler for suffixes that may follow a y -> i change: a stem ending
 * in 'i' goes through i_to_y(), anything else straight to strip().
 */
int
ily(char *ep, char *d, char *a, int lev)
{
	return (ep[-1] == 'i' ? i_to_y(ep, d, a, lev) : strip(ep, d, a, lev));
}
/*
 * Handler for "-nce"/"-ncy".  Gives up when two successive skipv()
 * scans backwards from ep - 1 run off the front of word[]; otherwise the
 * last character of the stem is replaced by 't' and the result is tried.
 */
int
ncy(char *ep, char *d, char *a, int lev)
{
	char *stem_start = skipv(skipv(ep - 1));

	if (stem_start < word)
		return (0);
	ep[-1] = 't';
	return (strip(ep, d, a, lev));
}
/*
 * Handler for "-ibility": stores 'l' at "ep" and lets y_to_e() supply
 * the following 'e'.
 */
int
bility(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'l';
	return (y_to_e(ep + 1, d, a, lev));
}
/*
 * If the stem ends in 'i', change it to 'y' and report the derivation
 * "d"; otherwise leave the stem alone and report "a".  Either way the
 * stem is then tried via strip().
 */
int
i_to_y(char *ep, char *d, char *a, int lev)
{
	char *how = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		how = d;
	}
	return (strip(ep, "", how, lev));
}
/*
 * Handler for "-es".  Refused when the level exceeds DLEV.  A stem
 * ending in 'i' is tried with the i -> y change, a stem ending in
 * 's', 'h', 'z' or 'x' is tried as it stands, and any other stem is
 * rejected.
 */
int
es(char *ep, char *d, char *a, int lev)
{
	char last;

	if (lev > DLEV)
		return (0);

	last = ep[-1];
	if (last == 'i')
		return (i_to_y(ep, d, a, lev));
	if (last == 's' || last == 'h' || last == 'z' || last == 'x')
		return (strip(ep, d, a, lev));
	return (0);
}
/*
 * Handler for "-metry": overwrite the last two characters before "ep"
 * with "er" and try the result.
 */
int
metry(char *ep, char *d, char *a, int lev)
{
	ep[-1] = 'r';
	ep[-2] = 'e';
	return (strip(ep, d, a, lev));
}
/*
 * Handler for "-tion", "-tive" and "-ator".  The decision is made on
 * the character two before "ep": 'c' or 'r' means the stem is looked up
 * as it is, 'a' means an 'e' is restored first, and anything else fails.
 */
int
tion(char *ep, char *d, char *a, int lev)
{
	char key = ep[-2];

	if (key == 'c' || key == 'r')
		return (trypref(ep, a, lev));
	if (key == 'a')
		return (y_to_e(ep, d, a, lev));
	return (0);
}
/*
* Possible consonant-consonant-e ending.
*/
int
CCe(char *ep, char *d, char *a, int lev)
{
switch (ep[-1]) {
case 'l':
if (vowel(ep[-2]))
break;
switch (ep[-2]) {
case 'l':
case 'r':
case 'w':
break;
default:
return (y_to_e(ep, d, a, lev));
}
break;
case 's':
if (ep[-2] == 's')
break;
case 'c':
case 'g':
if (*ep == 'a')
return (0);
case 'v':
case 'z':
if (vowel(ep[-2]))
break;
case 'u':
if (y_to_e(ep, d, a, lev))
return (1);
if (!(ep[-2] == 'n' && ep[-1] == 'g'))
return (0);
}
return (VCe(ep, d, a, lev));
}
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem ending in 'e' is rejected outright.  When the stem ends in a
 * vowel followed by a non-vowel, it is first tried with an 'e' appended;
 * if that fails the overwritten character is restored and the stem is
 * tried as it stands.
 */
int
VCe(char *ep, char *d, char *a, int lev)
{
	char last, saved;

	last = ep[-1];
	if (last == 'e')
		return (0);

	if (!vowel(last) && vowel(ep[-2])) {
		saved = *ep;
		*ep = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return (1);
		*ep = saved;
	}
	return (strip(ep, d, a, lev));
}
char *
lookuppref(char **wp, char *ep)
{
char **sp;
char *bp,*cp;
for (sp = preftab; *sp; sp++) {
bp = *wp;
for (cp = *sp; *cp; cp++, bp++) {
if (tolower((unsigned char)*bp) != *cp)
goto next;
}
for (cp = bp; cp < ep; cp++) {
if (vowel(*cp)) {
*wp = bp;
return (*sp);
}
}
next: ;
}
return (0);
}
/*
 * If the word is not in the dictionary, try stripping off prefixes
 * until the word is found or we run out of prefixes to check.
 *
 * "ep" marks the end of the word and "a" is the derivation text to
 * record for this level.  The stripped prefixes are collected, each
 * preceded by '+', in a small local buffer that deriv[lev+1] points at
 * while the lookups run; the search stops without a match when another
 * prefix would not fit.  Returns 1 if the word was found, 0 otherwise.
 */
int
trypref(char *ep, char *a, int lev)
{
	char *cp;
	char *bp;
	char *pp;
	size_t plen;
	int val = 0;
	char space[20];

	deriv[lev] = a;
	if (tryword(word, ep, lev))
		return (1);
	bp = word;
	pp = space;
	deriv[lev+1] = pp;
	while ((cp = lookuppref(&bp, ep)) != NULL) {
		/* Need room for '+', the prefix and the terminating NUL. */
		plen = strlen(cp);
		if (plen + 2 > sizeof(space) - (size_t)(pp - space))
			break;
		*pp++ = '+';
		memcpy(pp, cp, plen + 1);
		pp += plen;
		if (tryword(bp, ep, lev+1)) {
			val = 1;
			break;
		}
	}
	/* space[] is about to go out of scope: drop the references to it. */
	deriv[lev+1] = deriv[lev+2] = NULL;
	return (val);
}
/*
 * Look up the text between "bp" and "ep" in the word lists.
 *
 * Stems of one character or less are rejected, as is a stem that
 * monosyl() accepts when the character at "ep" is a vowel.  If the
 * plain lookup fails, "ep" is a vowel and the stem ends in a doubled
 * letter, the lookup is repeated without the final letter and "+<letter>"
 * is recorded as an extra derivation step.  With -v, the derivation
 * strings for levels lev..1 are appended to affix on success.
 * Returns the result of dict(): 1 if found, 0 if not.
 */
int
tryword(char *bp, char *ep, int lev)
{
	char duple[3];
	int found, j;

	if (ep - bp <= 1)
		return (0);
	if (vowel(*ep) && monosyl(bp, ep))
		return (0);

	found = dict(bp, ep);
	if (found == 0 && vowel(*ep) && ep[-1] == ep[-2] &&
	    monosyl(bp, ep - 1)) {
		ep--;
		lev++;
		deriv[lev] = duple;
		duple[0] = '+';
		duple[1] = *ep;
		duple[2] = '\0';
		found = dict(bp, ep);
	}

	/* Also tack on possible derivations. (XXX - warn on truncation?) */
	if (vflag != 0 && found != 0) {
		for (j = lev; j > 0; j--) {
			if (deriv[j] != NULL)
				strlcat(affix, deriv[j], sizeof(affix));
		}
	}
	return (found);
}
/*
 * Return 1 if the text between "bp" and "ep" is at least two characters
 * long and consists of zero or more non-vowels, exactly one vowel, and a
 * final non-vowel other than 'x' or 'w'; return 0 otherwise.
 */
int
monosyl(char *bp, char *ep)
{
	char *scan;

	if (ep - bp < 2)
		return (0);
	if (vowel(ep[-1]))
		return (0);
	if (!vowel(ep[-2]))
		return (0);
	if (ep[-1] == 'x' || ep[-1] == 'w')
		return (0);

	/* Nothing in front of that vowel may be a vowel. */
	for (scan = bp; scan < ep - 2; scan++) {
		if (vowel(*scan))
			return (0);
	}
	return (1);
}
/*
 * Scan backwards from "s": step over one vowel if "s" points at one,
 * then over any run of non-vowels.  Characters in front of word[] are
 * never examined; the result is less than word when the scan ran off
 * the front.
 */
char *
skipv(char *s)
{
	if (s >= word && vowel(*s))
		s--;
	for (; s >= word && !vowel(*s); s--)
		continue;
	return (s);
}
/*
 * Return 1 if "c" is one of the letters a, e, i, o, u or y in either
 * case, 0 otherwise.
 *
 * Callers pass plain char values, which may be negative for 8-bit
 * input, so the value is converted to unsigned char before it is handed
 * to tolower().
 */
int
vowel(int c)
{
	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return (1);
	default:
		return (0);
	}
}
/*
 * Crummy way to Britishise: every suffix table entry whose suffix
 * contains a 'z' gets private copies of its suffix and of any first-rule
 * derivation strings containing 'z', with each 'z' turned into 's'.
 */
void
ise(void)
{
	struct suftab *tab;

	for (tab = suftab; tab->suf != NULL; tab++) {
		/* Assume that suffix will contain 'z' if a1 or d1 do */
		if (strchr(tab->suf, 'z') == NULL)
			continue;

		tab->suf = estrdup(tab->suf);
		ztos(tab->suf);
		if (strchr(tab->d1, 'z') != NULL) {
			tab->d1 = estrdup(tab->d1);
			ztos(tab->d1);
		}
		if (strchr(tab->a1, 'z') != NULL) {
			tab->a1 = estrdup(tab->a1);
			ztos(tab->a1);
		}
	}
}
/*
 * Replace every 'z' in the NUL-terminated string "s" with 's', in place.
 */
void
ztos(char *s)
{
	while (*s != '\0') {
		if (*s == 'z')
			*s = 's';
		s++;
	}
}
/*
 * strdup() that does not return on failure: the program exits with
 * status 1 via err() if the copy cannot be allocated.
 */
char *
estrdup(const char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		err(1, "strdup");
	return (copy);
}
/*
* Look up a word in the dictionary.
* Returns 1 if found, 0 if not.
*/
int
dict(char *bp, char *ep)
{
char c;
int i, rval;
c = *ep;
*ep = '\0';
if (xflag)
printf("=%s\n", bp);
for (i = rval = 0; wlists[i].fd != -1; i++) {
if ((rval = look((unsigned char *)bp, wlists[i].front,
wlists[i].back)) == 1)
break;
}
*ep = c;
return (rval);
}
/*
 * Print a usage summary to stderr and exit with status 1.
 */
__dead void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-bvx] [-o found-words] word-list ...\n", __progname);
	exit(1);
}