2006-10-16 12:48:45 +04:00
|
|
|
.\" $NetBSD: bm.3,v 1.8 2006/10/16 08:48:45 wiz Exp $
|
2001-04-09 16:07:57 +04:00
|
|
|
.\"
|
1994-06-22 04:15:22 +04:00
|
|
|
.\" Copyright (c) 1994
|
|
|
|
.\" The Regents of the University of California. All rights reserved.
|
|
|
|
.\"
|
|
|
|
.\" This code is derived from software contributed to Berkeley by
|
|
|
|
.\" Andrew Hume of AT&T Bell Laboratories.
|
|
|
|
.\"
|
|
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
|
|
.\" modification, are permitted provided that the following conditions
|
|
|
|
.\" are met:
|
|
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
|
|
.\" documentation and/or other materials provided with the distribution.
|
2003-08-07 20:42:00 +04:00
|
|
|
.\" 3. Neither the name of the University nor the names of its contributors
|
1994-06-22 04:15:22 +04:00
|
|
|
.\" may be used to endorse or promote products derived from this software
|
|
|
|
.\" without specific prior written permission.
|
|
|
|
.\"
|
|
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
.\" SUCH DAMAGE.
|
|
|
|
.\"
|
1994-06-22 04:17:04 +04:00
|
|
|
.\" from: @(#)bm.3 8.4 (Berkeley) 6/21/94
|
1994-06-22 04:15:22 +04:00
|
|
|
.\"
|
2001-04-09 16:07:57 +04:00
|
|
|
.Dd April 8, 2001
|
|
|
|
.Dt BM 3
|
|
|
|
.Os
|
|
|
|
.Sh NAME
|
|
|
|
.Nm bm_comp ,
|
|
|
|
.Nm bm_exec ,
|
|
|
|
.Nm bm_free
|
|
|
|
.Nd Boyer-Moore string search
|
|
|
|
.Sh LIBRARY
|
|
|
|
.Lb libc
|
|
|
|
.Sh SYNOPSIS
|
2003-04-16 17:34:34 +04:00
|
|
|
.In sys/types.h
|
|
|
|
.In bm.h
|
2001-04-09 16:07:57 +04:00
|
|
|
.Ft bm_pat *
|
|
|
|
.Fn bm_comp "u_char *pattern" "size_t patlen" "u_char freq[256]"
|
|
|
|
.Ft u_char *
|
|
|
|
.Fn bm_exec "bm_pat *pdesc" "u_char *text" "size_t len"
|
|
|
|
.Ft void
|
|
|
|
.Fn bm_free "bm_pat *pdesc"
|
|
|
|
.Sh DESCRIPTION
|
1994-06-22 04:15:22 +04:00
|
|
|
These routines implement an efficient mechanism to find an
|
|
|
|
occurrence of a byte string within another byte string.
|
2001-04-09 16:07:57 +04:00
|
|
|
.Pp
|
|
|
|
.Fn bm_comp
|
1994-06-22 04:15:22 +04:00
|
|
|
evaluates the
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fa patlen
|
1994-06-22 04:15:22 +04:00
|
|
|
bytes starting at
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fa pattern ,
|
1994-06-22 04:15:22 +04:00
|
|
|
and returns a pointer to a structure describing them.
|
|
|
|
The bytes referenced by
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fa pattern
|
1994-06-22 04:15:22 +04:00
|
|
|
may be of any value.
|
2001-04-09 16:07:57 +04:00
|
|
|
.Pp
|
1994-06-22 04:15:22 +04:00
|
|
|
The search takes advantage of the frequency distribution of the
|
|
|
|
bytes in the text to be searched.
|
|
|
|
If specified,
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fa freq
|
1994-06-22 04:15:22 +04:00
|
|
|
should be an array of 256 values,
|
|
|
|
with higher values indicating that the corresponding character occurs
|
|
|
|
more frequently.
|
|
|
|
(A less than optimal frequency distribution can only result in less
|
|
|
|
than optimal performance, not incorrect results.)
|
|
|
|
If
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fa freq
|
2006-10-16 12:48:45 +04:00
|
|
|
is
|
|
|
|
.Dv NULL ,
|
1994-06-22 04:15:22 +04:00
|
|
|
a system default table is used.
|
2001-04-09 16:07:57 +04:00
|
|
|
.Pp
|
|
|
|
.Fn bm_exec
|
1994-06-22 04:15:22 +04:00
|
|
|
returns a pointer to the leftmost occurrence of the string given to
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fn bm_comp
|
1994-06-22 04:15:22 +04:00
|
|
|
within
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fa text ,
|
2006-10-16 12:48:45 +04:00
|
|
|
or
|
|
|
|
.Dv NULL
|
|
|
|
if none occurs.
|
1994-06-22 04:15:22 +04:00
|
|
|
The number of bytes in
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fa text
|
1994-06-22 04:15:22 +04:00
|
|
|
must be specified by
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fa len .
|
|
|
|
.Pp
|
1994-06-22 04:15:22 +04:00
|
|
|
Space allocated for the returned description is released
|
|
|
|
by calling
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fn bm_free
|
1994-06-22 04:15:22 +04:00
|
|
|
with the returned description as an argument.
|
2001-04-09 16:07:57 +04:00
|
|
|
.Pp
|
1994-06-22 04:15:22 +04:00
|
|
|
The asymptotic speed of
|
2001-04-09 16:07:57 +04:00
|
|
|
.Fn bm_exec
|
|
|
|
is O(len/patlen).
|
|
|
|
.Sh SEE ALSO
|
|
|
|
.Xr regexp 3 ,
|
|
|
|
.Xr strstr 3
|
|
|
|
.Rs
|
|
|
|
.%A A. Hume
.%A D. Sunday
|
|
|
|
.%D November 1991
|
|
|
|
.%J "Software Practice and Experience"
|
|
|
|
.%P pp. 1221-1248
|
|
|
|
.%T "Fast String Searching"
|
|
|
|
.%V Vol. 21, 11
|
|
|
|
.Re
|