tests/grep: demonstrate word search bug in usr.bin/grep

By default, MKBSDGREP is "no", so GNU Grep is installed as /usr/bin/grep instead of the grep built from src/usr.bin/grep; the bug therefore does not occur in practice.
2021-08-30 23:10:58 +00:00 · 2021-08-30 23:10:58 +00:00 · bc8c69fd2e
parent 6af009133b
commit bc8c69fd2e
1 changed files with 84 additions and 2 deletions
--- a/tests/usr.bin/grep/t_grep.sh
+++ b/tests/usr.bin/grep/t_grep.sh
@ -1,4 +1,4 @@
-# $NetBSD: t_grep.sh,v 1.4 2021/08/30 22:17:32 rillig Exp $
+# $NetBSD: t_grep.sh,v 1.5 2021/08/30 23:10:58 rillig Exp $
 #
 # Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 # All rights reserved.
@ -104,11 +104,91 @@ word_locale_body()
 	atf_check -o file:"input" \
 	    env LC_ALL=C grep -w "array" "input"

-	# XXX: In an UTF-8 locale, '[' seems to be a word character.
+	# XXX: In an UTF-8 locale, GNU Grep treats '[' as a word character.
 	atf_check -s exit:1 -o empty \
 	    env LC_ALL="C.UTF-8" grep -w "array" "input"
 }

+atf_test_case word_in_line
+word_in_line_head()
+{
+	atf_set "descr" "Checks word search at the beginning, middle and end of a line"
+}
+word_in_line_body()
+{
+	# Exercises "grep -w" at the start, middle and end of a line.  See
+	# usr.bin/grep/util.c, "Check for whole word match", which looks
+	# wrong: NetBSD grep fails this test, while GNU Grep passes it.
+
+	echo "begin middle end" > "input"
+
+	# A whole word at the beginning of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL=C grep -w "begin" "input"
+
+	# A whole word in the middle of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL=C grep -w "middle" "input"
+
+	# A whole word at the end of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL=C grep -w "end" "input"
+
+	# A prefix of the first word is not a word match (grep exits 1).
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "be" "input"
+
+	# Neither a prefix nor a suffix of the middle word is a word match.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "mid" "input"
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "dle" "input"
+
+	# A suffix of the last word is not a word match.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL=C grep -w "nd" "input"
+}
+
+atf_test_case word_in_line_utf8
+word_in_line_utf8_head()
+{
+	atf_set "descr" "Checks word search within a line in a UTF-8 locale"
+}
+word_in_line_utf8_body()
+{
+	# Same checks as word_in_line, but run with LC_ALL="C.UTF-8".  See
+	# usr.bin/grep/util.c, "Check for whole word match", which looks
+	# wrong: NetBSD grep fails this test, while GNU Grep passes it.
+
+	echo "begin middle end" > "input"
+
+	# A whole word at the beginning of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL="C.UTF-8" grep -w "begin" "input"
+
+	# A whole word in the middle of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL="C.UTF-8" grep -w "middle" "input"
+
+	# A whole word at the end of a line is found.
+	atf_check -o file:"input" \
+	    env LC_ALL="C.UTF-8" grep -w "end" "input"
+
+	# A prefix of the first word is not a word match (grep exits 1).
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "be" "input"
+
+	# Neither a prefix nor a suffix of the middle word is a word match.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "mid" "input"
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "dle" "input"
+
+	# A suffix of the last word is not a word match.
+	atf_check -s exit:1 -o empty \
+	    env LC_ALL="C.UTF-8" grep -w "nd" "input"
+}
+
 atf_test_case begin_end
 begin_end_head()
 {
@ -255,6 +335,8 @@ atf_init_test_cases()
 	atf_add_test_case recurse_symlink
 	atf_add_test_case word_regexps
 	atf_add_test_case word_locale
+	atf_add_test_case word_in_line
+	atf_add_test_case word_in_line_utf8
 	atf_add_test_case begin_end
 	atf_add_test_case ignore_case
 	atf_add_test_case invert