Back out #155, it was problematic on HEAD. Reapply when it's

reworked and better tested.
This commit is contained in:
riz 2014-10-24 23:13:22 +00:00
parent dfdc11763a
commit 6763367ddb
2 changed files with 4 additions and 115 deletions

View File

@ -1,4 +1,4 @@
# $NetBSD: join.awk,v 1.3.42.1 2014/10/24 07:30:14 martin Exp $
# $NetBSD: join.awk,v 1.3.42.2 2014/10/24 23:13:22 riz Exp $
#
# Copyright (c) 2002 The NetBSD Foundation, Inc.
# All rights reserved.
@ -30,102 +30,7 @@
# join.awk F1 F2
# Similar to join(1), this reads a list of words from F1
# and outputs lines in F2 with a first word that is in F1.
# The first word is canonicalised via vis(unvis(word))).
# Neither file needs to be sorted.
function unvis(s) \
{
# XXX: We don't handle the complete range of vis encodings
unvis_result = ""
while (length(s) > 0) {
unvis_pos = match(s, "\\\\.")
if (unvis_pos == 0) {
unvis_result = unvis_result "" s
s = ""
break
}
# copy the part before the next backslash
unvis_result = unvis_result "" substr(s, 1, unvis_pos - 1)
s = substr(s, unvis_pos)
# process the backslash and next few chars
if (substr(s, 1, 2) == "\\\\") {
# double backslash -> single backslash
unvis_result = unvis_result "\\"
s = substr(s, 3)
} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
# \ooo with three octal digits.
# XXX: use strtonum() when that is available
unvis_result = unvis_result "" sprintf("%c", \
0+substr(s, 2, 1) * 64 + \
0+substr(s, 3, 1) * 8 + \
0+substr(s, 4, 1))
s = substr(s, 5)
} else {
# unrecognised escape: keep the literal backslash
printf "%s: %s:%s: unrecognised escape %s\n", \
ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
substr(s, 1, 2) \
>"/dev/stderr"
unvis_result = unvis_result "" substr(s, 1, 1)
s = substr(s, 2)
}
}
return unvis_result
}
function vis(s) \
{
# We need to encode backslash, space, and tab, because they
# would interfere with scripts that attempt to manipulate
# the set files.
#
# We make no attempt to encode shell special characters
# such as " ' $ ( ) { } [ ] < > * ?, because nothing that
# parses set files would need that.
#
# We would like to handle other white space or non-graph
# characters, because they may be confusing for human readers,
# but they are too difficult to handle in awk without the ord()
# function, so we print an error message.
#
# As of October 2014, no files in the set lists contain
# characters that would need any kind of encoding.
#
vis_result = ""
while (length(s) > 0) {
vis_pos = match(s, "(\\\\|[[:space:]]|[^[:graph:]])")
if (vis_pos == 0) {
vis_result = vis_result "" s
s = ""
break
}
# copy the part before the next special char
vis_result = vis_result "" substr(s, 1, vis_pos - 1)
vis_char = substr(s, vis_pos, 1)
s = substr(s, vis_pos + 1)
# process the special char
if (vis_char == "\\") {
# backslash -> double backslash
vis_result = vis_result "\\\\"
} else if (vis_char == " ") {
# space -> \040
vis_result = vis_result "\\040"
} else if (vis_char == "\t") {
# tab -> \011
vis_result = vis_result "\\011"
} else {
# generalised \ooo with three octal digits.
# XXX: I don't know how to do this in awk without ord()
printf "%s: %s:%s: cannot perform vis encoding\n", \
ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR \
>"/dev/stderr"
vis_result = vis_result "" vis_char
}
}
return vis_result
}
// { $1 = vis(unvis($1)); print }
# Neither file needs to be sorted
BEGIN \
{
@ -133,15 +38,11 @@ BEGIN \
printf("Usage: join file1 file2\n") >"/dev/stderr"
exit 1
}
while ( (getline < ARGV[1]) > 0) {
$1 = vis(unvis($1))
while ( (getline < ARGV[1]) > 0)
words[$1] = $0
}
delete ARGV[1]
}
// { $1 = vis(unvis($1)) }
$1 in words \
{
f1=$1

View File

@ -1,4 +1,4 @@
# $NetBSD: CHANGES-7.0,v 1.1.2.64 2014/10/24 07:32:25 martin Exp $
# $NetBSD: CHANGES-7.0,v 1.1.2.65 2014/10/24 23:13:22 riz Exp $
A complete list of changes from the initial NetBSD 7.0 branch on 11 Aug 2014
until the 7.0 release:
@ -1826,15 +1826,3 @@ sys/netinet/tcp_output.c 1.177
IPsec outbound policy will not be checked when ipsec_used is false.
[hikaru, ticket #154]
distrib/sets/join.awk 1.4-1.5
Add an unvis function and use it on file names in mtree specs.
This should correct the problem that ./bin/[ was missing from the
base.tgz set, despite being listed in src/distrib/sets/base/mi
and being present in METALOG. The corresponding entry in
METALOG.sanitised has ./bin/\133 instead of ./bin/[, and that made
join.awk omit it.
Add vis() function, and canonicalise file names via vis(unvis($1)).
[apb, ticket #155]