Back out #155, it was problematic on HEAD. Reapply when it's
reworked and better tested.
This commit is contained in:
parent
dfdc11763a
commit
6763367ddb
|
@ -1,4 +1,4 @@
|
|||
# $NetBSD: join.awk,v 1.3.42.1 2014/10/24 07:30:14 martin Exp $
|
||||
# $NetBSD: join.awk,v 1.3.42.2 2014/10/24 23:13:22 riz Exp $
|
||||
#
|
||||
# Copyright (c) 2002 The NetBSD Foundation, Inc.
|
||||
# All rights reserved.
|
||||
|
@ -30,102 +30,7 @@
|
|||
# join.awk F1 F2
|
||||
# Similar to join(1), this reads a list of words from F1
|
||||
# and outputs lines in F2 with a first word that is in F1.
|
||||
# The first word is canonicalised via vis(unvis(word)).
|
||||
# Neither file needs to be sorted.
|
||||
|
||||
# unvis(s) -- decode a subset of the vis(3) encoding.
#
# Returns a copy of s in which a double backslash is replaced by a
# single backslash and "\ooo" (exactly three octal digits) is replaced
# by the character with that octal code.  Any other backslash sequence
# is reported on /dev/stderr and its backslash is copied through
# literally; the character after it is then processed as ordinary text.
#
# The parameters after s are local scratch variables (awk has no other
# way to declare locals); callers pass only s.
function unvis(s,	result, pos) \
{
	# XXX: We don't handle the complete range of vis encodings
	result = ""
	while (length(s) > 0) {
		# find the next backslash that is followed by another char
		pos = match(s, "\\\\.")
		if (pos == 0) {
			# no further escapes: the rest is copied verbatim
			result = result "" s
			s = ""
			break
		}
		# copy the part before the next backslash
		result = result "" substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			result = result "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: use strtonum() when that is available
			result = result "" sprintf("%c", \
				0+substr(s, 2, 1) * 64 + \
				0+substr(s, 3, 1) * 8 + \
				0+substr(s, 4, 1))
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			result = result "" substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	return result
}
|
||||
|
||||
# vis(s) -- encode s so that set-list tools can parse it safely.
#
# Returns a copy of s in which backslash becomes a double backslash,
# space becomes \040 and tab becomes \011.  Any other character matched
# by [[:space:]] or [^[:graph:]] cannot be encoded here: an error is
# printed on /dev/stderr and the character is copied through unchanged.
#
# The parameters after s are local scratch variables (awk has no other
# way to declare locals); callers pass only s.
function vis(s,	result, pos, ch) \
{
	# We need to encode backslash, space, and tab, because they
	# would interfere with scripts that attempt to manipulate
	# the set files.
	#
	# We make no attempt to encode shell special characters
	# such as " ' $ ( ) { } [ ] < > * ?, because nothing that
	# parses set files would need that.
	#
	# We would like to handle other white space or non-graph
	# characters, because they may be confusing for human readers,
	# but they are too difficult to handle in awk without the ord()
	# function, so we print an error message.
	#
	# As of October 2014, no files in the set lists contain
	# characters that would need any kind of encoding.
	#
	result = ""
	while (length(s) > 0) {
		pos = match(s, "(\\\\|[[:space:]]|[^[:graph:]])")
		if (pos == 0) {
			# nothing left to encode: the rest is copied verbatim
			result = result "" s
			s = ""
			break
		}
		# copy the part before the next special char
		result = result "" substr(s, 1, pos - 1)
		ch = substr(s, pos, 1)
		s = substr(s, pos + 1)
		# process the special char
		if (ch == "\\") {
			# backslash -> double backslash
			result = result "\\\\"
		} else if (ch == " ") {
			# space -> \040
			result = result "\\040"
		} else if (ch == "\t") {
			# tab -> \011
			result = result "\\011"
		} else {
			# generalised \ooo with three octal digits.
			# XXX: I don't know how to do this in awk without ord()
			printf "%s: %s:%s: cannot perform vis encoding\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR \
				>"/dev/stderr"
			result = result "" ch
		}
	}
	return result
}
|
||||
|
||||
// { $1 = vis(unvis($1)); print }
|
||||
# Neither file needs to be sorted
|
||||
|
||||
BEGIN \
|
||||
{
|
||||
|
@ -133,15 +38,11 @@ BEGIN \
|
|||
printf("Usage: join file1 file2\n") >"/dev/stderr"
|
||||
exit 1
|
||||
}
|
||||
while ( (getline < ARGV[1]) > 0) {
|
||||
$1 = vis(unvis($1))
|
||||
while ( (getline < ARGV[1]) > 0)
|
||||
words[$1] = $0
|
||||
}
|
||||
delete ARGV[1]
|
||||
}
|
||||
|
||||
// { $1 = vis(unvis($1)) }
|
||||
|
||||
$1 in words \
|
||||
{
|
||||
f1=$1
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# $NetBSD: CHANGES-7.0,v 1.1.2.64 2014/10/24 07:32:25 martin Exp $
|
||||
# $NetBSD: CHANGES-7.0,v 1.1.2.65 2014/10/24 23:13:22 riz Exp $
|
||||
|
||||
A complete list of changes from the initial NetBSD 7.0 branch on 11 Aug 2014
|
||||
until the 7.0 release:
|
||||
|
@ -1826,15 +1826,3 @@ sys/netinet/tcp_output.c 1.177
|
|||
IPsec outbound policy will not be checked when ipsec_used is false.
|
||||
[hikaru, ticket #154]
|
||||
|
||||
distrib/sets/join.awk 1.4-1.5
|
||||
|
||||
Add an unvis function and use it on file names in mtree specs.
|
||||
|
||||
This should correct the problem that ./bin/[ was missing from the
|
||||
base.tgz set, despite being listed in src/distrib/sets/base/mi
|
||||
and being present in METALOG. The corresponding entry in
|
||||
METALOG.sanitised has ./bin/\133 instead of ./bin/[, and that made
|
||||
join.awk omit it.
|
||||
|
||||
Add vis() function, and canonicalise file names via vis(unvis($1)).
|
||||
[apb, ticket #155]
|
||||
|
|
Loading…
Reference in New Issue