Part 2 of pattern matching (glob etc) fixes.

Attempt to correctly deal with \ (both when it is a literal,
in appropriate cases, and when it appears as CTLESC when it was
detected as a quoting character during parsing).

In a pattern, in sh, no quoted character can ever be anything other
than a literal character.   This is quite different than regular
expressions, and even different than other uses of glob matching,
where shell quoting is not an issue.

In something like

	ls ?\*.c

the ? is a meta-character, the * is a literal (it was quoted).  This
is nothing new, sh has handled that properly for ever.

But the same happens with
	VAR='?\*.c'
and
	ls $VAR

which has not always been handled correctly.   Of course, in

	ls "$VAR"

nothing in VAR is a meta-character (the entire expansion is quoted)
so even the '\' must match literally (or more accurately, no matching
happens - VAR simply contains an "unusual" filename).  But if it had
been

	ls *"$VAR"

then we would be looking for filenames that end with the literal 5
characters that make up $VAR.

The same kinds of things are required of matching patterns in case
statements, and sub-strings with the % and # operators in variable
expansions.

While here, the final remnant of the ancient !! pattern matching
hack has been removed (the code that actually implemented it was
long gone, but one small piece remained, not doing any real harm,
but potentially wasting time - if someone gave a pattern which would
once have invoked that hack.)
This commit is contained in:
kre 2018-07-22 23:07:48 +00:00
parent d211c89f40
commit 14482abc9a
2 changed files with 74 additions and 13 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: expand.c,v 1.126 2018/07/22 21:16:58 kre Exp $ */
/* $NetBSD: expand.c,v 1.127 2018/07/22 23:07:48 kre Exp $ */
/*-
* Copyright (c) 1991, 1993
@ -37,7 +37,7 @@
#if 0
static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95";
#else
__RCSID("$NetBSD: expand.c,v 1.126 2018/07/22 21:16:58 kre Exp $");
__RCSID("$NetBSD: expand.c,v 1.127 2018/07/22 23:07:48 kre Exp $");
#endif
#endif /* not lint */
@ -927,7 +927,9 @@ evalvar(const char *p, int flag)
varlen++;
} else {
while (*val) {
if (quotes && syntax[(int)*val] == CCTL)
if (quotes && (varflags & VSQUOTE) &&
(syntax[(int)*val] == CCTL ||
syntax[(int)*val] == CBACK))
STPUTC(CTLESC, expdest);
STPUTC(*val++, expdest);
}
@ -1465,22 +1467,59 @@ expmeta(char *enddir, char *name)
metaflag = 1;
else if (*p == '[') {
q = p + 1;
if (*q == '!')
if (*q == '!' || *q == '^')
q++;
for (;;) {
while (*q == CTLQUOTEMARK || *q == CTLNONL)
q++;
if (*q == CTLESC)
if (*q == ']') {
q++;
if (*q == '/' || *q == '\0')
break;
if (*++q == ']') {
metaflag = 1;
break;
}
if (*q == '[' && q[1] == ':') {
/*
* character class, look for :] ending
* also stop on ']' (end bracket expr)
* or '\0' or '/' (end pattern)
*/
while (*++q != '\0' && *q != ']' &&
*q != '/') {
if (*q == CTLESC) {
if (*++q == '\0')
break;
if (*q == '/')
break;
} else if (*q == ':' &&
q[1] == ']')
break;
}
if (*q == ':') {
/*
* stopped at ':]'
* still in [...]
* skip ":]" and continue;
*/
q += 2;
continue;
}
/* done at end of pattern, not [...] */
if (*q == '\0' || *q == '/')
break;
/* found the ']', we have a [...] */
metaflag = 1;
q++; /* skip ']' */
break;
}
if (*q == CTLESC)
q++;
/* end of pattern cannot be escaped */
if (*q == '/' || *q == '\0')
break;
q++;
}
} else if (*p == '!' && p[1] == '!' && (p == name || p[-1] == '/')) {
metaflag = 1;
} else if (*p == '\0')
break;
else if (*p == CTLQUOTEMARK || *p == CTLNONL)
@ -1707,11 +1746,25 @@ patmatch(const char *pattern, const char *string, int squoted)
for (;;) {
switch (c = *p++) {
case '\0':
if (squoted && *q == CTLESC) {
if (q[1] == '\0')
q++;
}
if (*q != '\0')
goto backtrack;
VTRACE(DBG_MATCH, ("match\n"));
return 1;
case CTLESC:
if (squoted && *q == CTLESC)
q++;
if (*p == '\0' && *q == '\0') {
VTRACE(DBG_MATCH, ("match-\\\n"));
return 1;
}
if (*q++ != *p++)
goto backtrack;
break;
case '\\':
if (squoted && *q == CTLESC)
q++;
if (*q++ != *p++)
@ -1747,6 +1800,10 @@ patmatch(const char *pattern, const char *string, int squoted)
q++;
}
}
if (c == CTLESC && p[1] == '\0') {
VTRACE(DBG_MATCH, ("match+\\\n"));
return 1;
}
/*
* First try the shortest match for the '*' that
* could work. We can forget any earlier '*' since
@ -1798,6 +1855,8 @@ patmatch(const char *pattern, const char *string, int squoted)
VTRACE(DBG_MATCH, ("[]fail\n"));
return 0;
}
if (squoted && *q == CTLESC)
q++;
chr = (unsigned char)*q++;
c = *p++;
do {

View File

@ -1,4 +1,4 @@
/* $NetBSD: parser.c,v 1.148 2018/07/20 22:47:26 kre Exp $ */
/* $NetBSD: parser.c,v 1.149 2018/07/22 23:07:48 kre Exp $ */
/*-
* Copyright (c) 1991, 1993
@ -37,7 +37,7 @@
#if 0
static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95";
#else
__RCSID("$NetBSD: parser.c,v 1.148 2018/07/20 22:47:26 kre Exp $");
__RCSID("$NetBSD: parser.c,v 1.149 2018/07/22 23:07:48 kre Exp $");
#endif
#endif /* not lint */
@ -1817,8 +1817,10 @@ readtoken1(int firstc, char const *syn, int magicq)
}
quotef = 1; /* current token is quoted */
if (ISDBLQUOTE() && c != '\\' && c != '`' &&
c != '$' && (c != '"' || magicq))
c != '$' && (c != '"' || magicq)) {
USTPUTC(CTLESC, out);
USTPUTC('\\', out);
}
if (SQSYNTAX[c] == CCTL || SQSYNTAX[c] == CSBACK)
USTPUTC(CTLESC, out);
else if (!magicq) {