filevercmp: fix several unexpected results.

Sync with gnulib 9f48fb992a3d7e96610c4ce8be969cff2d61a01b.
Problems reported by Michael Debertol in <https://bugs.gnu.org/49239>.

Signed-off-by: Andrew Borodin <aborodin@vmail.ru>
This commit is contained in:
Andrew Borodin 2022-02-20 17:06:32 +03:00
parent d00a1c22b9
commit 7408272361
3 changed files with 297 additions and 133 deletions

View File

@ -540,24 +540,51 @@ int str_verscmp (const char *s1, const char *s2);
/* Compare version strings:
This function compares strings s1 and s2:
1) By PREFIX in the same way as strcmp.
2) Then by VERSION (most similarly to version compare of Debian's dpkg).
Leading zeros in version numbers are ignored.
3) If both (PREFIX and VERSION) are equal, strcmp function is used for
comparison. So this function can return 0 if (and only if) strings s1
and s2 are identical.
Compare strings a and b as file names containing version numbers, and return an integer
that is negative, zero, or positive depending on whether a compares less than, equal to,
or greater than b.
It returns number > 0 for s1 > s2, 0 for s1 == s2 and number < 0 for s1 < s2.
Use the following version sort algorithm:
This function compares strings, in a way that if VER1 and VER2 are version
numbers and PREFIX and SUFFIX (SUFFIX defined as (\.[A-Za-z~][A-Za-z0-9~]*)*)
are strings then VER1 < VER2 implies filevercmp (PREFIX VER1 SUFFIX,
PREFIX VER2 SUFFIX) < 0.
1. Compare the strings' maximal-length non-digit prefixes lexically.
If there is a difference return that difference.
Otherwise discard the prefixes and continue with the next step.
This function is intended to be a replacement for strverscmp.
2. Compare the strings' maximal-length digit prefixes, using numeric comparison
of the numbers represented by each prefix. (Treat an empty prefix as zero; this can
happen only at string end.)
If there is a difference, return that difference.
Otherwise discard the prefixes and continue with the next step.
3. If both strings are empty, return 0. Otherwise continue with step 1.
In version sort, lexical comparison is left to right, byte by byte, using the byte's numeric
value (0-255), except that:
1. ASCII letters sort before other bytes.
2. A tilde sorts before anything, even an empty string.
In addition to the version sort rules, the following strings have special priority and sort
before all other strings (listed in order):
1. The empty string.
2. ".".
3. "..".
4. Strings starting with "." sort before other strings.
Before comparing two strings where both begin with non-".", or where both begin with "."
but neither is "." or "..", suffixes matching the C-locale extended regular expression
(\.[A-Za-z~][A-Za-z0-9~]*)*$ are removed and the strings compared without them, using version sort
without special priority; if they do not compare equal, this comparison result is used and
the suffixes are effectively ignored. Otherwise, the entire strings are compared using version sort.
*/
int filevercmp (const char *s1, const char *s2);
int filevercmp (const char *a, const char *b);
/* Like filevercmp, except compare the byte arrays a (of length alen) and b (of length blen)
so that a and b can contain '\0', which sorts just before '\1'. But if alen is -1 treat
a as a string terminated by '\0', and similarly for blen.
*/
int filenvercmp (char const *a, ssize_t alen, char const *b, ssize_t blen);
/* return how many lines and columns will text occupy on terminal

View File

@ -1,27 +1,25 @@
/*
Copyright (C) 1995 Ian Jackson <iwj10@cus.cam.ac.uk>
Copyright (C) 2001 Anthony Towns <aj@azure.humbug.org.au>
Copyright (C) 2008-2018 Free Software Foundation, Inc.
Copyright (C) 2008-2022 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
GNU Lesser General Public License for more details.
You should have received a copy of the GNU General Public License
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "lib/strutil.h"
@ -38,53 +36,68 @@
/*** file scope functions ************************************************************************/
/* --------------------------------------------------------------------------------------------- */
/* Match a file suffix defined by this regular expression: /(\.[A-Za-z~][A-Za-z0-9~]*)*$/
/* Return the length of a prefix of @s that corresponds to the suffix defined by this extended
* regular expression in the C locale: (\.[A-Za-z~][A-Za-z0-9~]*)*$
*
* @str pointer to string to scan.
*
* @return pointer to the matching suffix, or NULL if not found.
* Upon return, @str points to terminating NUL.
* If *len is -1, s is a string; set *len to s's length.
* Otherwise, *len should be nonnegative, s is a char array, and *len does not change.
*/
static const char *
match_suffix (const char **str)
static ssize_t
file_prefixlen (const char *s, ssize_t * len)
{
const char *match = NULL;
gboolean read_alpha = FALSE;
size_t n = (size_t) (*len); /* SIZE_MAX if N == -1 */
size_t i;
while (**str != '\0')
for (i = 0;; i++)
{
if (read_alpha)
{
read_alpha = FALSE;
if (!g_ascii_isalpha (**str) && **str != '~')
match = NULL;
}
else if (**str == '.')
{
read_alpha = TRUE;
if (match == NULL)
match = *str;
}
else if (!g_ascii_isalnum (**str) && **str != '~')
match = NULL;
(*str)++;
}
size_t prefixlen = i;
gboolean done;
return match;
while (i + 1 < n && s[i] == '.' && (g_ascii_isalpha (s[i + 1]) || s[i + 1] == '~'))
for (i += 2; i < n && (g_ascii_isalnum (s[i]) || s[i] == '~'); i++)
;
if (*len < 0)
done = s[i] == '\0';
else
done = i == n;
if (done)
{
*len = (ssize_t) i;
return (ssize_t) prefixlen;
}
}
}
/* --------------------------------------------------------------------------------------------- */
/* verrevcmp helper function */
/* Return a version sort comparison value for @s's byte at position @pos.
*
* @param s a string
* @param pos a position in @s
* @param len a length of @s. If @pos == @len, sort before all non-'~' bytes.
*/
static int
order (unsigned char c)
order (const char *s, size_t pos, size_t len)
{
unsigned char c;
if (pos == len)
return (-1);
c = s[pos];
if (g_ascii_isdigit (c))
return 0;
if (g_ascii_isalpha (c))
return c;
if (c == '~')
return -1;
return (-2);
g_assert (UCHAR_MAX <= (INT_MAX - 1 - 2) / 2);
return (int) c + UCHAR_MAX + 1;
}
@ -99,18 +112,18 @@ order (unsigned char c)
* implements that from s5.6.12 of Debian Policy v3.8.0.1
* https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version
*
* @s1 first string to compare
* @s1_len length of @s1
* @s2 second string to compare
* @s2_len length of @s2
* @param s1 first char array to compare
* @param s1_len length of @s1
* @param s2 second char array to compare
* @param s2_len length of @s2
*
* @return an integer less than, equal to, or greater than zero, if @s1 is <, == or > than @s2.
*/
static int
verrevcmp (const char *s1, size_t s1_len, const char *s2, size_t s2_len)
verrevcmp (const char *s1, ssize_t s1_len, const char *s2, ssize_t s2_len)
{
size_t s1_pos = 0;
size_t s2_pos = 0;
ssize_t s1_pos = 0;
ssize_t s2_pos = 0;
while (s1_pos < s1_len || s2_pos < s2_len)
{
@ -119,13 +132,10 @@ verrevcmp (const char *s1, size_t s1_len, const char *s2, size_t s2_len)
while ((s1_pos < s1_len && !g_ascii_isdigit (s1[s1_pos]))
|| (s2_pos < s2_len && !g_ascii_isdigit (s2[s2_pos])))
{
int s1_c = 0;
int s2_c = 0;
int s1_c, s2_c;
if (s1_pos != s1_len)
s1_c = order (s1[s1_pos]);
if (s2_pos != s2_len)
s2_c = order (s2[s2_pos]);
s1_c = order (s1, s1_pos, s1_len);
s2_c = order (s2, s2_pos, s2_len);
if (s1_c != s2_c)
return (s1_c - s2_c);
@ -134,12 +144,13 @@ verrevcmp (const char *s1, size_t s1_len, const char *s2, size_t s2_len)
s2_pos++;
}
while (s1[s1_pos] == '0')
while (s1_pos < s1_len && s1[s1_pos] == '0')
s1_pos++;
while (s2[s2_pos] == '0')
while (s2_pos < s2_len && s2[s2_pos] == '0')
s2_pos++;
while (g_ascii_isdigit (s1[s1_pos]) && g_ascii_isdigit (s2[s2_pos]))
while (s1_pos < s1_len && s2_pos < s2_len
&& g_ascii_isdigit (s1[s1_pos]) && g_ascii_isdigit (s2[s2_pos]))
{
if (first_diff == 0)
first_diff = s1[s1_pos] - s2[s2_pos];
@ -148,10 +159,10 @@ verrevcmp (const char *s1, size_t s1_len, const char *s2, size_t s2_len)
s2_pos++;
}
if (g_ascii_isdigit (s1[s1_pos]))
if (s1_pos < s1_len && g_ascii_isdigit (s1[s1_pos]))
return 1;
if (g_ascii_isdigit (s2[s2_pos]))
return -1;
if (s2_pos < s2_len && g_ascii_isdigit (s2[s2_pos]))
return (-1);
if (first_diff != 0)
return first_diff;
}
@ -165,68 +176,84 @@ verrevcmp (const char *s1, size_t s1_len, const char *s2, size_t s2_len)
/* Compare version strings.
*
* @s1 first string to compare
* @s2 second string to compare
* @param s1 first string to compare
* @param s2 second string to compare
*
* @return an integer less than, equal to, or greater than zero, if @s1 is <, == or > than @s2.
*/
int
filevercmp (const char *s1, const char *s2)
{
const char *s1_pos, *s2_pos;
const char *s1_suffix, *s2_suffix;
size_t s1_len, s2_len;
int simple_cmp, result;
/* easy comparison to see if strings are identical */
simple_cmp = strcmp (s1, s2);
if (simple_cmp == 0)
return 0;
/* special handle for "", "." and ".." */
if (*s1 == '\0')
return -1;
if (*s2 == '\0')
return 1;
if (DIR_IS_DOT (s1))
return -1;
if (DIR_IS_DOT (s2))
return 1;
if (DIR_IS_DOTDOT (s1))
return -1;
if (DIR_IS_DOTDOT (s2))
return 1;
/* special handle for other hidden files */
if (*s1 == '.' && *s2 != '.')
return -1;
if (*s1 != '.' && *s2 == '.')
return 1;
if (*s1 == '.' && *s2 == '.')
{
s1++;
s2++;
}
/* "cut" file suffixes */
s1_pos = s1;
s2_pos = s2;
s1_suffix = match_suffix (&s1_pos);
s2_suffix = match_suffix (&s2_pos);
s1_len = (s1_suffix != NULL ? s1_suffix : s1_pos) - s1;
s2_len = (s2_suffix != NULL ? s2_suffix : s2_pos) - s2;
/* restore file suffixes if strings are identical after "cut" */
if ((s1_suffix != NULL || s2_suffix != NULL) && (s1_len == s2_len)
&& strncmp (s1, s2, s1_len) == 0)
{
s1_len = s1_pos - s1;
s2_len = s2_pos - s2;
}
result = verrevcmp (s1, s1_len, s2, s2_len);
return result == 0 ? simple_cmp : result;
return filenvercmp (s1, -1, s2, -1);
}
/* --------------------------------------------------------------------------------------------- */
/* Compare two file names (strings or byte arrays) as version strings.
 *
 * @param a first string to compare
 * @param alen length of @a, or (-1) if @a is a NUL-terminated string
 * @param b second string to compare
 * @param blen length of @b, or (-1) if @b is a NUL-terminated string
 *
 * @return an integer less than, equal to, or greater than zero, if @a is <, == or > than @b.
 */
int
filenvercmp (const char *a, ssize_t alen, const char *b, ssize_t blen)
{
    const gboolean a_is_empty = (alen < 0) ? (a[0] == '\0') : (alen == 0);
    const gboolean b_is_empty = (blen < 0) ? (b[0] == '\0') : (blen == 0);
    ssize_t a_cut, b_cut;
    int first_pass;

    /* The empty name sorts before every other name. */
    if (a_is_empty && b_is_empty)
        return 0;
    if (a_is_empty)
        return (-1);
    if (b_is_empty)
        return 1;

    /* Names with a leading "." sort before names without one. */
    if (a[0] != '.' && b[0] == '.')
        return 1;
    if (a[0] == '.' && b[0] != '.')
        return (-1);

    if (a[0] == '.')
    {
        /* Both names start with ".": "." sorts first, then "..", then the rest. */
        const gboolean a_is_dot = (alen < 0) ? (a[1] == '\0') : (alen == 1);
        const gboolean b_is_dot = (blen < 0) ? (b[1] == '\0') : (blen == 1);
        gboolean a_is_dotdot, b_is_dotdot;

        if (a_is_dot || b_is_dot)
            return (a_is_dot ? (b_is_dot ? 0 : -1) : 1);

        /* Neither name is ".", so each has at least two bytes and index 1 is readable. */
        a_is_dotdot = a[1] == '.' && ((alen < 0) ? (a[2] == '\0') : (alen == 2));
        b_is_dotdot = b[1] == '.' && ((blen < 0) ? (b[2] == '\0') : (blen == 2));

        if (a_is_dotdot || b_is_dotdot)
            return (a_is_dotdot ? (b_is_dotdot ? 0 : -1) : 1);
    }

    /* Cut file suffixes. file_prefixlen() also replaces a (-1) length with the real one. */
    a_cut = file_prefixlen (a, &alen);
    b_cut = file_prefixlen (b, &blen);

    /* First pass: compare the names with their suffixes removed. */
    first_pass = verrevcmp (a, a_cut, b, b_cut);
    if (first_pass != 0)
        return first_pass;

    /* If nothing was cut from either name, a second pass would return the same thing. */
    if (a_cut == alen && b_cut == blen)
        return first_pass;

    /* Second pass: restore the suffixes and compare the entire names. */
    return verrevcmp (a, alen, b, blen);
}
/* --------------------------------------------------------------------------------------------- */

View File

@ -56,6 +56,40 @@ sign (int n)
/* --------------------------------------------------------------------------------------------- */
/*
 * Return filevercmp (a, b), additionally checking that filenvercmp yields a result of the
 * same sign when every '\1' byte of @a and @b is replaced with '\0' and explicit lengths
 * are passed instead of relying on NUL termination.
 */
static int
test_filevercmp (char const *a, char const *b)
{
    char buffer[BUF_1K];
    const size_t alen = strlen (a);
    const size_t blen = strlen (b);
    const int result = filevercmp (a, b);
    char *p;
    int nresult;

    /* Store both names back to back, without terminators. */
    ck_assert_int_le (alen + blen, sizeof (buffer));
    memcpy (buffer, a, alen);
    memcpy (buffer + alen, b, blen);

    /* Turn the '\1' placeholders into embedded '\0' bytes. */
    for (p = buffer; p != buffer + alen + blen; p++)
        if (*p == '\1')
            *p = '\0';

    nresult = filenvercmp (buffer, alen, buffer + alen, blen);
    ck_assert_int_eq (sign (nresult), sign (result));

    return result;
}
/* --------------------------------------------------------------------------------------------- */
/* @DataSource("filevercmp_test_ds1") */
/* Testcases are taken from Gnulib */
/* *INDENT-OFF* */
@ -70,7 +104,6 @@ static const struct filevercmp_test_struct
{ "a", "a", 0 },
{ "a", "b", -1 },
{ "b", "a", 1 },
{ "a0", "a", 1 },
{ "00", "01", -1 },
{ "01", "010", -1 },
{ "9", "10", -1 },
@ -106,8 +139,6 @@ static const char *filevercmp_test_ds2[] = {
"",
".",
"..",
".0",
".9",
".A",
".Z",
".a~",
@ -118,7 +149,14 @@ static const char *filevercmp_test_ds2[] = {
".zz~",
".zz",
".zz.~1~",
".0",
".9",
".zz.0",
".\1",
".\1.txt",
".\1x",
".\1x\1",
".\1.0",
"0",
"9",
"A",
@ -129,6 +167,10 @@ static const char *filevercmp_test_ds2[] = {
"a.b",
"a.bc~",
"a.bc",
"a+",
"a.",
"a..a",
"a.+",
"b~",
"b",
"gcc-c++-10.fc9.tar.gz",
@ -158,6 +200,13 @@ static const char *filevercmp_test_ds2[] = {
"zz",
"zz.~1~",
"zz.0",
"zz.0.txt",
"\1",
"\1.txt",
"\1x",
"\1x\1",
"\1.0",
"#\1.b#",
"#.b#"
};
@ -176,7 +225,7 @@ START_TEST (filevercmp_test2)
const char *j = filevercmp_test_ds2[_j];
int result;
result = filevercmp (i, j);
result = test_filevercmp (i, j);
if (result < 0)
ck_assert_int_lt ((size_t) _i, _j);
@ -265,6 +314,66 @@ START_TEST (filevercmp_test4)
END_TEST
/* *INDENT-ON* */
/* @DataSource("filevercmp_test_ds5") */
/* Testcases are taken from Gnulib */
/*
 * The table is a sequence of groups, each one terminated by NULL.
 * filevercmp_test5 asserts that any two names taken from the same group compare equal
 * (result 0), in both argument orders.
 * The '\1' bytes are placeholders: test_filevercmp replaces them with '\0' before
 * repeating the comparison through filenvercmp.
 */
static const char *filevercmp_test_ds5[] = {
/* Trailing zeros only. */
"a",
"a0",
"a0000",
NULL,
/* Same number written with different counts of leading zeros. */
"a\1c-27.txt",
"a\1c-027.txt",
"a\1c-00000000000000000000000000000000000000000000000000000027.txt",
NULL,
/* Same as the previous group, for names with a leading ".". */
".a\1c-27.txt",
".a\1c-027.txt",
".a\1c-00000000000000000000000000000000000000000000000000000027.txt",
NULL,
/* Missing number versus all-zero numbers at the end of the name. */
"a\1c-",
"a\1c-0",
"a\1c-00",
NULL,
/* Same as the previous group, for names with a leading ".". */
".a\1c-",
".a\1c-0",
".a\1c-00",
NULL,
/* All-zero numbers followed by a suffix. */
"a\1c-0.txt",
"a\1c-00.txt",
NULL,
/* Leading zeros in a number followed by a placeholder byte and a suffix. */
".a\1c-1\1.txt",
".a\1c-001\1.txt",
NULL
};
/* Number of entries in filevercmp_test_ds5, including the NULL group terminators. */
const size_t filevercmp_test_ds5_len = G_N_ELEMENTS (filevercmp_test_ds5);
/* @Test(dataSource = "filevercmp_test_ds5") */
/* Every pair of names inside one NULL-terminated group of filevercmp_test_ds5
   must compare equal, whichever name is passed first. */
/* *INDENT-OFF* */
START_TEST (filevercmp_test5)
/* *INDENT-ON* */
{
    size_t group = 0;

    /* Walk the table one group at a time. */
    while (group < filevercmp_test_ds5_len)
    {
        size_t first;

        for (first = group; filevercmp_test_ds5[first] != NULL; first++)
        {
            const char *lhs = filevercmp_test_ds5[first];
            size_t second;

            /* Pair the name with itself and with every later name of the same group. */
            for (second = first; filevercmp_test_ds5[second] != NULL; second++)
            {
                const char *rhs = filevercmp_test_ds5[second];

                ck_assert_int_eq (test_filevercmp (lhs, rhs), 0);
                ck_assert_int_eq (test_filevercmp (rhs, lhs), 0);
            }
        }

        /* "first" now indexes the group's NULL terminator; step past it. */
        group = first + 1;
    }
}
/* *INDENT-OFF* */
END_TEST
/* *INDENT-ON* */
/* --------------------------------------------------------------------------------------------- */
int
@ -281,6 +390,7 @@ main (void)
tcase_add_loop_test (tc_core, filevercmp_test2, 0, filevercmp_test_ds2_len);
tcase_add_loop_test (tc_core, filevercmp_test3, 0, filevercmp_test_ds3_len);
tcase_add_loop_test (tc_core, filevercmp_test4, 0, filevercmp_test_ds4_len);
tcase_add_test (tc_core, filevercmp_test5);
/* *********************************** */
return mctest_run_all (tc_core);