From 62a51dead9eb7b5d6892447b11070bc8fab41c6a Mon Sep 17 00:00:00 2001
From: matt335672 <30179339+matt335672@users.noreply.github.com>
Date: Sun, 24 Dec 2023 20:14:14 +0000
Subject: [PATCH] Fix g_strtrim() regression

Commit 36ea4a3 failed to take UTF-8 characters into account on platforms
with signed char.
---
 common/string_calls.c            |  8 +++++---
 tests/common/test_string_calls.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/common/string_calls.c b/common/string_calls.c
index 16acc869..dca5bc15 100644
--- a/common/string_calls.c
+++ b/common/string_calls.c
@@ -715,6 +715,7 @@ g_strstr(const char *haystack, const char *needle)
 int
 g_strtrim(char *str, int trim_flags)
 {
+#define TRIMMABLE_CHAR(c) ((unsigned char)(c) <= ' ')
     int rv = 0;
     int index;
     int j;
@@ -726,7 +727,7 @@ g_strtrim(char *str, int trim_flags)
             j = 0;
             for (index = 0; str[index] != '\0'; index++)
             {
-                if (str[index] > ' ')
+                if (!TRIMMABLE_CHAR(str[index]))
                 {
                     str[j++] = str[index];
                 }
@@ -741,7 +742,7 @@ g_strtrim(char *str, int trim_flags)
 
         case 2: /* trim right */
             index = strlen(str);
-            while (index > 0 && str[index - 1] <= ' ')
+            while (index > 0 && TRIMMABLE_CHAR(str[index - 1]))
             {
                 --index;
             }
@@ -750,7 +751,7 @@ g_strtrim(char *str, int trim_flags)
 
         case 1: /* trim left */
             index = 0;
-            while (str[index] != '\0' && str[index] <= ' ')
+            while (str[index] != '\0' && TRIMMABLE_CHAR(str[index]))
             {
                 ++index;
             }
@@ -765,6 +766,7 @@ g_strtrim(char *str, int trim_flags)
     }
 
     return rv;
+#undef TRIMMABLE_CHAR
 }
 
 /*****************************************************************************/
diff --git a/tests/common/test_string_calls.c b/tests/common/test_string_calls.c
index 02240809..5944c536 100644
--- a/tests/common/test_string_calls.c
+++ b/tests/common/test_string_calls.c
@@ -14,6 +14,24 @@
 
 #define RESULT_LEN 1024
 
+/* Universal character names need a C99 compiler */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+# define CJK_UNIFIED_IDEOGRAPH_5E78 "\u5e78"
+# define CJK_UNIFIED_IDEOGRAPH_798F "\u798f"
+# define CJK_UNIFIED_IDEOGRAPH_5B89 "\u5b89"
+# define CJK_UNIFIED_IDEOGRAPH_5EB7 "\u5eb7"
+#else
+/* Pre-C99 compiler: assume UTF-8 and spell the bytes out (no '//' here) */
+# define CJK_UNIFIED_IDEOGRAPH_5E78 "\xe5\xb9\xb8"
+# define CJK_UNIFIED_IDEOGRAPH_798F "\xe7\xa6\x8f"
+# define CJK_UNIFIED_IDEOGRAPH_5B89 "\xe5\xae\x89"
+# define CJK_UNIFIED_IDEOGRAPH_5EB7 "\xe5\xba\xb7"
+#endif
+
+#define HAPPINESS_AND_WELL_BEING \
+    CJK_UNIFIED_IDEOGRAPH_5E78 CJK_UNIFIED_IDEOGRAPH_798F \
+    CJK_UNIFIED_IDEOGRAPH_5B89 CJK_UNIFIED_IDEOGRAPH_5EB7
+
 START_TEST(test_strnjoin__when_src_is_null__returns_empty_string)
 {
     /* setup */
@@ -1038,6 +1056,19 @@ START_TEST(test_strtrim__trim_through)
 }
 END_TEST
 
+START_TEST(test_strtrim__chinese_chars)
+{
+    /* setup */
+    char output[] = "\t\t \t" HAPPINESS_AND_WELL_BEING "\t\t \n\n";
+
+    /* test */
+    g_strtrim(output, 4);
+
+    /* verify */
+    ck_assert_str_eq(output, HAPPINESS_AND_WELL_BEING);
+}
+END_TEST
+
 /******************************************************************************/
 
 START_TEST(test_sigs__common)
@@ -1192,6 +1223,7 @@ make_suite_test_string(void)
     tcase_add_test(tc_strtrim, test_strtrim__trim_right);
     tcase_add_test(tc_strtrim, test_strtrim__trim_both);
     tcase_add_test(tc_strtrim, test_strtrim__trim_through);
+    tcase_add_test(tc_strtrim, test_strtrim__chinese_chars);
 
     tc_sigs = tcase_create("signals");
     suite_add_tcase(s, tc_sigs);