@@ -, +, @@ --- lib/util/charset/util_str.c | 12 +++++++++++- 1 files changed, 11 insertions(+), 1 deletions(-) --- a/lib/util/charset/util_str.c +++ a/lib/util/charset/util_str.c @@ -56,7 +56,17 @@ _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle, if (c1 == INVALID_CODEPOINT || c2 == INVALID_CODEPOINT) { - /* what else can we do?? */ + /* + * Fall back to byte + * comparison. We must + * step back by the codepoint + * length we just incremented + * by - otherwise we are not + * checking the bytes that + * failed the conversion. + */ + s1 -= size1; + s2 -= size2; return strcasecmp(s1, s2); } -- --- lib/util/charset/util_str.c | 29 +++++++++++++++++++++++++++-- 1 files changed, 27 insertions(+), 2 deletions(-) --- a/lib/util/charset/util_str.c +++ a/lib/util/charset/util_str.c @@ -116,8 +116,33 @@ _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle, if (c1 == INVALID_CODEPOINT || c2 == INVALID_CODEPOINT) { - /* what else can we do?? */ - return strcasecmp(s1, s2); + /* + * Fall back to byte + * comparison. We must + * step back by the codepoint + * length we just incremented + * by - otherwise we are not + * checking the bytes that + * failed the conversion. + */ + s1 -= size1; + s2 -= size2; + /* + * n was specified in characters, + * now we must convert it to bytes. + * As bytes are the smallest + * character unit, the following + * increment and strncasecmp is always + * safe. + * + * The source string was already known + * to be n characters long, so we are + * guaranteed to be able to look at the + * (n remaining + size1) bytes from the + * new (s1 - size1) position. 
+ */ + n += size1; + return strncasecmp(s1, s2, n); } if (toupper_m(c1) != toupper_m(c2)) { -- --- lib/util/charset/tests/charset.c | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-) --- a/lib/util/charset/tests/charset.c +++ a/lib/util/charset/tests/charset.c @@ -50,12 +50,18 @@ static bool test_codepoint_cmpi(struct torture_context *tctx) static bool test_strcasecmp_m(struct torture_context *tctx) { + /* file.{accented e} in iso8859-1 */ + const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 }; + /* file.{accented e} in utf8 */ + const char file_utf8[8] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 }; torture_assert(tctx, strcasecmp_m("foo", "bar") != 0, "different strings"); torture_assert(tctx, strcasecmp_m("foo", "foo") == 0, "same case strings"); torture_assert(tctx, strcasecmp_m("foo", "Foo") == 0, "different case strings"); torture_assert(tctx, strcasecmp_m(NULL, "Foo") != 0, "one NULL"); torture_assert(tctx, strcasecmp_m("foo", NULL) != 0, "other NULL"); torture_assert(tctx, strcasecmp_m(NULL, NULL) == 0, "both NULL"); + torture_assert(tctx, strcasecmp_m(file_iso8859_1, file_utf8) != 0, + "file.{accented e} should differ"); return true; } @@ -102,6 +108,10 @@ static bool test_string_replace_m(struct torture_context *tctx) static bool test_strncasecmp_m(struct torture_context *tctx) { + /* file.{accented e} in iso8859-1 */ + const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 }; + /* file.{accented e} in utf8 */ + const char file_utf8[8] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 }; torture_assert(tctx, strncasecmp_m("foo", "bar", 3) != 0, "different strings"); torture_assert(tctx, strncasecmp_m("foo", "foo", 3) == 0, "same case strings"); torture_assert(tctx, strncasecmp_m("foo", "Foo", 3) == 0, "different case strings"); @@ -111,6 +121,8 @@ static bool test_strncasecmp_m(struct torture_context *tctx) torture_assert(tctx, strncasecmp_m(NULL, "Foo", 3) != 0, "one NULL"); torture_assert(tctx, 
strncasecmp_m("foo", NULL, 3) != 0, "other NULL"); torture_assert(tctx, strncasecmp_m(NULL, NULL, 3) == 0, "both NULL"); + torture_assert(tctx, strncasecmp_m(file_iso8859_1, file_utf8, 6) != 0, + "file.{accented e} should differ"); return true; } --