@@ -, +, @@ in the face of bad conversions. --- lib/util/charset/util_str.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) --- a/lib/util/charset/util_str.c +++ a/lib/util/charset/util_str.c @@ -56,7 +56,17 @@ _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle, if (c1 == INVALID_CODEPOINT || c2 == INVALID_CODEPOINT) { - /* what else can we do?? */ + /* + * Fall back to byte + * comparison. We must + * step back by the codepoint + * length we just incremented + * - otherwise we are not + * checking the bytes that + * failed the conversion. + */ + s1 -= size1; + s2 -= size2; return strcasecmp(s1, s2); } -- in the face of bad conversions. --- lib/util/charset/util_str.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) --- a/lib/util/charset/util_str.c +++ a/lib/util/charset/util_str.c @@ -116,8 +116,33 @@ _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle, if (c1 == INVALID_CODEPOINT || c2 == INVALID_CODEPOINT) { - /* what else can we do?? */ - return strcasecmp(s1, s2); + /* + * Fall back to byte + * comparison. We must + * step back by the codepoint + * length we just incremented + * by - otherwise we are not + * checking the bytes that + * failed the conversion. + */ + s1 -= size1; + s2 -= size2; + /* + * n was specified in characters, + * now we must convert it to bytes. + * As bytes are the smallest + * character unit, the following + * increment and strncasecmp is always + * safe. + * + * The source string was already known + * to be n characters long, so we are + * guaranteed to be able to look at the + * (n remaining + size1) bytes from the + * new (s1 - size1) position). 
+ */ + n += size1; + return strncasecmp(s1, s2, n); } if (toupper_m(c1) != toupper_m(c2)) { -- constantly crashes when filename contains non-ascii character --- lib/util/charset/tests/charset.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) --- a/lib/util/charset/tests/charset.c +++ a/lib/util/charset/tests/charset.c @@ -50,12 +50,18 @@ static bool test_codepoint_cmpi(struct torture_context *tctx) static bool test_strcasecmp_m(struct torture_context *tctx) { + /* file-{accented e} in iso8859-1 */ + const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 }; + /* file-{accented e} in utf8 */ + const char file_utf8[8] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 }; torture_assert(tctx, strcasecmp_m("foo", "bar") != 0, "different strings"); torture_assert(tctx, strcasecmp_m("foo", "foo") == 0, "same case strings"); torture_assert(tctx, strcasecmp_m("foo", "Foo") == 0, "different case strings"); torture_assert(tctx, strcasecmp_m(NULL, "Foo") != 0, "one NULL"); torture_assert(tctx, strcasecmp_m("foo", NULL) != 0, "other NULL"); torture_assert(tctx, strcasecmp_m(NULL, NULL) == 0, "both NULL"); + torture_assert(tctx, strcasecmp_m(file_iso8859_1, file_utf8) != 0, + "file.{accented e} should differ"); return true; } @@ -102,6 +108,10 @@ static bool test_string_replace_m(struct torture_context *tctx) static bool test_strncasecmp_m(struct torture_context *tctx) { + /* file-{accented e} in iso8859-1 */ + const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 }; + /* file-{accented e} in utf8 */ + const char file_utf8[8] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 }; torture_assert(tctx, strncasecmp_m("foo", "bar", 3) != 0, "different strings"); torture_assert(tctx, strncasecmp_m("foo", "foo", 3) == 0, "same case strings"); torture_assert(tctx, strncasecmp_m("foo", "Foo", 3) == 0, "different case strings"); @@ -111,6 +121,8 @@ static bool test_strncasecmp_m(struct torture_context *tctx) torture_assert(tctx, strncasecmp_m(NULL, "Foo", 3)
!= 0, "one NULL"); torture_assert(tctx, strncasecmp_m("foo", NULL, 3) != 0, "other NULL"); torture_assert(tctx, strncasecmp_m(NULL, NULL, 3) == 0, "both NULL"); + torture_assert(tctx, strncasecmp_m(file_iso8859_1, file_utf8, 6) != 0, + "file.{accented e} should differ"); return true; } -- --- lib/util/charset/util_str.c | 48 ++++++++++++--------------------------------- 1 file changed, 13 insertions(+), 35 deletions(-) --- a/lib/util/charset/util_str.c +++ a/lib/util/charset/util_str.c @@ -47,6 +47,11 @@ _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle, c1 = next_codepoint_handle(iconv_handle, s1, &size1); c2 = next_codepoint_handle(iconv_handle, s2, &size2); + if (c1 == INVALID_CODEPOINT || + c2 == INVALID_CODEPOINT) { + return strcasecmp(s1, s2); + } + s1 += size1; s2 += size2; @@ -54,22 +59,6 @@ _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle, continue; } - if (c1 == INVALID_CODEPOINT || - c2 == INVALID_CODEPOINT) { - /* - * Fall back to byte - * comparison. We must - * step back by the codepoint - * length we just incremented - * - otherwise we are not - * checking the bytes that - * failed the conversion. - */ - s1 -= size1; - s2 -= size2; - return strcasecmp(s1, s2); - } - if (toupper_m(c1) != toupper_m(c2)) { return c1 - c2; } @@ -107,27 +96,9 @@ _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle, c1 = next_codepoint_handle(iconv_handle, s1, &size1); c2 = next_codepoint_handle(iconv_handle, s2, &size2); - s1 += size1; - s2 += size2; - - if (c1 == c2) { - continue; - } - if (c1 == INVALID_CODEPOINT || c2 == INVALID_CODEPOINT) { /* - * Fall back to byte - * comparison. We must - * step back by the codepoint - * length we just incremented - * by - otherwise we are not - * checking the bytes that - * failed the conversion. - */ - s1 -= size1; - s2 -= size2; - /* * n was specified in characters, * now we must convert it to bytes. 
* As bytes are the smallest @@ -139,12 +110,19 @@ _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle, * to be n characters long, so we are * guaranteed to be able to look at the * (n remaining + size1) bytes from the - * new (s1 - size1) position). + * s1 position. */ n += size1; return strncasecmp(s1, s2, n); } + s1 += size1; + s2 += size2; + + if (c1 == c2) { + continue; + } + if (toupper_m(c1) != toupper_m(c2)) { return c1 - c2; } --