-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[libc] Error fixes for mbrtowc and wcrtomb #145785
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
POSIX specifies that if there is an invalid mbstate_t, these functions should set errno to EINVAL. Changed Error return for the internal functions and added tests for the public functions.
@llvm/pr-subscribers-libc Author: None (sribee8) ChangesInvalid mbstate_t should set errno to EINVAL. Full diff: https://github.com/llvm/llvm-project/pull/145785.diff 8 Files Affected:
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 6aade4ccc84a6..86a47319f278a 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -27,6 +27,7 @@ add_object_library(
SRCS
wcrtomb.cpp
DEPENDS
+ libc.hdr.errno_macros
libc.hdr.types.char32_t
libc.hdr.types.size_t
libc.hdr.types.wchar_t
@@ -43,6 +44,7 @@ add_object_library(
SRCS
mbrtowc.cpp
DEPENDS
+ libc.hdr.errno_macros
libc.hdr.types.wchar_t
libc.hdr.types.size_t
libc.src.__support.common
diff --git a/libc/src/__support/wchar/mbrtowc.cpp b/libc/src/__support/wchar/mbrtowc.cpp
index 954c7458f4dfb..3b8f7666026c3 100644
--- a/libc/src/__support/wchar/mbrtowc.cpp
+++ b/libc/src/__support/wchar/mbrtowc.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "src/__support/wchar/mbrtowc.h"
+#include "hdr/errno_macros.h"
#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
@@ -22,6 +23,8 @@ namespace internal {
ErrorOr<size_t> mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
size_t n, mbstate *__restrict ps) {
CharacterConverter char_conv(ps);
+ if (!char_conv.isValidState())
+ return Error(EINVAL);
if (s == nullptr)
return 0;
size_t i = 0;
@@ -30,7 +33,7 @@ ErrorOr<size_t> mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
int err = char_conv.push(static_cast<char8_t>(s[i]));
// Encoding error
if (err == -1)
- return Error(-1);
+ return Error(EILSEQ);
}
auto wc = char_conv.pop_utf32();
if (wc.has_value()) {
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index 8ca3d17ad6ce1..a74a6f3ec34a6 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -11,6 +11,7 @@
#include "src/__support/wchar/character_converter.h"
#include "src/__support/wchar/mbstate.h"
+#include "hdr/errno_macros.h"
#include "hdr/types/char32_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
@@ -26,12 +27,15 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
CharacterConverter cr(ps);
+ if (!cr.isValidState())
+ return Error(EINVAL);
+
if (s == nullptr)
- return Error(-1);
+ return Error(EILSEQ);
int status = cr.push(static_cast<char32_t>(wc));
if (status != 0)
- return Error(status);
+ return Error(EILSEQ);
size_t count = 0;
while (!cr.isEmpty()) {
diff --git a/libc/src/wchar/mbrtowc.cpp b/libc/src/wchar/mbrtowc.cpp
index cd429ab8d30e2..6ecf4828da9bf 100644
--- a/libc/src/wchar/mbrtowc.cpp
+++ b/libc/src/wchar/mbrtowc.cpp
@@ -29,7 +29,7 @@ LLVM_LIBC_FUNCTION(size_t, mbrtowc,
: reinterpret_cast<internal::mbstate *>(ps));
if (!ret.has_value()) {
// Encoding failure
- libc_errno = EILSEQ;
+ libc_errno = ret.error();
return -1;
}
return ret.value();
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 6d604a00599ee..3e9df0599431e 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -35,7 +35,7 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb,
: reinterpret_cast<internal::mbstate *>(ps));
if (!result.has_value()) {
- libc_errno = EILSEQ;
+ libc_errno = result.error();
return -1;
}
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index ddf8709a6a2a2..bf16fdd7f8c4d 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -31,10 +31,12 @@ add_libc_test(
mbrtowc_test.cpp
DEPENDS
libc.src.__support.libc_errno
+ libc.src.__support.wchar.mbstate
libc.src.string.memset
libc.src.wchar.mbrtowc
libc.hdr.types.mbstate_t
libc.hdr.types.wchar_t
+ libc.test.UnitTest.ErrnoCheckingTest
)
add_libc_test(
@@ -72,6 +74,8 @@ add_libc_test(
libc.hdr.types.wchar_t
libc.hdr.types.mbstate_t
libc.src.__support.libc_errno
+ libc.src.__support.wchar.mbstate
+ libc.test.UnitTest.ErrnoCheckingTest
)
add_libc_test(
diff --git a/libc/test/src/wchar/mbrtowc_test.cpp b/libc/test/src/wchar/mbrtowc_test.cpp
index 69dcf00fde207..5a14d8e25935c 100644
--- a/libc/test/src/wchar/mbrtowc_test.cpp
+++ b/libc/test/src/wchar/mbrtowc_test.cpp
@@ -9,11 +9,15 @@
#include "hdr/types/mbstate_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/libc_errno.h"
+#include "src/__support/wchar/mbstate.h"
#include "src/string/memset.h"
#include "src/wchar/mbrtowc.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/Test.h"
-TEST(LlvmLibcMBRToWC, OneByte) {
+using LlvmLibcMBRToWCTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcMBRToWCTest, OneByte) {
const char *ch = "A";
wchar_t dest[2];
// Testing if it works with nullptr mbstate_t
@@ -21,13 +25,15 @@ TEST(LlvmLibcMBRToWC, OneByte) {
size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
ASSERT_EQ(static_cast<char>(*dest), 'A');
ASSERT_EQ(static_cast<int>(n), 1);
+ ASSERT_ERRNO_SUCCESS();
// Should fail since we have not read enough
n = LIBC_NAMESPACE::mbrtowc(dest, ch, 0, mb);
ASSERT_EQ(static_cast<int>(n), -2);
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcMBRToWC, TwoByte) {
+TEST_F(LlvmLibcMBRToWCTest, TwoByte) {
const char ch[2] = {static_cast<char>(0xC2),
static_cast<char>(0x8E)}; // � car symbol
wchar_t dest[2];
@@ -36,6 +42,7 @@ TEST(LlvmLibcMBRToWC, TwoByte) {
size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb);
ASSERT_EQ(static_cast<int>(*dest), 142);
ASSERT_EQ(static_cast<int>(n), 2);
+ ASSERT_ERRNO_SUCCESS();
// Should fail since we have not read enough
n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
@@ -44,9 +51,10 @@ TEST(LlvmLibcMBRToWC, TwoByte) {
n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 1, mb);
ASSERT_EQ(static_cast<int>(n), 1);
ASSERT_EQ(static_cast<int>(*dest), 142);
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcMBRToWC, ThreeByte) {
+TEST_F(LlvmLibcMBRToWCTest, ThreeByte) {
const char ch[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
static_cast<char>(0x91)}; // ∑ sigma symbol
wchar_t dest[2];
@@ -55,17 +63,20 @@ TEST(LlvmLibcMBRToWC, ThreeByte) {
size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 3, mb);
ASSERT_EQ(static_cast<int>(*dest), 8721);
ASSERT_EQ(static_cast<int>(n), 3);
+ ASSERT_ERRNO_SUCCESS();
// Should fail since we have not read enough
n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
ASSERT_EQ(static_cast<int>(n), -2);
+ ASSERT_ERRNO_SUCCESS();
// Should pass after reading two more bytes
n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 2, mb);
ASSERT_EQ(static_cast<int>(n), 2);
ASSERT_EQ(static_cast<int>(*dest), 8721);
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcMBRToWC, FourByte) {
+TEST_F(LlvmLibcMBRToWCTest, FourByte) {
const char ch[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
static_cast<char>(0xA4),
static_cast<char>(0xA1)}; // 🤡 clown emoji
@@ -75,27 +86,29 @@ TEST(LlvmLibcMBRToWC, FourByte) {
size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, mb);
ASSERT_EQ(static_cast<int>(*dest), 129313);
ASSERT_EQ(static_cast<int>(n), 4);
-
+ ASSERT_ERRNO_SUCCESS();
// Should fail since we have not read enough
n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb);
ASSERT_EQ(static_cast<int>(n), -2);
+ ASSERT_ERRNO_SUCCESS();
// Should pass after reading two more bytes
n = LIBC_NAMESPACE::mbrtowc(dest, ch + 2, 2, mb);
ASSERT_EQ(static_cast<int>(n), 2);
ASSERT_EQ(static_cast<int>(*dest), 129313);
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcMBRToWC, InvalidByte) {
+TEST_F(LlvmLibcMBRToWCTest, InvalidByte) {
const char ch[1] = {static_cast<char>(0x80)};
wchar_t dest[2];
mbstate_t *mb;
LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
ASSERT_EQ(static_cast<int>(n), -1);
- ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+ ASSERT_ERRNO_EQ(EILSEQ);
}
-TEST(LlvmLibcMBRToWC, InvalidMultiByte) {
+TEST_F(LlvmLibcMBRToWCTest, InvalidMultiByte) {
const char ch[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
static_cast<char>(0x80),
static_cast<char>(0x00)}; // invalid sequence of bytes
@@ -105,18 +118,19 @@ TEST(LlvmLibcMBRToWC, InvalidMultiByte) {
// Trying to push all 4 should error
size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, mb);
ASSERT_EQ(static_cast<int>(n), -1);
- ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+ ASSERT_ERRNO_EQ(EILSEQ);
// Trying to push just the first one should error
n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
ASSERT_EQ(static_cast<int>(n), -1);
- ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+ ASSERT_ERRNO_EQ(EILSEQ);
// Trying to push the second and third should correspond to null wc
n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 2, mb);
ASSERT_EQ(static_cast<int>(n), 0);
ASSERT_TRUE(*dest == L'\0');
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcMBRToWC, InvalidLastByte) {
+TEST_F(LlvmLibcMBRToWCTest, InvalidLastByte) {
// Last byte is invalid since it does not have correct starting sequence.
// 0xC0 --> 11000000 starting sequence should be 10xxxxxx
const char ch[4] = {static_cast<char>(0xF1), static_cast<char>(0x80),
@@ -127,10 +141,10 @@ TEST(LlvmLibcMBRToWC, InvalidLastByte) {
// Trying to push all 4 should error
size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, mb);
ASSERT_EQ(static_cast<int>(n), -1);
- ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+ ASSERT_ERRNO_EQ(EILSEQ);
}
-TEST(LlvmLibcMBRToWC, ValidTwoByteWithExtraRead) {
+TEST_F(LlvmLibcMBRToWCTest, ValidTwoByteWithExtraRead) {
const char ch[3] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
static_cast<char>(0x80)};
wchar_t dest[2];
@@ -140,9 +154,10 @@ TEST(LlvmLibcMBRToWC, ValidTwoByteWithExtraRead) {
size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 3, mb);
ASSERT_EQ(static_cast<int>(n), 2);
ASSERT_EQ(static_cast<int>(*dest), 142);
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcMBRToWC, TwoValidTwoBytes) {
+TEST_F(LlvmLibcMBRToWCTest, TwoValidTwoBytes) {
const char ch[4] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
static_cast<char>(0xC7), static_cast<char>(0x8C)};
wchar_t dest[2];
@@ -152,12 +167,14 @@ TEST(LlvmLibcMBRToWC, TwoValidTwoBytes) {
size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb);
ASSERT_EQ(static_cast<int>(n), 2);
ASSERT_EQ(static_cast<int>(*dest), 142);
+ ASSERT_ERRNO_SUCCESS();
n = LIBC_NAMESPACE::mbrtowc(dest + 1, ch + 2, 2, mb);
ASSERT_EQ(static_cast<int>(n), 2);
ASSERT_EQ(static_cast<int>(*(dest + 1)), 460);
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcMBRToWC, NullString) {
+TEST_F(LlvmLibcMBRToWCTest, NullString) {
wchar_t dest[2] = {L'O', L'K'};
mbstate_t *mb;
LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
@@ -165,8 +182,24 @@ TEST(LlvmLibcMBRToWC, NullString) {
size_t n = LIBC_NAMESPACE::mbrtowc(dest, nullptr, 2, mb);
ASSERT_EQ(static_cast<int>(n), 0);
ASSERT_TRUE(dest[0] == L'O');
+ ASSERT_ERRNO_SUCCESS();
// reading a null terminator should return 0
const char *ch = "\0";
n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
ASSERT_EQ(static_cast<int>(n), 0);
+ ASSERT_ERRNO_SUCCESS();
+}
+
+TEST_F(LlvmLibcMBRToWCTest, InvalidMBState) {
+ const char ch[4] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
+ static_cast<char>(0xC7), static_cast<char>(0x8C)};
+ wchar_t dest[2] = {L'O', L'K'};
+ mbstate_t *mb;
+ LIBC_NAMESPACE::internal::mbstate inv;
+ inv.total_bytes = 6;
+ mb = reinterpret_cast<mbstate_t *>(&inv);
+ // invalid mbstate should error
+ size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb);
+ ASSERT_EQ(static_cast<int>(n), -1);
+ ASSERT_ERRNO_EQ(EINVAL);
}
diff --git a/libc/test/src/wchar/wcrtomb_test.cpp b/libc/test/src/wchar/wcrtomb_test.cpp
index c06b39ae0143f..b29624e87f7af 100644
--- a/libc/test/src/wchar/wcrtomb_test.cpp
+++ b/libc/test/src/wchar/wcrtomb_test.cpp
@@ -9,11 +9,15 @@
#include "hdr/types/mbstate_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/libc_errno.h"
+#include "src/__support/wchar/mbstate.h"
#include "src/string/memset.h"
#include "src/wchar/wcrtomb.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/Test.h"
-TEST(LlvmLibcWCRToMBTest, OneByte) {
+using LlvmLibcWCRToMBTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcWCRToMBTest, OneByte) {
mbstate_t state;
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
wchar_t wc = L'U';
@@ -21,9 +25,10 @@ TEST(LlvmLibcWCRToMBTest, OneByte) {
size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
ASSERT_EQ(cnt, static_cast<size_t>(1));
ASSERT_EQ(mb[0], 'U');
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcWCRToMBTest, TwoByte) {
+TEST_F(LlvmLibcWCRToMBTest, TwoByte) {
mbstate_t state;
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
// testing utf32: 0xff -> utf8: 0xc3 0xbf
@@ -33,9 +38,10 @@ TEST(LlvmLibcWCRToMBTest, TwoByte) {
ASSERT_EQ(cnt, static_cast<size_t>(2));
ASSERT_EQ(mb[0], static_cast<char>(0xc3));
ASSERT_EQ(mb[1], static_cast<char>(0xbf));
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcWCRToMBTest, ThreeByte) {
+TEST_F(LlvmLibcWCRToMBTest, ThreeByte) {
mbstate_t state;
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
// testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
@@ -46,9 +52,10 @@ TEST(LlvmLibcWCRToMBTest, ThreeByte) {
ASSERT_EQ(mb[0], static_cast<char>(0xea));
ASSERT_EQ(mb[1], static_cast<char>(0xb0));
ASSERT_EQ(mb[2], static_cast<char>(0x95));
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcWCRToMBTest, FourByte) {
+TEST_F(LlvmLibcWCRToMBTest, FourByte) {
mbstate_t state;
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
// testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
@@ -60,9 +67,10 @@ TEST(LlvmLibcWCRToMBTest, FourByte) {
ASSERT_EQ(mb[1], static_cast<char>(0x9f));
ASSERT_EQ(mb[2], static_cast<char>(0xa4));
ASSERT_EQ(mb[3], static_cast<char>(0xa1));
+ ASSERT_ERRNO_SUCCESS();
}
-TEST(LlvmLibcWCRToMBTest, NullString) {
+TEST_F(LlvmLibcWCRToMBTest, NullString) {
mbstate_t state;
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
wchar_t wc = L'A';
@@ -70,24 +78,39 @@ TEST(LlvmLibcWCRToMBTest, NullString) {
// should be equivalent to the call wcrtomb(buf, L'\0', state)
size_t cnt1 = LIBC_NAMESPACE::wcrtomb(nullptr, wc, &state);
+ ASSERT_ERRNO_SUCCESS();
size_t cnt2 = LIBC_NAMESPACE::wcrtomb(mb, L'\0', &state);
+ ASSERT_ERRNO_SUCCESS();
ASSERT_EQ(cnt1, cnt2);
}
-TEST(LlvmLibcWCRToMBTest, NullState) {
+TEST_F(LlvmLibcWCRToMBTest, NullState) {
wchar_t wc = L'A';
char mb[4];
size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, nullptr);
+ ASSERT_ERRNO_SUCCESS();
ASSERT_EQ(cnt, static_cast<size_t>(1));
}
-TEST(LlvmLibcWCRToMBTest, InvalidWchar) {
+TEST_F(LlvmLibcWCRToMBTest, InvalidWchar) {
mbstate_t state;
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
wchar_t wc = 0x12ffff;
char mb[4];
size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
ASSERT_EQ(cnt, static_cast<size_t>(-1));
- ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+ ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+TEST_F(LlvmLibcWCRToMBTest, InvalidMBState) {
+ mbstate_t *state;
+ LIBC_NAMESPACE::internal::mbstate inv;
+ inv.total_bytes = 6;
+ state = reinterpret_cast<mbstate_t *>(&inv);
+ wchar_t wc = L'A';
+ char mb[4];
+ size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, state);
+ ASSERT_EQ(cnt, static_cast<size_t>(-1));
+ ASSERT_ERRNO_EQ(EINVAL);
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Invalid mbstate_t should set errno to EINVAL. Changed Error return for the internal functions and added tests for the public functions. Co-authored-by: Sriya Pratipati <sriyap@google.com>
Invalid mbstate_t should set errno to EINVAL. Changed Error return for the internal functions and added tests for the public functions. Co-authored-by: Sriya Pratipati <sriyap@google.com>
Invalid mbstate_t should set errno to EINVAL.
Changed Error return for the internal functions and added tests for the public functions.