Add WideString::{To,From,Is}ASCII()
Straight widening/narrowing of 7-bit values with no funny business.
Precursor to converting some L"" literals containing only single-byte
values.
Change-Id: I811a87c92d806744dc4bfbe23eb6aa3e27057103
Reviewed-on: https://pdfium-review.googlesource.com/c/45792
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fxcrt/widestring.cpp b/core/fxcrt/widestring.cpp
index 234270e..d699b42 100644
--- a/core/fxcrt/widestring.cpp
+++ b/core/fxcrt/widestring.cpp
@@ -635,7 +635,22 @@
return m_pData ? m_pData->m_nRefs : 0;
}
-// static
+// Reports whether every character of this string has a value in [1, 127].
+// An empty string yields true because there is nothing to reject.
+bool WideString::IsASCII() const {
+  for (wchar_t ch : *this) {
+    // wchar_t may be signed on some platforms, so the lower bound is checked
+    // explicitly instead of relying on an unsigned comparison.
+    const bool is_seven_bit = ch > 0 && ch <= 127;
+    if (!is_seven_bit)
+      return false;
+  }
+  return true;
+}
+
+// Narrows this string to a ByteString by keeping only the low seven bits of
+// each character. Values above 0x7f are truncated, not transcoded.
+ByteString WideString::ToASCII() const {
+  ByteString narrowed;
+  narrowed.Reserve(GetLength());
+  for (wchar_t wide_char : *this) {
+    const char low_bits = static_cast<char>(wide_char & 0x7f);
+    narrowed.InsertAtBack(low_bits);
+  }
+  return narrowed;
+}
+
ByteString WideString::ToDefANSI() const {
int src_len = GetLength();
int dest_len = FXSYS_WideCharToMultiByte(
@@ -864,6 +879,15 @@
}
// static
+// Widens |bstr| one byte at a time, masking each byte to its low seven bits
+// so that a byte with the high bit set never becomes a negative or
+// out-of-range wide character.
+WideString WideString::FromASCII(const ByteStringView& bstr) {
+  WideString widened;
+  widened.Reserve(bstr.GetLength());
+  for (char byte : bstr) {
+    const wchar_t low_bits = static_cast<wchar_t>(byte & 0x7f);
+    widened.InsertAtBack(low_bits);
+  }
+  return widened;
+}
+
+// static
WideString WideString::FromDefANSI(const ByteStringView& bstr) {
int src_len = bstr.GetLength();
int dest_len = FXSYS_MultiByteToWideChar(
diff --git a/core/fxcrt/widestring.h b/core/fxcrt/widestring.h
index 70339ff..87e886c 100644
--- a/core/fxcrt/widestring.h
+++ b/core/fxcrt/widestring.h
@@ -60,6 +60,7 @@
~WideString();
+ static WideString FromASCII(const ByteStringView& str) WARN_UNUSED_RESULT;  // Widens each byte of |str| after masking it to its low 7 bits.
static WideString FromDefANSI(const ByteStringView& str) WARN_UNUSED_RESULT;
static WideString FromUTF8(const ByteStringView& str) WARN_UNUSED_RESULT;
static WideString FromUTF16LE(const unsigned short* str,
@@ -191,6 +192,8 @@
size_t Replace(const WideStringView& pOld, const WideStringView& pNew);
size_t Remove(wchar_t ch);
+ bool IsASCII() const;  // True when every character lies in [1, 127]; vacuously true when empty.
+ ByteString ToASCII() const;  // Narrows each character to its low 7 bits; no transcoding is performed.
ByteString ToDefANSI() const;
ByteString ToUTF8() const;
diff --git a/core/fxcrt/widestring_unittest.cpp b/core/fxcrt/widestring_unittest.cpp
index 2d8a676..5efb556 100644
--- a/core/fxcrt/widestring_unittest.cpp
+++ b/core/fxcrt/widestring_unittest.cpp
@@ -1001,6 +1001,29 @@
}
}
+TEST(WideString, IsASCII) {
+  // U+007F is the largest value still accepted as ASCII.
+  const WideString all_seven_bit(L"xy\u007fz");
+  EXPECT_TRUE(all_seven_bit.IsASCII());
+
+  // U+0080 is the first value past the 7-bit range.
+  const WideString first_high_value(L"xy\u0080z");
+  EXPECT_FALSE(first_high_value.IsASCII());
+
+  // A value well outside the single-byte range is rejected as well.
+  const WideString multi_byte_value(L"xy\u2041z");
+  EXPECT_FALSE(multi_byte_value.IsASCII());
+}
+
+TEST(WideString, ToASCII) {
+  // Each input character is expected to come out as its low seven bits:
+  // U+0082 -> 0x02, U+00FF -> 0x7f, U+0122 -> 0x22, U+208C -> 0x0c.
+  const WideString input(L"x"
+                         L"\u0082"
+                         L"\u00ff"
+                         L"\u0122"
+                         L"\u208c"
+                         L"y");
+  const char* kExpected =
+      "x"
+      "\x02"
+      "\x7f"
+      "\x22"
+      "\x0c"
+      "y";
+  EXPECT_EQ(kExpected, input.ToASCII());
+}
+
TEST(WideString, ToDefANSI) {
EXPECT_EQ("", WideString().ToDefANSI());
#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
@@ -1027,7 +1050,20 @@
.ToDefANSI());
}
-TEST(WideString, FromLocal) {
+TEST(WideString, FromASCII) {
+  // Empty input must widen to an empty string. This exercises FromASCII
+  // itself; the previous expectation called FromDefANSI by mistake.
+  EXPECT_EQ(L"", WideString::FromASCII(ByteStringView()));
+  // Bytes with the high bit set are masked to seven bits:
+  // 0x82 -> U+0002 and 0xff -> U+007F.
+  const wchar_t* kResult =
+      L"x"
+      L"\u0002"
+      L"\u007f"
+      L"y";
+  EXPECT_EQ(kResult, WideString::FromASCII("x"
+                                           "\x82"
+                                           "\xff"
+                                           "y"));
+}
+
+TEST(WideString, FromDefANSI) {
EXPECT_EQ(L"", WideString::FromDefANSI(ByteStringView()));
#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
const wchar_t* kResult =