Make WideString's FromUTF16LE(), FromUTF16BE() take bytes, not unsigned short
This allows callers to pass in UTF-16 data that's possibly not
2-byte aligned, and seems to be what most of the callers want too.
With this change, we'll be able to use these functions in PDF_DecodeText.
No intended behavior change.
Change-Id: I2095c49b98646a33b21342fe4289c0436f8447b9
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/113950
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Nico Weber <thakis@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Auto-Submit: Nico Weber <thakis@chromium.org>
diff --git a/core/fxcrt/widestring.cpp b/core/fxcrt/widestring.cpp
index 9d0b956..2ad7b5d 100644
--- a/core/fxcrt/widestring.cpp
+++ b/core/fxcrt/widestring.cpp
@@ -957,36 +957,39 @@
}
// static
-WideString WideString::FromUTF16LE(const unsigned short* wstr, size_t wlen) {
- if (!wstr || wlen == 0)
+WideString WideString::FromUTF16LE(pdfium::span<const uint8_t> data) {
+ if (data.empty()) {
return WideString();
+ }
WideString result;
+ size_t length = 0;
{
// Span's lifetime must end before ReleaseBuffer() below.
- pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
- for (size_t i = 0; i < wlen; i++)
- buf[i] = wstr[i];
+ pdfium::span<wchar_t> buf = result.GetBuffer(data.size() / 2);
+ for (size_t i = 0; i < data.size() - 1; i += 2) {
+ buf[length++] = data[i] | data[i + 1] << 8;
+ }
}
- result.ReleaseBuffer(wlen);
+ result.ReleaseBuffer(length);
return result;
}
-WideString WideString::FromUTF16BE(const unsigned short* wstr, size_t wlen) {
- if (!wstr || wlen == 0)
+WideString WideString::FromUTF16BE(pdfium::span<const uint8_t> data) {
+ if (data.empty()) {
return WideString();
+ }
WideString result;
+ size_t length = 0;
{
// Span's lifetime must end before ReleaseBuffer() below.
- pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
- for (size_t i = 0; i < wlen; i++) {
- auto wch = wstr[i];
- wch = (wch >> 8) | (wch << 8);
- buf[i] = wch;
+ pdfium::span<wchar_t> buf = result.GetBuffer(data.size() / 2);
+ for (size_t i = 0; i < data.size() - 1; i += 2) {
+ buf[length++] = data[i] << 8 | data[i + 1];
}
}
- result.ReleaseBuffer(wlen);
+ result.ReleaseBuffer(length);
return result;
}
diff --git a/core/fxcrt/widestring.h b/core/fxcrt/widestring.h
index a01e960..0b8d7aa 100644
--- a/core/fxcrt/widestring.h
+++ b/core/fxcrt/widestring.h
@@ -71,10 +71,8 @@
[[nodiscard]] static WideString FromLatin1(ByteStringView str);
[[nodiscard]] static WideString FromDefANSI(ByteStringView str);
[[nodiscard]] static WideString FromUTF8(ByteStringView str);
- [[nodiscard]] static WideString FromUTF16LE(const unsigned short* str,
- size_t len);
- [[nodiscard]] static WideString FromUTF16BE(const unsigned short* wstr,
- size_t wlen);
+ [[nodiscard]] static WideString FromUTF16LE(pdfium::span<const uint8_t> data);
+ [[nodiscard]] static WideString FromUTF16BE(pdfium::span<const uint8_t> data);
[[nodiscard]] static size_t WStringLength(const unsigned short* str);
diff --git a/core/fxge/fx_font.cpp b/core/fxge/fx_font.cpp
index a3bf344..f0f3576 100644
--- a/core/fxge/fx_font.cpp
+++ b/core/fxge/fx_font.cpp
@@ -117,11 +117,7 @@
return ByteString();
}
- pdfium::span<const uint8_t> raw_span = utf16_be.raw_span();
- return WideString::FromUTF16BE(
- reinterpret_cast<const uint16_t*>(raw_span.data()),
- raw_span.size() / 2)
- .ToUTF8();
+ return WideString::FromUTF16BE(utf16_be.raw_span()).ToUTF8();
}
}
}
diff --git a/core/fxge/win32/cwin32_platform.cpp b/core/fxge/win32/cwin32_platform.cpp
index 0d08822..461a915 100644
--- a/core/fxge/win32/cwin32_platform.cpp
+++ b/core/fxge/win32/cwin32_platform.cpp
@@ -25,11 +25,11 @@
struct Variant {
const char* m_pFaceName;
- const char* m_pVariantName; // Note: UTF-16LE terminator required.
+ pdfium::span<const char> m_pVariantName;
};
constexpr Variant kVariantNames[] = {
- {"DFKai-SB", "\x19\x6A\x77\x69\xD4\x9A\x00\x00"},
+ {"DFKai-SB", pdfium::make_span("\x19\x6A\x77\x69\xD4\x9A")},
};
struct Substs {
@@ -375,10 +375,8 @@
if (new_face != variant.m_pFaceName)
continue;
- const auto* pName =
- reinterpret_cast<const unsigned short*>(variant.m_pVariantName);
- size_t len = WideString::WStringLength(pName);
- WideString wsName = WideString::FromUTF16LE(pName, len);
+ WideString wsName =
+ WideString::FromUTF16LE(pdfium::as_bytes(variant.m_pVariantName));
if (wsFace == wsName)
return hFont;
}
diff --git a/fpdfsdk/cpdfsdk_formfillenvironment.cpp b/fpdfsdk/cpdfsdk_formfillenvironment.cpp
index 075e43e..a063f16 100644
--- a/fpdfsdk/cpdfsdk_formfillenvironment.cpp
+++ b/fpdfsdk/cpdfsdk_formfillenvironment.cpp
@@ -154,8 +154,8 @@
if (nActualLen <= 0 || nActualLen > nRequiredLen)
return WideString();
- return WideString::FromUTF16LE(reinterpret_cast<uint16_t*>(pBuff.data()),
- nActualLen / sizeof(uint16_t));
+ return WideString::FromUTF16LE(
+ {pBuff.data(), static_cast<size_t>(nActualLen)});
#else // PDF_ENABLE_XFA
return WideString();
#endif // PDF_ENABLE_XFA
@@ -176,8 +176,8 @@
if (nActualLen <= 0 || nActualLen > nRequiredLen)
return WideString();
- return WideString::FromUTF16LE(reinterpret_cast<uint16_t*>(pBuff.data()),
- nActualLen / sizeof(uint16_t));
+ return WideString::FromUTF16LE(
+ {pBuff.data(), static_cast<size_t>(nActualLen)});
#else // PDF_ENABLE_XFA
return WideString();
#endif // PDF_ENABLE_XFA
@@ -570,8 +570,8 @@
AsFPDFWideString(&bsHeader), &response);
WideString wsRet =
- WideString::FromUTF16LE(reinterpret_cast<FPDF_WIDESTRING>(response.str),
- response.len / sizeof(FPDF_WCHAR));
+ WideString::FromUTF16LE({reinterpret_cast<const uint8_t*>(response.str),
+ static_cast<size_t>(response.len)});
FPDF_BStr_Clear(&response);
return wsRet;
diff --git a/fpdfsdk/cpdfsdk_helpers.cpp b/fpdfsdk/cpdfsdk_helpers.cpp
index c548420..4d5020f 100644
--- a/fpdfsdk/cpdfsdk_helpers.cpp
+++ b/fpdfsdk/cpdfsdk_helpers.cpp
@@ -210,8 +210,8 @@
}
WideString WideStringFromFPDFWideString(FPDF_WIDESTRING wide_string) {
- return WideString::FromUTF16LE(wide_string,
- WideString::WStringLength(wide_string));
+ return WideString::FromUTF16LE({reinterpret_cast<const uint8_t*>(wide_string),
+ WideString::WStringLength(wide_string) * 2});
}
#ifdef PDF_ENABLE_XFA
diff --git a/fpdfsdk/fpdf_formfill_embeddertest.cpp b/fpdfsdk/fpdf_formfill_embeddertest.cpp
index 71bede8..c35c785 100644
--- a/fpdfsdk/fpdf_formfill_embeddertest.cpp
+++ b/fpdfsdk/fpdf_formfill_embeddertest.cpp
@@ -133,12 +133,12 @@
ASSERT_NE(actual_len, 0U);
ASSERT_LT(actual_len, 1000U);
- std::vector<unsigned short> buf(actual_len);
+ std::vector<uint8_t> buf(actual_len);
ASSERT_EQ(actual_len, FORM_GetSelectedText(form_handle(), page_, buf.data(),
actual_len));
- int num_chars = (actual_len / sizeof(unsigned short)) - 1;
- EXPECT_EQ(expected_string, WideString::FromUTF16LE(buf.data(), num_chars));
+ EXPECT_EQ(expected_string,
+ WideString::FromUTF16LE({buf.data(), actual_len - 2}));
}
void FocusOnPoint(const CFX_PointF& point) {
@@ -151,12 +151,12 @@
ASSERT_NE(actual_len, 0U);
ASSERT_LT(actual_len, 1000U);
- std::vector<unsigned short> buf(actual_len);
+ std::vector<uint8_t> buf(actual_len);
ASSERT_EQ(actual_len, FORM_GetFocusedText(form_handle(), page_, buf.data(),
actual_len));
- int num_chars = (actual_len / sizeof(unsigned short)) - 1;
- EXPECT_EQ(expected_string, WideString::FromUTF16LE(buf.data(), num_chars));
+ EXPECT_EQ(expected_string,
+ WideString::FromUTF16LE({buf.data(), actual_len - 2}));
}
void CheckCanUndo(bool expected_result) {
diff --git a/fpdfsdk/fpdfxfa/cpdfxfa_context.cpp b/fpdfsdk/fpdfxfa/cpdfxfa_context.cpp
index 1271fbd..75f9e06 100644
--- a/fpdfsdk/fpdfxfa/cpdfxfa_context.cpp
+++ b/fpdfsdk/fpdfxfa/cpdfxfa_context.cpp
@@ -364,17 +364,16 @@
return WideString();
constexpr int kMaxWideChars = 1024;
- FixedZeroedDataVector<uint16_t> buffer(kMaxWideChars);
- pdfium::span<uint16_t> buffer_span = buffer.writable_span();
+ constexpr int kMaxBytes = kMaxWideChars * sizeof(uint16_t);
+ FixedZeroedDataVector<uint8_t> buffer(kMaxBytes);
+ pdfium::span<uint8_t> buffer_span = buffer.writable_span();
int byte_length = m_pFormFillEnv->JS_appResponse(
- wsQuestion, wsTitle, wsDefaultAnswer, WideString(), bMark,
- pdfium::as_writable_bytes(buffer_span));
+ wsQuestion, wsTitle, wsDefaultAnswer, WideString(), bMark, buffer_span);
if (byte_length <= 0)
return WideString();
- buffer_span = buffer_span.first(
- std::min<size_t>(kMaxWideChars, byte_length / sizeof(uint16_t)));
- return WideString::FromUTF16LE(buffer_span.data(), buffer_span.size());
+ buffer_span = buffer_span.first(std::min<size_t>(kMaxBytes, byte_length));
+ return WideString::FromUTF16LE(buffer_span);
}
RetainPtr<IFX_SeekableReadStream> CPDFXFA_Context::DownloadURL(
diff --git a/fpdfsdk/fpdfxfa/cpdfxfa_docenvironment.cpp b/fpdfsdk/fpdfxfa/cpdfxfa_docenvironment.cpp
index ff93008..8800a0c 100644
--- a/fpdfsdk/fpdfxfa/cpdfxfa_docenvironment.cpp
+++ b/fpdfsdk/fpdfxfa/cpdfxfa_docenvironment.cpp
@@ -503,9 +503,7 @@
continue;
}
if (i == pArray->size() - 1) {
- WideString wPath = WideString::FromUTF16LE(
- reinterpret_cast<const unsigned short*>(bs.c_str()),
- bs.GetLength() / sizeof(unsigned short));
+ WideString wPath = WideString::FromUTF16LE(bs.raw_span());
ByteString bPath = wPath.ToUTF8();
static const char kFormat[] =
"\n<pdf href=\"%s\" xmlns=\"http://ns.adobe.com/xdp/pdf/\"/>";
diff --git a/fxjs/cjs_app.cpp b/fxjs/cjs_app.cpp
index d00f8d0..d3dd81d 100644
--- a/fxjs/cjs_app.cpp
+++ b/fxjs/cjs_app.cpp
@@ -545,19 +545,16 @@
constexpr int kMaxWideChars = 1024;
constexpr int kMaxBytes = kMaxWideChars * sizeof(uint16_t);
- FixedZeroedDataVector<uint16_t> buffer(kMaxWideChars);
- pdfium::span<uint16_t> buffer_span = buffer.writable_span();
+ FixedZeroedDataVector<uint8_t> buffer(kMaxBytes);
+ pdfium::span<uint8_t> buffer_span = buffer.writable_span();
int byte_length = pRuntime->GetFormFillEnv()->JS_appResponse(
- swQuestion, swTitle, swDefault, swLabel, bPassword,
- pdfium::as_writable_bytes(buffer_span));
+ swQuestion, swTitle, swDefault, swLabel, bPassword, buffer_span);
if (byte_length < 0 || byte_length > kMaxBytes)
return CJS_Result::Failure(JSMessage::kParamTooLongError);
- buffer_span = buffer_span.first(
- std::min<size_t>(kMaxWideChars, byte_length / sizeof(uint16_t)));
- return CJS_Result::Success(pRuntime->NewString(
- WideString::FromUTF16LE(buffer_span.data(), buffer_span.size())
- .AsStringView()));
+ buffer_span = buffer_span.first(std::min<size_t>(kMaxBytes, byte_length));
+ return CJS_Result::Success(
+ pRuntime->NewString(WideString::FromUTF16LE(buffer_span).AsStringView()));
}
CJS_Result CJS_App::get_media(CJS_Runtime* pRuntime) {
diff --git a/testing/BUILD.gn b/testing/BUILD.gn
index dbb11a9..d61f8f3 100644
--- a/testing/BUILD.gn
+++ b/testing/BUILD.gn
@@ -44,6 +44,7 @@
"../core/fdrm",
"../core/fxcrt",
"../core/fxge",
+ "../fpdfsdk",
"image_diff",
]
configs += [
diff --git a/testing/fuzzers/pdf_bidi_fuzzer.cc b/testing/fuzzers/pdf_bidi_fuzzer.cc
index ffa6558..2871961 100644
--- a/testing/fuzzers/pdf_bidi_fuzzer.cc
+++ b/testing/fuzzers/pdf_bidi_fuzzer.cc
@@ -27,9 +27,7 @@
rtf_break.SetFont(CFGAS_GEFont::LoadFont(std::move(font)));
rtf_break.SetFontSize(12);
- WideString input =
- WideString::FromUTF16LE(reinterpret_cast<const unsigned short*>(data),
- size / sizeof(unsigned short));
+ WideString input = WideString::FromUTF16LE({data, size});
for (wchar_t ch : input)
rtf_break.AppendChar(ch);
diff --git a/testing/fuzzers/pdf_cfx_barcode_fuzzer.cc b/testing/fuzzers/pdf_cfx_barcode_fuzzer.cc
index 35afec9..4df0130 100644
--- a/testing/fuzzers/pdf_cfx_barcode_fuzzer.cc
+++ b/testing/fuzzers/pdf_cfx_barcode_fuzzer.cc
@@ -24,8 +24,7 @@
barcode->SetHeight(298);
barcode->SetWidth(418);
- WideString content = WideString::FromUTF16LE(
- reinterpret_cast<const uint16_t*>(data), size / sizeof(uint16_t));
+ WideString content = WideString::FromUTF16LE({data, size});
if (!barcode->Encode(content.AsStringView()))
return 0;
diff --git a/testing/fuzzers/pdf_cjs_util_fuzzer.cc b/testing/fuzzers/pdf_cjs_util_fuzzer.cc
index 3885c7b..51d6b68 100644
--- a/testing/fuzzers/pdf_cjs_util_fuzzer.cc
+++ b/testing/fuzzers/pdf_cjs_util_fuzzer.cc
@@ -6,18 +6,15 @@
#include "fxjs/cjs_util.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
- auto* short_data = reinterpret_cast<const unsigned short*>(data);
- size_t short_size = size / sizeof(unsigned short);
- if (short_size > 1) {
- WideString input = WideString::FromUTF16LE(short_data, short_size);
+ if (size > 2) {
+ WideString input = WideString::FromUTF16LE({data, size});
CJS_Util::ParseDataType(&input);
}
- if (short_size > 2) {
- size_t short_len1 = short_size / 2;
- size_t short_len2 = short_size - short_len1;
- WideString input1 = WideString::FromUTF16LE(short_data, short_len1);
- WideString input2 =
- WideString::FromUTF16LE(short_data + short_len1, short_len2);
+ if (size > 4) {
+ size_t len1 = size / 2;
+ size_t len2 = size - len1;
+ WideString input1 = WideString::FromUTF16LE({data, len1});
+ WideString input2 = WideString::FromUTF16LE({data + len1, len2});
CJS_Util::StringPrintx(input1, input2);
}
return 0;
diff --git a/testing/fuzzers/pdf_fx_date_helpers_fuzzer.cc b/testing/fuzzers/pdf_fx_date_helpers_fuzzer.cc
index d98fffd..0e198c7 100644
--- a/testing/fuzzers/pdf_fx_date_helpers_fuzzer.cc
+++ b/testing/fuzzers/pdf_fx_date_helpers_fuzzer.cc
@@ -6,15 +6,12 @@
#include "fxjs/fx_date_helpers.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
- auto* short_data = reinterpret_cast<const unsigned short*>(data);
- size_t short_size = size / sizeof(unsigned short);
- if (short_size > 2 && short_size < 8192) {
+ if (size > 4 && size < 16384) {
double ignore;
- size_t short_len1 = short_size / 2;
- size_t short_len2 = short_size - short_len1;
- WideString input1 = WideString::FromUTF16LE(short_data, short_len1);
- WideString input2 =
- WideString::FromUTF16LE(short_data + short_len1, short_len2);
+ size_t len1 = size / 2;
+ size_t len2 = size - len1;
+ WideString input1 = WideString::FromUTF16LE({data, len1});
+ WideString input2 = WideString::FromUTF16LE({data + len1, len2});
FX_ParseDateUsingFormat(input1, input2, &ignore);
}
return 0;
diff --git a/testing/fuzzers/pdf_nametree_fuzzer.cc b/testing/fuzzers/pdf_nametree_fuzzer.cc
index 43c10c7..1058eec 100644
--- a/testing/fuzzers/pdf_nametree_fuzzer.cc
+++ b/testing/fuzzers/pdf_nametree_fuzzer.cc
@@ -28,8 +28,7 @@
constexpr size_t kMaxNameLen = 10;
std::string str = data_provider->ConsumeRandomLengthString(kMaxNameLen);
names.push_back(WideString::FromUTF16LE(
- reinterpret_cast<const unsigned short*>(str.data()),
- str.size() / sizeof(unsigned short)));
+ {reinterpret_cast<const uint8_t*>(str.data()), str.size()}));
}
return names;
}
diff --git a/testing/fx_string_testhelpers.cpp b/testing/fx_string_testhelpers.cpp
index 09f7653..399335c 100644
--- a/testing/fx_string_testhelpers.cpp
+++ b/testing/fx_string_testhelpers.cpp
@@ -10,6 +10,7 @@
#include "core/fxcrt/cfx_datetime.h"
#include "core/fxcrt/fx_string.h"
+#include "fpdfsdk/cpdfsdk_helpers.h"
#include "third_party/base/check_op.h"
#include "third_party/base/containers/span.h"
@@ -38,8 +39,7 @@
}
std::string GetPlatformString(FPDF_WIDESTRING wstr) {
- WideString wide_string =
- WideString::FromUTF16LE(wstr, WideString::WStringLength(wstr));
+ WideString wide_string = WideStringFromFPDFWideString(wstr);
return std::string(wide_string.ToUTF8().c_str());
}
diff --git a/xfa/fwl/cfwl_edit_embeddertest.cpp b/xfa/fwl/cfwl_edit_embeddertest.cpp
index 7496a24..25f6dd2 100644
--- a/xfa/fwl/cfwl_edit_embeddertest.cpp
+++ b/xfa/fwl/cfwl_edit_embeddertest.cpp
@@ -78,9 +78,9 @@
// 12 == (2 * strlen(defgh)) + 2 (for \0\0)
EXPECT_EQ(12UL, FORM_GetSelectedText(form_handle(), page(), nullptr, 0));
- unsigned short buf[128];
+ uint8_t buf[128];
unsigned long len = FORM_GetSelectedText(form_handle(), page(), &buf, 128);
- EXPECT_STREQ(L"defgh", WideString::FromUTF16LE(buf, len).c_str());
+ EXPECT_STREQ(L"defgh", WideString::FromUTF16LE({buf, len}).c_str());
}
TEST_F(CFWLEditEmbedderTest, DragMouseSelection) {
@@ -101,9 +101,9 @@
// 12 == (2 * strlen(defgh)) + 2 (for \0\0)
EXPECT_EQ(12UL, FORM_GetSelectedText(form_handle(), page(), nullptr, 0));
- unsigned short buf[128];
+ uint8_t buf[128];
unsigned long len = FORM_GetSelectedText(form_handle(), page(), &buf, 128);
- EXPECT_STREQ(L"defgh", WideString::FromUTF16LE(buf, len).c_str());
+ EXPECT_STREQ(L"defgh", WideString::FromUTF16LE({buf, len}).c_str());
// TODO(hnakashima): This is incorrect. Visually 'abcdefgh' are selected.
const char kDraggedMD5[] = "f131526c8edd04e44de17b2647ec54c8";