Make WideString's FromUTF16LE(), FromUTF16BE() take bytes, not wchar_t
This allows callers to pass in UTF-16 data that's possibly not
2-byte aligned, and seems to be what most of the callers want too.
With this change, we'll be able to use these functions in PDF_DecodeText.
No intended behavior change.
Change-Id: I2095c49b98646a33b21342fe4289c0436f8447b9
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/113950
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Nico Weber <thakis@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Auto-Submit: Nico Weber <thakis@chromium.org>
diff --git a/testing/BUILD.gn b/testing/BUILD.gn
index dbb11a9..d61f8f3 100644
--- a/testing/BUILD.gn
+++ b/testing/BUILD.gn
@@ -44,6 +44,7 @@
"../core/fdrm",
"../core/fxcrt",
"../core/fxge",
+ "../fpdfsdk",
"image_diff",
]
configs += [
diff --git a/testing/fuzzers/pdf_bidi_fuzzer.cc b/testing/fuzzers/pdf_bidi_fuzzer.cc
index ffa6558..2871961 100644
--- a/testing/fuzzers/pdf_bidi_fuzzer.cc
+++ b/testing/fuzzers/pdf_bidi_fuzzer.cc
@@ -27,9 +27,7 @@
rtf_break.SetFont(CFGAS_GEFont::LoadFont(std::move(font)));
rtf_break.SetFontSize(12);
- WideString input =
- WideString::FromUTF16LE(reinterpret_cast<const unsigned short*>(data),
- size / sizeof(unsigned short));
+ WideString input = WideString::FromUTF16LE({data, size});
for (wchar_t ch : input)
rtf_break.AppendChar(ch);
diff --git a/testing/fuzzers/pdf_cfx_barcode_fuzzer.cc b/testing/fuzzers/pdf_cfx_barcode_fuzzer.cc
index 35afec9..4df0130 100644
--- a/testing/fuzzers/pdf_cfx_barcode_fuzzer.cc
+++ b/testing/fuzzers/pdf_cfx_barcode_fuzzer.cc
@@ -24,8 +24,7 @@
barcode->SetHeight(298);
barcode->SetWidth(418);
- WideString content = WideString::FromUTF16LE(
- reinterpret_cast<const uint16_t*>(data), size / sizeof(uint16_t));
+ WideString content = WideString::FromUTF16LE({data, size});
if (!barcode->Encode(content.AsStringView()))
return 0;
diff --git a/testing/fuzzers/pdf_cjs_util_fuzzer.cc b/testing/fuzzers/pdf_cjs_util_fuzzer.cc
index 3885c7b..51d6b68 100644
--- a/testing/fuzzers/pdf_cjs_util_fuzzer.cc
+++ b/testing/fuzzers/pdf_cjs_util_fuzzer.cc
@@ -6,18 +6,15 @@
#include "fxjs/cjs_util.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
- auto* short_data = reinterpret_cast<const unsigned short*>(data);
- size_t short_size = size / sizeof(unsigned short);
- if (short_size > 1) {
- WideString input = WideString::FromUTF16LE(short_data, short_size);
+ if (size > 2) {
+ WideString input = WideString::FromUTF16LE({data, size});
CJS_Util::ParseDataType(&input);
}
- if (short_size > 2) {
- size_t short_len1 = short_size / 2;
- size_t short_len2 = short_size - short_len1;
- WideString input1 = WideString::FromUTF16LE(short_data, short_len1);
- WideString input2 =
- WideString::FromUTF16LE(short_data + short_len1, short_len2);
+ if (size > 4) {
+ size_t len1 = size / 2;
+ size_t len2 = size - len1;
+ WideString input1 = WideString::FromUTF16LE({data, len1});
+ WideString input2 = WideString::FromUTF16LE({data + len1, len2});
CJS_Util::StringPrintx(input1, input2);
}
return 0;
diff --git a/testing/fuzzers/pdf_fx_date_helpers_fuzzer.cc b/testing/fuzzers/pdf_fx_date_helpers_fuzzer.cc
index d98fffd..0e198c7 100644
--- a/testing/fuzzers/pdf_fx_date_helpers_fuzzer.cc
+++ b/testing/fuzzers/pdf_fx_date_helpers_fuzzer.cc
@@ -6,15 +6,12 @@
#include "fxjs/fx_date_helpers.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
- auto* short_data = reinterpret_cast<const unsigned short*>(data);
- size_t short_size = size / sizeof(unsigned short);
- if (short_size > 2 && short_size < 8192) {
+ if (size > 4 && size < 16384) {
double ignore;
- size_t short_len1 = short_size / 2;
- size_t short_len2 = short_size - short_len1;
- WideString input1 = WideString::FromUTF16LE(short_data, short_len1);
- WideString input2 =
- WideString::FromUTF16LE(short_data + short_len1, short_len2);
+ size_t len1 = size / 2;
+ size_t len2 = size - len1;
+ WideString input1 = WideString::FromUTF16LE({data, len1});
+ WideString input2 = WideString::FromUTF16LE({data + len1, len2});
FX_ParseDateUsingFormat(input1, input2, &ignore);
}
return 0;
diff --git a/testing/fuzzers/pdf_nametree_fuzzer.cc b/testing/fuzzers/pdf_nametree_fuzzer.cc
index 43c10c7..1058eec 100644
--- a/testing/fuzzers/pdf_nametree_fuzzer.cc
+++ b/testing/fuzzers/pdf_nametree_fuzzer.cc
@@ -28,8 +28,7 @@
constexpr size_t kMaxNameLen = 10;
std::string str = data_provider->ConsumeRandomLengthString(kMaxNameLen);
names.push_back(WideString::FromUTF16LE(
- reinterpret_cast<const unsigned short*>(str.data()),
- str.size() / sizeof(unsigned short)));
+ {reinterpret_cast<const uint8_t*>(str.data()), str.size()}));
}
return names;
}
diff --git a/testing/fx_string_testhelpers.cpp b/testing/fx_string_testhelpers.cpp
index 09f7653..399335c 100644
--- a/testing/fx_string_testhelpers.cpp
+++ b/testing/fx_string_testhelpers.cpp
@@ -10,6 +10,7 @@
#include "core/fxcrt/cfx_datetime.h"
#include "core/fxcrt/fx_string.h"
+#include "fpdfsdk/cpdfsdk_helpers.h"
#include "third_party/base/check_op.h"
#include "third_party/base/containers/span.h"
@@ -38,8 +39,7 @@
}
std::string GetPlatformString(FPDF_WIDESTRING wstr) {
- WideString wide_string =
- WideString::FromUTF16LE(wstr, WideString::WStringLength(wstr));
+ WideString wide_string = WideStringFromFPDFWideString(wstr);
return std::string(wide_string.ToUTF8().c_str());
}