M78: Fix out of bound read in PDF_DecodeText().
Add unit test for PDF_DecodeText() with basic test cases and test cases
for handling escape sequences. Also use size_t consistently.
Bug: chromium:1001159
Change-Id: I6e8750ce90fdd6cf6e5b2eff35bcdc89f28fc6c5
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/60310
Reviewed-by: Chris Palmer <palmer@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
(cherry picked from commit 1d3ff52a37c75d29c7d08483a042ef33266a9baf)
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/60434
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdfapi/parser/fpdf_parser_decode.cpp b/core/fpdfapi/parser/fpdf_parser_decode.cpp
index 232b6ea..7074061 100644
--- a/core/fpdfapi/parser/fpdf_parser_decode.cpp
+++ b/core/fpdfapi/parser/fpdf_parser_decode.cpp
@@ -472,7 +472,7 @@
WideString result;
if (span.size() >= 2 && ((span[0] == 0xfe && span[1] == 0xff) ||
(span[0] == 0xff && span[1] == 0xfe))) {
- uint32_t max_chars = (span.size() - 2) / 2;
+ size_t max_chars = (span.size() - 2) / 2;
if (!max_chars)
return result;
@@ -481,7 +481,7 @@
span[0] == 0xfe ? GetUnicodeFromBigEndianBytes
: GetUnicodeFromLittleEndianBytes;
const uint8_t* unicode_str = &span[2];
- for (uint32_t i = 0; i < max_chars * 2; i += 2) {
+ for (size_t i = 0; i < max_chars * 2; i += 2) {
uint16_t unicode = GetUnicodeFromBytes(unicode_str + i);
// 0x001B is a begin/end marker for language metadata region that
@@ -492,7 +492,8 @@
unicode = GetUnicodeFromBytes(unicode_str + i);
if (unicode == 0x001B) {
i += 2;
- unicode = GetUnicodeFromBytes(unicode_str + i);
+ if (i < max_chars * 2)
+ unicode = GetUnicodeFromBytes(unicode_str + i);
break;
}
}
@@ -504,7 +505,7 @@
}
} else {
pdfium::span<wchar_t> dest_buf = result.GetBuffer(span.size());
- for (uint32_t i = 0; i < span.size(); ++i)
+ for (size_t i = 0; i < span.size(); ++i)
dest_buf[i] = PDFDocEncoding[span[i]];
dest_pos = span.size();
}
diff --git a/core/fpdfapi/parser/fpdf_parser_decode_unittest.cpp b/core/fpdfapi/parser/fpdf_parser_decode_unittest.cpp
index 171482e..0ddf9ed 100644
--- a/core/fpdfapi/parser/fpdf_parser_decode_unittest.cpp
+++ b/core/fpdfapi/parser/fpdf_parser_decode_unittest.cpp
@@ -181,6 +181,55 @@
}
}
+TEST(fpdf_parser_decode, DecodeText) {
+ const struct DecodeTestData {
+ const char* input;
+ size_t input_length;
+ const wchar_t* expected_output;
+ size_t expected_length;
+ } kTestData[] = {
+ // Empty src string.
+ {"", 0, L"", 0},
+ // ASCII text.
+ {"the quick\tfox", 13, L"the quick\tfox", 13},
+ // Unicode text.
+ {"\xFE\xFF\x03\x30\x03\x31", 6, L"\x0330\x0331", 2},
+ // More Unicode text.
+ {"\xFE\xFF\x7F\x51\x98\x75\x00\x20\x56\xFE\x72\x47\x00"
+ "\x20\x8D\x44\x8B\xAF\x66\xF4\x59\x1A\x00\x20\x00\xBB",
+ 26,
+ L"\x7F51\x9875\x0020\x56FE\x7247\x0020"
+ L"\x8D44\x8BAF\x66F4\x591A\x0020\x00BB",
+ 12},
+ // Unicode escape sequence. https://crbug.com/pdfium/182
+ {"\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37", 14,
+ L"\x0020\x5370\x5237", 3},
+ {"\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37\x29", 15,
+ L"\x0020\x5370\x5237", 3},
+ {"\xFE\xFF\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x00\x20\x53\x70\x52\x37", 16,
+ L"\x0020\x5370\x5237", 3},
+ {"\xFE\xFF\x00\x20\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x52\x37", 14,
+ L"\x0020\x5237", 2},
+ // https://crbug.com/1001159
+ {"\xFE\xFF\x00\x1B\x00\x1B", 6, L"", 0},
+ {"\xFE\xFF\x00\x1B\x00\x1B\x20", 7, L"", 0},
+ {"\xFE\xFF\x00\x1B\x00\x1B\x00\x20", 8, L"\x0020", 1},
+ };
+
+ for (const auto& test_case : kTestData) {
+ WideString output = PDF_DecodeText(
+ pdfium::make_span(reinterpret_cast<const uint8_t*>(test_case.input),
+ test_case.input_length));
+ ASSERT_EQ(test_case.expected_length, output.GetLength())
+ << "for case " << test_case.input;
+ const wchar_t* str_ptr = output.c_str();
+ for (size_t i = 0; i < test_case.expected_length; ++i) {
+ EXPECT_EQ(test_case.expected_output[i], str_ptr[i])
+ << "for case " << test_case.input << " char " << i;
+ }
+ }
+}
+
TEST(fpdf_parser_decode, EncodeText) {
const struct EncodeTestData {
const wchar_t* input;