Remove the allow_unsafe_buffers #pragma from fpdftext/ files -- Mark specific unsafe regions. -- Convert C-style array[2] to struct to avoid indexing. -- Use subspan() in place of pointer arithmetic. Bug: pdfium:2154 Change-Id: I4299e446d4ac720d3dade8f2b2c44be85d61cb3c Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/119150 Reviewed-by: Lei Zhang <thestig@chromium.org> Reviewed-by: Thomas Sepez <tsepez@google.com> Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdftext/cpdf_linkextract_unittest.cpp b/core/fpdftext/cpdf_linkextract_unittest.cpp index 9c8acda..44e71b3 100644 --- a/core/fpdftext/cpdf_linkextract_unittest.cpp +++ b/core/fpdftext/cpdf_linkextract_unittest.cpp
@@ -2,13 +2,10 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#if defined(UNSAFE_BUFFERS_BUILD) -// TODO(crbug.com/pdfium/2153): resolve buffer safety issues. -#pragma allow_unsafe_buffers -#endif - #include "core/fpdftext/cpdf_linkextract.h" +#include <utility> + #include "testing/gtest/include/gtest/gtest.h" // Class to help test functions in CPDF_LinkExtract class. @@ -40,9 +37,13 @@ EXPECT_FALSE(extractor.CheckMailLink(&text_str)) << input; } + // A struct of {input_string, expected_extracted_email_address}. + struct IOPair { + const wchar_t* input; + const wchar_t* expected_output; + }; // Check cases that can extract valid mail link. - // An array of {input_string, expected_extracted_email_address}. - const wchar_t* const kValidStrings[][2] = { + constexpr IOPair kValidStrings[] = { {L"peter@abc.d", L"peter@abc.d"}, {L"red.teddy.b@abc.com", L"red.teddy.b@abc.com"}, {L"abc_@gmail.com", L"abc_@gmail.com"}, // '_' is ok before '@'. @@ -56,11 +57,10 @@ {L"CAP.cap@Gmail.Com", L"CAP.cap@Gmail.Com"}, // Keep the original case. }; for (const auto& it : kValidStrings) { - const wchar_t* const input = it[0]; - WideString text_str(input); + WideString text_str(it.input); WideString expected_str(L"mailto:"); - expected_str += it[1]; - EXPECT_TRUE(extractor.CheckMailLink(&text_str)) << input; + expected_str += it.expected_output; + EXPECT_TRUE(extractor.CheckMailLink(&text_str)) << it.input; EXPECT_EQ(expected_str.c_str(), text_str); } }
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index 9af236a..1e5d1c3 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp
@@ -4,11 +4,6 @@ // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com -#if defined(UNSAFE_BUFFERS_BUILD) -// TODO(crbug.com/pdfium/2153): resolve buffer safety issues. -#pragma allow_unsafe_buffers -#endif - #include "core/fpdftext/cpdf_textpage.h" #include <math.h> @@ -30,6 +25,7 @@ #include "core/fpdftext/unicodenormalizationdata.h" #include "core/fxcrt/check.h" #include "core/fxcrt/check_op.h" +#include "core/fxcrt/compiler_specific.h" #include "core/fxcrt/data_vector.h" #include "core/fxcrt/fx_bidi.h" #include "core/fxcrt/fx_extension.h" @@ -84,23 +80,28 @@ } DataVector<wchar_t> GetUnicodeNormalization(wchar_t wch) { - wch = wch & 0xFFFF; - wchar_t wFind = kUnicodeDataNormalization[wch]; - if (!wFind) - return DataVector<wchar_t>(1, wch); + // TODO(crbug.com/pdfium/2155): resolve safety issues. + UNSAFE_BUFFERS({ + wch = wch & 0xFFFF; + wchar_t wFind = kUnicodeDataNormalization[wch]; + if (!wFind) { + return DataVector<wchar_t>(1, wch); + } - if (wFind >= 0x8000) { - return DataVector<wchar_t>(1, - kUnicodeDataNormalizationMap1[wFind - 0x8000]); - } + if (wFind >= 0x8000) { + return DataVector<wchar_t>(1, + kUnicodeDataNormalizationMap1[wFind - 0x8000]); + } - wch = wFind & 0x0FFF; - wFind >>= 12; - const uint16_t* pMap = kUnicodeDataNormalizationMaps[wFind - 2] + wch; - if (wFind == 4) - wFind = static_cast<wchar_t>(*pMap++); + wch = wFind & 0x0FFF; + wFind >>= 12; + const uint16_t* pMap = kUnicodeDataNormalizationMaps[wFind - 2] + wch; + if (wFind == 4) { + wFind = static_cast<wchar_t>(*pMap++); + } - return DataVector<wchar_t>(pMap, pMap + wFind); + return DataVector<wchar_t>(pMap, pMap + wFind); + }); } float MaskPercentFilled(const std::vector<bool>& mask, @@ -946,7 +947,8 @@ pdfium::span<wchar_t> temp_span = m_TempTextBuf.GetWideSpan(); DCHECK(!temp_span.empty()); if (iBufStartAppend < temp_span.size()) { - std::reverse(temp_span.begin() + iBufStartAppend, temp_span.end()); + pdfium::span<wchar_t> reverse_span = 
temp_span.subspan(iBufStartAppend); + std::reverse(reverse_span.begin(), reverse_span.end()); } }
diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp index 83a5c83..4bf9f63 100644 --- a/core/fpdftext/cpdf_textpagefind.cpp +++ b/core/fpdftext/cpdf_textpagefind.cpp
@@ -4,11 +4,6 @@ // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com -#if defined(UNSAFE_BUFFERS_BUILD) -// TODO(crbug.com/pdfium/2153): resolve buffer safety issues. -#pragma allow_unsafe_buffers -#endif - #include "core/fpdftext/cpdf_textpagefind.h" #include <wchar.h> @@ -100,23 +95,29 @@ int iSubString) { DCHECK(lpszFullString); - while (iSubString--) { - lpszFullString = wcschr(lpszFullString, L' '); - if (!lpszFullString) - return std::nullopt; + // TODO(crbug.com/pdfium/2155): resolve safety issues. + UNSAFE_BUFFERS({ + while (iSubString--) { + lpszFullString = wcschr(lpszFullString, L' '); + if (!lpszFullString) { + return std::nullopt; + } - lpszFullString++; - while (*lpszFullString == L' ') lpszFullString++; - } + while (*lpszFullString == L' ') { + lpszFullString++; + } + } - const wchar_t* lpchEnd = wcschr(lpszFullString, L' '); - int nLen = lpchEnd ? static_cast<int>(lpchEnd - lpszFullString) - : static_cast<int>(wcslen(lpszFullString)); - if (nLen < 0) - return std::nullopt; + const wchar_t* lpchEnd = wcschr(lpszFullString, L' '); + int nLen = lpchEnd ? static_cast<int>(lpchEnd - lpszFullString) + : static_cast<int>(wcslen(lpszFullString)); + if (nLen < 0) { + return std::nullopt; + } - return WideString(lpszFullString, static_cast<size_t>(nLen)); + return WideString(lpszFullString, static_cast<size_t>(nLen)); + }); } std::vector<WideString> ExtractFindWhat(const WideString& findwhat) {