Remove unsafe_buffer #pragma from fpdftext/ files

-- Mark specific unsafe regions.
-- Convert C-style array[2] to struct to avoid indexing.
-- Use subspan() in place of pointer arithmetic.

Bug: pdfium:2154
Change-Id: I4299e446d4ac720d3dade8f2b2c44be85d61cb3c
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/119150
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
Commit-Queue: Tom Sepez <tsepez@chromium.org>

commit: 547d96118caf46c07b3e520a971992b6909696bd [log] [tgz]
author: Tom Sepez <tsepez@chromium.org> Wed May 15 02:12:19 2024 +0000
committer: Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com> Wed May 15 02:12:19 2024 +0000
tree: 9839da59572ac96eba5106a884194d92f89ec007
parent: 7024bccc57833205f3f2605c5703125798460ac0 [diff]
diff --git a/core/fpdftext/cpdf_linkextract_unittest.cpp b/core/fpdftext/cpdf_linkextract_unittest.cpp
index 9c8acda..44e71b3 100644
--- a/core/fpdftext/cpdf_linkextract_unittest.cpp
+++ b/core/fpdftext/cpdf_linkextract_unittest.cpp

@@ -2,13 +2,10 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#if defined(UNSAFE_BUFFERS_BUILD)
-// TODO(crbug.com/pdfium/2153): resolve buffer safety issues.
-#pragma allow_unsafe_buffers
-#endif
-
 #include "core/fpdftext/cpdf_linkextract.h"
 
+#include <utility>
+
 #include "testing/gtest/include/gtest/gtest.h"
 
 // Class to help test functions in CPDF_LinkExtract class.
@@ -40,9 +37,13 @@
     EXPECT_FALSE(extractor.CheckMailLink(&text_str)) << input;
   }
 
+  // A struct of {input_string, expected_extracted_email_address}.
+  struct IOPair {
+    const wchar_t* input;
+    const wchar_t* expected_output;
+  };
   // Check cases that can extract valid mail link.
-  // An array of {input_string, expected_extracted_email_address}.
-  const wchar_t* const kValidStrings[][2] = {
+  constexpr IOPair kValidStrings[] = {
       {L"peter@abc.d", L"peter@abc.d"},
       {L"red.teddy.b@abc.com", L"red.teddy.b@abc.com"},
       {L"abc_@gmail.com", L"abc_@gmail.com"},  // '_' is ok before '@'.
@@ -56,11 +57,10 @@
       {L"CAP.cap@Gmail.Com", L"CAP.cap@Gmail.Com"},  // Keep the original case.
   };
   for (const auto& it : kValidStrings) {
-    const wchar_t* const input = it[0];
-    WideString text_str(input);
+    WideString text_str(it.input);
     WideString expected_str(L"mailto:");
-    expected_str += it[1];
-    EXPECT_TRUE(extractor.CheckMailLink(&text_str)) << input;
+    expected_str += it.expected_output;
+    EXPECT_TRUE(extractor.CheckMailLink(&text_str)) << it.input;
     EXPECT_EQ(expected_str.c_str(), text_str);
   }
 }

diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index 9af236a..1e5d1c3 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp

@@ -4,11 +4,6 @@
 
 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
 
-#if defined(UNSAFE_BUFFERS_BUILD)
-// TODO(crbug.com/pdfium/2153): resolve buffer safety issues.
-#pragma allow_unsafe_buffers
-#endif
-
 #include "core/fpdftext/cpdf_textpage.h"
 
 #include <math.h>
@@ -30,6 +25,7 @@
 #include "core/fpdftext/unicodenormalizationdata.h"
 #include "core/fxcrt/check.h"
 #include "core/fxcrt/check_op.h"
+#include "core/fxcrt/compiler_specific.h"
 #include "core/fxcrt/data_vector.h"
 #include "core/fxcrt/fx_bidi.h"
 #include "core/fxcrt/fx_extension.h"
@@ -84,23 +80,28 @@
 }
 
 DataVector<wchar_t> GetUnicodeNormalization(wchar_t wch) {
-  wch = wch & 0xFFFF;
-  wchar_t wFind = kUnicodeDataNormalization[wch];
-  if (!wFind)
-    return DataVector<wchar_t>(1, wch);
+  // TODO(crbug.com/pdfium/2155): resolve safety issues.
+  UNSAFE_BUFFERS({
+    wch = wch & 0xFFFF;
+    wchar_t wFind = kUnicodeDataNormalization[wch];
+    if (!wFind) {
+      return DataVector<wchar_t>(1, wch);
+    }
 
-  if (wFind >= 0x8000) {
-    return DataVector<wchar_t>(1,
-                               kUnicodeDataNormalizationMap1[wFind - 0x8000]);
-  }
+    if (wFind >= 0x8000) {
+      return DataVector<wchar_t>(1,
+                                 kUnicodeDataNormalizationMap1[wFind - 0x8000]);
+    }
 
-  wch = wFind & 0x0FFF;
-  wFind >>= 12;
-  const uint16_t* pMap = kUnicodeDataNormalizationMaps[wFind - 2] + wch;
-  if (wFind == 4)
-    wFind = static_cast<wchar_t>(*pMap++);
+    wch = wFind & 0x0FFF;
+    wFind >>= 12;
+    const uint16_t* pMap = kUnicodeDataNormalizationMaps[wFind - 2] + wch;
+    if (wFind == 4) {
+      wFind = static_cast<wchar_t>(*pMap++);
+    }
 
-  return DataVector<wchar_t>(pMap, pMap + wFind);
+    return DataVector<wchar_t>(pMap, pMap + wFind);
+  });
 }
 
 float MaskPercentFilled(const std::vector<bool>& mask,
@@ -946,7 +947,8 @@
   pdfium::span<wchar_t> temp_span = m_TempTextBuf.GetWideSpan();
   DCHECK(!temp_span.empty());
   if (iBufStartAppend < temp_span.size()) {
-    std::reverse(temp_span.begin() + iBufStartAppend, temp_span.end());
+    pdfium::span<wchar_t> reverse_span = temp_span.subspan(iBufStartAppend);
+    std::reverse(reverse_span.begin(), reverse_span.end());
   }
 }
 

diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp
index 83a5c83..4bf9f63 100644
--- a/core/fpdftext/cpdf_textpagefind.cpp
+++ b/core/fpdftext/cpdf_textpagefind.cpp

@@ -4,11 +4,6 @@
 
 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
 
-#if defined(UNSAFE_BUFFERS_BUILD)
-// TODO(crbug.com/pdfium/2153): resolve buffer safety issues.
-#pragma allow_unsafe_buffers
-#endif
-
 #include "core/fpdftext/cpdf_textpagefind.h"
 
 #include <wchar.h>
@@ -100,23 +95,29 @@
                                            int iSubString) {
   DCHECK(lpszFullString);
 
-  while (iSubString--) {
-    lpszFullString = wcschr(lpszFullString, L' ');
-    if (!lpszFullString)
-      return std::nullopt;
+  // TODO(crbug.com/pdfium/2155): resolve safety issues.
+  UNSAFE_BUFFERS({
+    while (iSubString--) {
+      lpszFullString = wcschr(lpszFullString, L' ');
+      if (!lpszFullString) {
+        return std::nullopt;
+      }
 
-    lpszFullString++;
-    while (*lpszFullString == L' ')
       lpszFullString++;
-  }
+      while (*lpszFullString == L' ') {
+        lpszFullString++;
+      }
+    }
 
-  const wchar_t* lpchEnd = wcschr(lpszFullString, L' ');
-  int nLen = lpchEnd ? static_cast<int>(lpchEnd - lpszFullString)
-                     : static_cast<int>(wcslen(lpszFullString));
-  if (nLen < 0)
-    return std::nullopt;
+    const wchar_t* lpchEnd = wcschr(lpszFullString, L' ');
+    int nLen = lpchEnd ? static_cast<int>(lpchEnd - lpszFullString)
+                       : static_cast<int>(wcslen(lpszFullString));
+    if (nLen < 0) {
+      return std::nullopt;
+    }
 
-  return WideString(lpszFullString, static_cast<size_t>(nLen));
+    return WideString(lpszFullString, static_cast<size_t>(nLen));
+  });
 }
 
 std::vector<WideString> ExtractFindWhat(const WideString& findwhat) {
commit	547d96118caf46c07b3e520a971992b6909696bd	[log] [tgz]
author	Tom Sepez <tsepez@chromium.org>	Wed May 15 02:12:19 2024 +0000
committer	Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com>	Wed May 15 02:12:19 2024 +0000
tree	9839da59572ac96eba5106a884194d92f89ec007
parent	7024bccc57833205f3f2605c5703125798460ac0 [diff]