Add StringToInt() Add utility functions to core/fxcrt/fx_string.h that take a {Byte,Wide}StringView. Implement them using the same logic as FXSYS_StrToInt() inside fx_system.cpp. Add unit tests that are identical to the existing unit tests for FXSYS_atoi() and FXSYS_wtoi() to provide test coverage. Then add a few more suggested test cases for additional coverage. Replace a few FXSYS_atoi() and FXSYS_wtoi() calls to put StringToInt() to use. Change-Id: I74ee1e8d7130d300401f0bf17a7b06a327c60c3c Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/126010 Commit-Queue: Lei Zhang <thestig@chromium.org> Reviewed-by: Tom Sepez <tsepez@chromium.org> Reviewed-by: Thomas Sepez <tsepez@google.com>

commit: 2150b69003c17a7ab4d4541053f0b1eca4a6e85c [log] [tgz]
author: Lei Zhang <thestig@chromium.org> Thu Nov 14 21:20:23 2024 +0000
committer: Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com> Thu Nov 14 21:20:23 2024 +0000
tree: 606b7acfb9c9ff4648481b3c2b5de0e2c90123a1
parent: f3ddbc93c29a12d5104da5bc35556d63a3b49646 [diff]
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index a2e2213..7fdd089 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp

@@ -35,6 +35,7 @@
 #include "core/fxcrt/data_vector.h"
 #include "core/fxcrt/fx_extension.h"
 #include "core/fxcrt/fx_safe_types.h"
+#include "core/fxcrt/fx_string.h"
 #include "core/fxcrt/notreached.h"
 #include "core/fxcrt/scoped_set_insertion.h"
 #include "core/fxcrt/span.h"
@@ -566,8 +567,8 @@
         info.pos = offset.ValueOrDie();
 
         // TODO(art-snake): The info.gennum is uint16_t, but version may be
-        // greated than max<uint16_t>. Needs solve this issue.
-        const int32_t version = FXSYS_atoi(pEntry.subspan(11).data());
+        // greater than max<uint16_t>. Need to solve this issue.
+        const int32_t version = StringToInt(ByteStringView(pEntry.subspan(11)));
         info.gennum = version;
         info.type = ObjectType::kNormal;
       }

diff --git a/core/fxcrt/fx_string.cpp b/core/fxcrt/fx_string.cpp
index 1443c29..212620d 100644
--- a/core/fxcrt/fx_string.cpp
+++ b/core/fxcrt/fx_string.cpp

@@ -9,6 +9,7 @@
 #include <stdint.h>
 
 #include <array>
+#include <limits>
 #include <string>
 #include <vector>
 
@@ -64,6 +65,67 @@
   }
 }
 
+template <typename IntType, typename StringViewType>
+IntType StringToIntImpl(StringViewType str) {
+  if (str.IsEmpty()) {
+    return 0;
+  }
+
+  // Consume one optional leading sign; leading whitespace is not skipped.
+  bool neg = str.CharAt(0u) == '-';
+  if (neg || str.CharAt(0u) == '+') {
+    str = str.Substr(1u);
+  }
+
+  IntType num = 0;
+  while (!str.IsEmpty() && FXSYS_IsDecimalDigit(str.CharAt(0u))) {
+    IntType val = FXSYS_DecimalCharToInt(str.CharAt(0u));
+    if (num > (std::numeric_limits<IntType>::max() - val) / 10) {
+      if (neg && std::numeric_limits<IntType>::is_signed) {
+        // Clamp to MIN when IntType is signed and the negative input's
+        // magnitude exceeds MAX (this includes an input equal to MIN).
+        return std::numeric_limits<IntType>::min();
+      }
+      // Otherwise clamp to MAX: the input is a positive value above MAX, or
+      // IntType is unsigned and the digits do not fit.
+      return std::numeric_limits<IntType>::max();
+    }
+    num = num * 10 + val;
+    str = str.Substr(1u);
+  }
+  // A negative input must yield -num. Because IntType may be unsigned, negate
+  // via two's complement (~num + 1) to avoid the warning about applying unary
+  // minus to an unsigned type.
+  return neg ? ~num + 1 : num;
+}
+
+// Parses `strc` as a floating-point `T`; intended for `T` = float or double.
+template <class T>
+T StringToFloatImpl(ByteStringView strc) {
+  // Skip leading spaces; only ' ' is treated as whitespace here.
+  size_t start = 0;
+  size_t len = strc.GetLength();
+  while (start < len && strc[start] == ' ') {
+    ++start;
+  }
+
+  // Skip a single leading '+' sign, if present.
+  if (start < len && strc[start] == '+') {
+    ++start;
+  }
+
+  ByteStringView sub_strc = strc.Substr(start, len - start);
+
+  T value;
+  auto result = fast_float::from_chars(sub_strc.begin(), sub_strc.end(), value);
+
+  // Return 0 for parsing errors, e.g. an empty or unconvertible string. A
+  // result_out_of_range status is not an error: `value` is returned as is.
+  return result.ec == std::errc() || result.ec == std::errc::result_out_of_range
+             ? value
+             : 0;
+}
+
 }  // namespace
 
 ByteString FX_UTF8Encode(WideStringView wsStr) {
@@ -97,38 +159,16 @@
   return result;
 }
 
-namespace {
-
-template <class T>
-T StringTo(ByteStringView strc) {
-  // Skip leading whitespaces.
-  size_t start = 0;
-  size_t len = strc.GetLength();
-  while (start < len && strc[start] == ' ') {
-    ++start;
-  }
-
-  // Skip a leading '+' sign.
-  if (start < len && strc[start] == '+') {
-    ++start;
-  }
-
-  ByteStringView sub_strc = strc.Substr(start, len - start);
-
-  T value;
-  auto result = fast_float::from_chars(sub_strc.begin(), sub_strc.end(), value);
-
-  // Return 0 for parsing errors. Some examples of errors are an empty string
-  // and a string that cannot be converted to T.
-  return result.ec == std::errc() || result.ec == std::errc::result_out_of_range
-             ? value
-             : 0;
+int32_t StringToInt(ByteStringView str) {
+  return StringToIntImpl<int32_t, ByteStringView>(str);
 }
 
-}  // namespace
+int32_t StringToInt(WideStringView wsStr) {
+  return StringToIntImpl<int32_t, WideStringView>(wsStr);
+}
 
 float StringToFloat(ByteStringView strc) {
-  return StringTo<float>(strc);
+  return StringToFloatImpl<float>(strc);
 }
 
 float StringToFloat(WideStringView wsStr) {
@@ -136,7 +176,7 @@
 }
 
 double StringToDouble(ByteStringView strc) {
-  return StringTo<double>(strc);
+  return StringToFloatImpl<double>(strc);
 }
 
 double StringToDouble(WideStringView wsStr) {

diff --git a/core/fxcrt/fx_string.h b/core/fxcrt/fx_string.h
index cd79418..88a7afe 100644
--- a/core/fxcrt/fx_string.h
+++ b/core/fxcrt/fx_string.h

@@ -24,6 +24,9 @@
 ByteString FX_UTF8Encode(WideStringView wsStr);
 std::u16string FX_UTF16Encode(WideStringView wsStr);
 
+int32_t StringToInt(ByteStringView str);
+int32_t StringToInt(WideStringView wsStr);
+
 float StringToFloat(ByteStringView str);
 float StringToFloat(WideStringView wsStr);
 

diff --git a/core/fxcrt/fx_string_unittest.cpp b/core/fxcrt/fx_string_unittest.cpp
index b9e74e3..a6037d4 100644
--- a/core/fxcrt/fx_string_unittest.cpp
+++ b/core/fxcrt/fx_string_unittest.cpp

@@ -52,6 +52,66 @@
 }
 #endif  // defined(WCHAR_T_IS_16_BIT)
 
+TEST(fxstring, ByteStringToInt) {
+  EXPECT_EQ(0, StringToInt(""));
+  EXPECT_EQ(0, StringToInt("0"));
+  EXPECT_EQ(-1, StringToInt("-1"));
+  EXPECT_EQ(2345, StringToInt("+2345"));
+  EXPECT_EQ(2345, StringToInt("2345"));
+  EXPECT_EQ(-2147483647, StringToInt("-2147483647"));
+  // The largest value representable in int32_t.
+  EXPECT_EQ(2147483647, StringToInt("2147483647"));
+  // The smallest value representable in int32_t.
+  EXPECT_EQ(-2147483647 - 1, StringToInt("-2147483648"));
+
+  // Out of range values are expected to clamp to the int32_t limits.
+  EXPECT_EQ(2147483647, StringToInt("2147483623423412348"));
+  EXPECT_EQ(2147483647, StringToInt("2147483648"));
+  EXPECT_EQ(-2147483647 - 1, StringToInt("-2147483652"));
+
+  // Other edge cases: lone or repeated signs, and non-digit characters.
+  EXPECT_EQ(0, StringToInt("-"));
+  EXPECT_EQ(0, StringToInt("c--"));
+  EXPECT_EQ(0, StringToInt("--i"));
+  EXPECT_EQ(0, StringToInt("+"));
+  EXPECT_EQ(0, StringToInt("c++"));
+  EXPECT_EQ(0, StringToInt("++i"));
+  EXPECT_EQ(1, StringToInt("1beef"));
+  EXPECT_EQ(0, StringToInt("beef1"));
+  EXPECT_EQ(0, StringToInt("b33f"));
+  EXPECT_EQ(33, StringToInt("33f"));
+}
+
+TEST(fxstring, WideStringToInt) {
+  EXPECT_EQ(0, StringToInt(L""));
+  EXPECT_EQ(0, StringToInt(L"0"));
+  EXPECT_EQ(-1, StringToInt(L"-1"));
+  EXPECT_EQ(2345, StringToInt(L"2345"));
+  EXPECT_EQ(2345, StringToInt(L"+2345"));
+  EXPECT_EQ(-2147483647, StringToInt(L"-2147483647"));
+  // The largest value representable in int32_t.
+  EXPECT_EQ(2147483647, StringToInt(L"2147483647"));
+  // The smallest value representable in int32_t.
+  EXPECT_EQ(-2147483647 - 1, StringToInt(L"-2147483648"));
+
+  // Out of range values are expected to clamp to the int32_t limits.
+  EXPECT_EQ(2147483647, StringToInt(L"2147483623423412348"));
+  EXPECT_EQ(2147483647, StringToInt(L"2147483648"));
+  EXPECT_EQ(-2147483647 - 1, StringToInt(L"-2147483652"));
+
+  // Other edge cases: lone or repeated signs, and non-digit characters.
+  EXPECT_EQ(0, StringToInt(L"-"));
+  EXPECT_EQ(0, StringToInt(L"c--"));
+  EXPECT_EQ(0, StringToInt(L"--i"));
+  EXPECT_EQ(0, StringToInt(L"+"));
+  EXPECT_EQ(0, StringToInt(L"c++"));
+  EXPECT_EQ(0, StringToInt(L"++i"));
+  EXPECT_EQ(1, StringToInt(L"1beef"));
+  EXPECT_EQ(0, StringToInt(L"beef1"));
+  EXPECT_EQ(0, StringToInt(L"b33f"));
+  EXPECT_EQ(33, StringToInt(L"33f"));
+}
+
 TEST(fxstring, ByteStringToFloat) {
   EXPECT_FLOAT_EQ(0.0f, StringToFloat(""));
   EXPECT_FLOAT_EQ(0.0f, StringToFloat("0"));

diff --git a/core/fxcrt/fx_system.h b/core/fxcrt/fx_system.h
index e7c9f4d..cc08353 100644
--- a/core/fxcrt/fx_system.h
+++ b/core/fxcrt/fx_system.h

@@ -83,6 +83,10 @@
 }  // extern "C"
 
 // C++-only section to allow future use of TerminatedPtr<>.
+// Also consider using / implementing equivalent functions that take string
+// views in core/fxcrt/fx_string.h instead.
+// TODO(crbug.com/42270715): Remove these if all callers switch to the string
+// view equivalent.
 int FXSYS_stricmp(const char* str1, const char* str2);
 int FXSYS_wcsicmp(const wchar_t* str1, const wchar_t* str2);
 int32_t FXSYS_atoi(const char* str);

diff --git a/core/fxcrt/widestring.cpp b/core/fxcrt/widestring.cpp
index 708ceb0..a4bdde4 100644
--- a/core/fxcrt/widestring.cpp
+++ b/core/fxcrt/widestring.cpp

@@ -829,7 +829,7 @@
   TrimBack(kWideTrimChars);
 }
 int WideString::GetInteger() const {
-  return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
+  return m_pData ? StringToInt(m_pData->m_String) : 0;
 }
 
 std::wostream& operator<<(std::wostream& os, const WideString& str) {
commit	2150b69003c17a7ab4d4541053f0b1eca4a6e85c	[log] [tgz]
author	Lei Zhang <thestig@chromium.org>	Thu Nov 14 21:20:23 2024 +0000
committer	Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com>	Thu Nov 14 21:20:23 2024 +0000
tree	606b7acfb9c9ff4648481b3c2b5de0e2c90123a1
parent	f3ddbc93c29a12d5104da5bc35556d63a3b49646 [diff]