Add StringToInt()
Add utility functions to core/fxcrt/fx_string.h that take a
{Byte,Wide}StringView. Implement them using the same logic as
FXSYS_StrToInt() inside fx_system.cpp.
Add unit tests that are identical to the existing unit tests for
FXSYS_atoi() and FXSYS_wtoi() to provide test coverage. Then add a few
more suggested test cases for additional coverage.
Replace a few FXSYS_atoi() and FXSYS_wtoi() calls to put StringToInt()
to use.
Change-Id: I74ee1e8d7130d300401f0bf17a7b06a327c60c3c
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/126010
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index a2e2213..7fdd089 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -35,6 +35,7 @@
#include "core/fxcrt/data_vector.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_safe_types.h"
+#include "core/fxcrt/fx_string.h"
#include "core/fxcrt/notreached.h"
#include "core/fxcrt/scoped_set_insertion.h"
#include "core/fxcrt/span.h"
@@ -566,8 +567,8 @@
info.pos = offset.ValueOrDie();
// TODO(art-snake): The info.gennum is uint16_t, but version may be
- // greated than max<uint16_t>. Needs solve this issue.
- const int32_t version = FXSYS_atoi(pEntry.subspan(11).data());
+ // greater than max<uint16_t>. Need to solve this issue.
+ const int32_t version = StringToInt(ByteStringView(pEntry.subspan(11)));
info.gennum = version;
info.type = ObjectType::kNormal;
}
diff --git a/core/fxcrt/fx_string.cpp b/core/fxcrt/fx_string.cpp
index 1443c29..212620d 100644
--- a/core/fxcrt/fx_string.cpp
+++ b/core/fxcrt/fx_string.cpp
@@ -9,6 +9,7 @@
#include <stdint.h>
#include <array>
+#include <limits>
#include <string>
#include <vector>
@@ -64,6 +65,67 @@
}
}
+template <typename IntType, typename StringViewType>
+IntType StringToIntImpl(StringViewType str) {
+  if (str.IsEmpty()) {
+    return 0;
+  }
+
+  // Parse an optional leading '-' or '+'. Leading whitespace is not skipped.
+  bool neg = str.CharAt(0u) == '-';
+  if (neg || str.CharAt(0u) == '+') {
+    str = str.Substr(1u);
+  }
+
+  IntType num = 0;
+  while (!str.IsEmpty() && FXSYS_IsDecimalDigit(str.CharAt(0u))) {
+    IntType val = FXSYS_DecimalCharToInt(str.CharAt(0u));
+    if (num > (std::numeric_limits<IntType>::max() - val) / 10) {
+      if (neg && std::numeric_limits<IntType>::is_signed) {
+        // Saturate to MIN for a negative signed value whose magnitude exceeds
+        // MAX. This also yields the correct result for an input of exactly MIN.
+        return std::numeric_limits<IntType>::min();
+      }
+      // Otherwise saturate to MAX: either a positive signed value larger than
+      // MAX, or any out-of-range value when IntType is an unsigned type.
+      return std::numeric_limits<IntType>::max();
+    }
+    num = num * 10 + val;
+    str = str.Substr(1u);
+  }
+  // A negative input must return -num. Since num may be of unsigned type,
+  // compute it as ~num + 1 to avoid the warning of applying the unary minus
+  // operator to an unsigned type.
+  return neg ? ~num + 1 : num;
+}
+
+// Intended to work for the cases where `T` is float or double.
+template <class T>
+T StringToFloatImpl(ByteStringView strc) {
+  // Skip leading spaces. Only ' ' is skipped, not other whitespace characters.
+  size_t start = 0;
+  size_t len = strc.GetLength();
+  while (start < len && strc[start] == ' ') {
+    ++start;
+  }
+
+  // Skip a single leading '+' sign, if present.
+  if (start < len && strc[start] == '+') {
+    ++start;
+  }
+
+  ByteStringView sub_strc = strc.Substr(start, len - start);
+
+  T value;
+  auto result = fast_float::from_chars(sub_strc.begin(), sub_strc.end(), value);
+
+  // Return 0 for parsing errors (e.g. an empty or unparsable string), but
+  // return `value` as-is when the only error is result_out_of_range.
+  return result.ec == std::errc() || result.ec == std::errc::result_out_of_range
+             ? value
+             : 0;
+}
+
} // namespace
ByteString FX_UTF8Encode(WideStringView wsStr) {
@@ -97,38 +159,16 @@
return result;
}
-namespace {
-
-template <class T>
-T StringTo(ByteStringView strc) {
- // Skip leading whitespaces.
- size_t start = 0;
- size_t len = strc.GetLength();
- while (start < len && strc[start] == ' ') {
- ++start;
- }
-
- // Skip a leading '+' sign.
- if (start < len && strc[start] == '+') {
- ++start;
- }
-
- ByteStringView sub_strc = strc.Substr(start, len - start);
-
- T value;
- auto result = fast_float::from_chars(sub_strc.begin(), sub_strc.end(), value);
-
- // Return 0 for parsing errors. Some examples of errors are an empty string
- // and a string that cannot be converted to T.
- return result.ec == std::errc() || result.ec == std::errc::result_out_of_range
- ? value
- : 0;
+int32_t StringToInt(ByteStringView str) {
+ return StringToIntImpl<int32_t, ByteStringView>(str);
}
-} // namespace
+int32_t StringToInt(WideStringView wsStr) {
+ return StringToIntImpl<int32_t, WideStringView>(wsStr);
+}
float StringToFloat(ByteStringView strc) {
- return StringTo<float>(strc);
+ return StringToFloatImpl<float>(strc);
}
float StringToFloat(WideStringView wsStr) {
@@ -136,7 +176,7 @@
}
double StringToDouble(ByteStringView strc) {
- return StringTo<double>(strc);
+ return StringToFloatImpl<double>(strc);
}
double StringToDouble(WideStringView wsStr) {
diff --git a/core/fxcrt/fx_string.h b/core/fxcrt/fx_string.h
index cd79418..88a7afe 100644
--- a/core/fxcrt/fx_string.h
+++ b/core/fxcrt/fx_string.h
@@ -24,6 +24,9 @@
ByteString FX_UTF8Encode(WideStringView wsStr);
std::u16string FX_UTF16Encode(WideStringView wsStr);
+int32_t StringToInt(ByteStringView str);
+int32_t StringToInt(WideStringView wsStr);
+
float StringToFloat(ByteStringView str);
float StringToFloat(WideStringView wsStr);
diff --git a/core/fxcrt/fx_string_unittest.cpp b/core/fxcrt/fx_string_unittest.cpp
index b9e74e3..a6037d4 100644
--- a/core/fxcrt/fx_string_unittest.cpp
+++ b/core/fxcrt/fx_string_unittest.cpp
@@ -52,6 +52,66 @@
}
#endif // defined(WCHAR_T_IS_16_BIT)
+TEST(fxstring, ByteStringToInt) {
+  EXPECT_EQ(0, StringToInt(""));
+  EXPECT_EQ(0, StringToInt("0"));
+  EXPECT_EQ(-1, StringToInt("-1"));
+  EXPECT_EQ(2345, StringToInt("+2345"));
+  EXPECT_EQ(2345, StringToInt("2345"));
+  EXPECT_EQ(-2147483647, StringToInt("-2147483647"));
+  // The max int32_t value.
+  EXPECT_EQ(2147483647, StringToInt("2147483647"));
+  // The min int32_t value.
+  EXPECT_EQ(-2147483647 - 1, StringToInt("-2147483648"));
+
+  // Out of range values saturate to the max / min int32_t value.
+  EXPECT_EQ(2147483647, StringToInt("2147483623423412348"));
+  EXPECT_EQ(2147483647, StringToInt("2147483648"));
+  EXPECT_EQ(-2147483647 - 1, StringToInt("-2147483652"));
+
+  // Lone or repeated signs, non-digit prefixes, and trailing non-digits.
+  EXPECT_EQ(0, StringToInt("-"));
+  EXPECT_EQ(0, StringToInt("c--"));
+  EXPECT_EQ(0, StringToInt("--i"));
+  EXPECT_EQ(0, StringToInt("+"));
+  EXPECT_EQ(0, StringToInt("c++"));
+  EXPECT_EQ(0, StringToInt("++i"));
+  EXPECT_EQ(1, StringToInt("1beef"));
+  EXPECT_EQ(0, StringToInt("beef1"));
+  EXPECT_EQ(0, StringToInt("b33f"));
+  EXPECT_EQ(33, StringToInt("33f"));
+}
+
+TEST(fxstring, WideStringToInt) {
+  EXPECT_EQ(0, StringToInt(L""));
+  EXPECT_EQ(0, StringToInt(L"0"));
+  EXPECT_EQ(-1, StringToInt(L"-1"));
+  EXPECT_EQ(2345, StringToInt(L"2345"));
+  EXPECT_EQ(2345, StringToInt(L"+2345"));
+  EXPECT_EQ(-2147483647, StringToInt(L"-2147483647"));
+  // The max int32_t value.
+  EXPECT_EQ(2147483647, StringToInt(L"2147483647"));
+  // The min int32_t value.
+  EXPECT_EQ(-2147483647 - 1, StringToInt(L"-2147483648"));
+
+  // Out of range values saturate to the max / min int32_t value.
+  EXPECT_EQ(2147483647, StringToInt(L"2147483623423412348"));
+  EXPECT_EQ(2147483647, StringToInt(L"2147483648"));
+  EXPECT_EQ(-2147483647 - 1, StringToInt(L"-2147483652"));
+
+  // Lone or repeated signs, non-digit prefixes, and trailing non-digits.
+  EXPECT_EQ(0, StringToInt(L"-"));
+  EXPECT_EQ(0, StringToInt(L"c--"));
+  EXPECT_EQ(0, StringToInt(L"--i"));
+  EXPECT_EQ(0, StringToInt(L"+"));
+  EXPECT_EQ(0, StringToInt(L"c++"));
+  EXPECT_EQ(0, StringToInt(L"++i"));
+  EXPECT_EQ(1, StringToInt(L"1beef"));
+  EXPECT_EQ(0, StringToInt(L"beef1"));
+  EXPECT_EQ(0, StringToInt(L"b33f"));
+  EXPECT_EQ(33, StringToInt(L"33f"));
+}
+
TEST(fxstring, ByteStringToFloat) {
EXPECT_FLOAT_EQ(0.0f, StringToFloat(""));
EXPECT_FLOAT_EQ(0.0f, StringToFloat("0"));
diff --git a/core/fxcrt/fx_system.h b/core/fxcrt/fx_system.h
index e7c9f4d..cc08353 100644
--- a/core/fxcrt/fx_system.h
+++ b/core/fxcrt/fx_system.h
@@ -83,6 +83,10 @@
} // extern "C"
// C++-only section to allow future use of TerminatedPtr<>.
+// Also consider using / implementing equivalent functions that take string
+// views in core/fxcrt/fx_string.h instead.
+// TODO(crbug.com/42270715): Remove these if all callers switch to the string
+// view equivalent.
int FXSYS_stricmp(const char* str1, const char* str2);
int FXSYS_wcsicmp(const wchar_t* str1, const wchar_t* str2);
int32_t FXSYS_atoi(const char* str);
diff --git a/core/fxcrt/widestring.cpp b/core/fxcrt/widestring.cpp
index 708ceb0..a4bdde4 100644
--- a/core/fxcrt/widestring.cpp
+++ b/core/fxcrt/widestring.cpp
@@ -829,7 +829,7 @@
TrimBack(kWideTrimChars);
}
int WideString::GetInteger() const {
- return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
+ return m_pData ? StringToInt(m_pData->m_String) : 0;
}
std::wostream& operator<<(std::wostream& os, const WideString& str) {