Fix worst-case O(n^2) copy in pdfium::Split<>()
Use views rather than copying for {Byte,Wide}String.
The implementation for {Byte,Wide}StringView was already linear.
e.g. ByteStringSplitEfficiency goes from 885 ms down to 9 ms
locally on my machine.
Change-Id: Ief648c57085605f87b842b6cedd92c48b4fd5eaf
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/57970
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fxcrt/fx_string.h b/core/fxcrt/fx_string.h
index 8152fbc..5c048ca 100644
--- a/core/fxcrt/fx_string.h
+++ b/core/fxcrt/fx_string.h
@@ -30,15 +30,15 @@
template <typename StrType>
std::vector<StrType> Split(const StrType& that, typename StrType::CharType ch) {
std::vector<StrType> result;
- StrType remaining(that);
+ StringViewTemplate<typename StrType::CharType> remaining(that.span());
while (1) {
Optional<size_t> index = remaining.Find(ch);
if (!index.has_value())
break;
- result.push_back(remaining.Left(index.value()));
+ result.emplace_back(remaining.Left(index.value()));
remaining = remaining.Right(remaining.GetLength() - index.value() - 1);
}
- result.push_back(remaining);
+ result.emplace_back(remaining);
return result;
}
diff --git a/core/fxcrt/fx_string_unittest.cpp b/core/fxcrt/fx_string_unittest.cpp
index ad813ab..86b43ce 100644
--- a/core/fxcrt/fx_string_unittest.cpp
+++ b/core/fxcrt/fx_string_unittest.cpp
@@ -272,3 +272,43 @@
EXPECT_EQ(L"", result[1]);
EXPECT_EQ(L"a", result[2]);
}
+
+TEST(fxstring, ByteStringSplitEfficiency) {
+ std::vector<char> commas(50000, ',');
+ ByteString input(commas.data(), commas.size());
+ std::vector<ByteString> result;
+ result = fxcrt::Split(input, ',');
+ ASSERT_EQ(commas.size() + 1, result.size());
+ EXPECT_EQ("", result.front());
+ EXPECT_EQ("", result.back());
+}
+
+TEST(fxstring, ByteStringViewSplitEfficiency) {
+ std::vector<char> commas(50000, ',');
+ ByteStringView input(commas.data(), commas.size());
+ std::vector<ByteStringView> result;
+ result = fxcrt::Split(input, ',');
+ ASSERT_EQ(commas.size() + 1, result.size());
+ EXPECT_EQ("", result.front());
+ EXPECT_EQ("", result.back());
+}
+
+TEST(fxstring, WideStringSplitEfficiency) {
+ std::vector<wchar_t> commas(50000, L',');
+ WideString input(commas.data(), commas.size());
+ std::vector<WideString> result;
+ result = fxcrt::Split(input, ',');
+ ASSERT_EQ(commas.size() + 1, result.size());
+ EXPECT_EQ(L"", result.front());
+ EXPECT_EQ(L"", result.back());
+}
+
+TEST(fxstring, WideStringViewSplitEfficiency) {
+ std::vector<wchar_t> commas(50000, L',');
+ WideStringView input(commas.data(), commas.size());
+ std::vector<WideStringView> result;
+ result = fxcrt::Split(input, ',');
+ ASSERT_EQ(commas.size() + 1, result.size());
+ EXPECT_EQ(L"", result.front());
+ EXPECT_EQ(L"", result.back());
+}