Make CodePointView::Iterator() demonstrably safe.
Use WideStringView operations rather than C-style pointers.
Code is specific to Windows and its 16-bit wchar_t.
Change-Id: I7e016a92d29621b017709faa02ab6d0f6204c516
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/116830
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fxcrt/BUILD.gn b/core/fxcrt/BUILD.gn
index 90b9ec7..dd0a412 100644
--- a/core/fxcrt/BUILD.gn
+++ b/core/fxcrt/BUILD.gn
@@ -161,6 +161,7 @@
sources += [
"cfx_fileaccess_windows.cpp",
"cfx_fileaccess_windows.h",
+ "code_point_view.cpp",
"fx_folder_windows.cpp",
"win/scoped_select_object.h",
"win/win_util.cc",
diff --git a/core/fxcrt/code_point_view.cpp b/core/fxcrt/code_point_view.cpp
new file mode 100644
index 0000000..417e1d0
--- /dev/null
+++ b/core/fxcrt/code_point_view.cpp
@@ -0,0 +1,39 @@
+// Copyright 2024 The PDFium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fxcrt/code_point_view.h"
+
+#include "core/fxcrt/utf16.h"
+
+#if !defined(WCHAR_T_IS_16_BIT)
+#error "Building on wrong platform".
+#endif
+
+namespace pdfium {
+
+CodePointView::CodePointView(WideStringView backing) : backing_(backing) {}  // begin() iterates over `backing`.
+
+CodePointView::~CodePointView() = default;  // Out-of-line; declared in the header.
+
+CodePointView::Iterator::Iterator(WideStringView str_view)
+ : current_(str_view), code_point_(Decode()) {}  // Decode() reads current_, may set next_.
+
+CodePointView::Iterator::~Iterator() = default;  // Out-of-line; declared in the header.
+
+char32_t CodePointView::Iterator::Decode() {
+  if (current_.IsEmpty()) {
+    return kSentinel;  // Nothing left to decode; `next_` is left untouched.
+  }
+  const char32_t lead = current_.Front();
+  next_ = current_.Substr(1);
+  if (!IsHighSurrogate(lead) || next_.IsEmpty() ||
+      !IsLowSurrogate(next_.Front())) {
+    return lead;  // Not a surrogate pair: yield the single code unit as-is.
+  }
+  const wchar_t trail = next_.Front();
+  next_ = next_.Substr(1);  // The pair consumes the low surrogate as well.
+  return SurrogatePair(lead, trail).ToCodePoint();
+}
+
+} // namespace pdfium
diff --git a/core/fxcrt/code_point_view.h b/core/fxcrt/code_point_view.h
index 7ced8fb..5372fd1 100644
--- a/core/fxcrt/code_point_view.h
+++ b/core/fxcrt/code_point_view.h
@@ -7,7 +7,6 @@
#include "build/build_config.h"
#include "core/fxcrt/string_view_template.h"
-#include "core/fxcrt/utf16.h"
#include "third_party/base/check_op.h"
namespace pdfium {
@@ -19,6 +18,8 @@
public:
class Iterator {
public:
+ ~Iterator();
+
bool operator==(const Iterator& other) const {
return current_ == other.current_;
}
@@ -28,8 +29,7 @@
}
Iterator& operator++() {
- DCHECK_LT(current_, end_);
- current_ += IsSupplementary(code_point_) ? 2 : 1;
+ current_ = next_;
code_point_ = Decode();
return *this;
}
@@ -44,43 +44,23 @@
static constexpr char32_t kSentinel = -1;
- Iterator(const wchar_t* begin, const wchar_t* end)
- : current_(begin), end_(end), code_point_(Decode()) {}
+ explicit Iterator(WideStringView str_view);
- char32_t Decode() {
- if (current_ >= end_) {
- return kSentinel;
- }
+ char32_t Decode();
- char32_t code_point = *current_;
- if (IsHighSurrogate(code_point)) {
- const wchar_t* next = current_ + 1;
- if (next < end_ && IsLowSurrogate(*next)) {
- code_point = SurrogatePair(code_point, *next).ToCodePoint();
- }
- }
-
- return code_point;
- }
-
- const wchar_t* current_;
- const wchar_t* end_;
+ WideStringView current_;
+ WideStringView next_;
char32_t code_point_;
};
- explicit CodePointView(WideStringView backing)
- : begin_(backing.begin()), end_(backing.end()) {
- DCHECK_LE(begin_, end_);
- }
+ explicit CodePointView(WideStringView backing);
+ ~CodePointView();
- Iterator begin() const { return Iterator(begin_, end_); }
-
- Iterator end() const { return Iterator(end_, end_); }
+ Iterator begin() const { return Iterator(backing_); }
+ Iterator end() const { return Iterator(WideStringView()); }
private:
- // Note that a `WideStringView` member would make the constructor too complex.
- const wchar_t* begin_;
- const wchar_t* end_;
+ WideStringView backing_;
};
#else
using CodePointView = WideStringView;