Convert selection to use count instead of end index

This CL changes the Text Edit Engine code to use a count instead of an
end index for the selection range. Using a count lets us differentiate
a one-character selection at the beginning of the text from an empty
selection.

A few new tests were added to cover Unicode word-break behaviour; some
of them do not pass yet and are commented out.

Change-Id: Icce8f5003102ef0a850151ccdf16d3c2226d94bf
Reviewed-on: https://pdfium-review.googlesource.com/13491
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
diff --git a/xfa/fde/cfde_texteditengine.cpp b/xfa/fde/cfde_texteditengine.cpp
index 8075ea7..0c64e39 100644
--- a/xfa/fde/cfde_texteditengine.cpp
+++ b/xfa/fde/cfde_texteditengine.cpp
@@ -588,31 +588,29 @@
 
   has_selection_ = true;
   selection_.start_idx = 0;
-  selection_.end_idx = text_length_ - 1;
+  selection_.count = text_length_;
 }
 
 void CFDE_TextEditEngine::ClearSelection() {
   has_selection_ = false;
   selection_.start_idx = 0;
-  selection_.end_idx = 0;
+  selection_.count = 0;
 }
 
-void CFDE_TextEditEngine::SetSelection(size_t start_idx, size_t end_idx) {
-  // If the points are the same, then we pretend the selection doesn't exist
-  // anymore.
-  if (start_idx == end_idx) {
+void CFDE_TextEditEngine::SetSelection(size_t start_idx, size_t count) {
+  if (count == 0) {
     ClearSelection();
     return;
   }
 
   if (start_idx > text_length_)
     return;
-  if (end_idx > text_length_)
-    end_idx = text_length_ - 1;
+  if (start_idx + count > text_length_)
+    count = text_length_ - start_idx;
 
   has_selection_ = true;
   selection_.start_idx = start_idx;
-  selection_.end_idx = end_idx;
+  selection_.count = count;
 }
 
 WideString CFDE_TextEditEngine::GetSelectedText() const {
@@ -621,22 +619,30 @@
 
   WideString text;
   if (selection_.start_idx < gap_position_) {
-    if (selection_.end_idx < gap_position_) {
+    // Fully on left of gap.
+    if (selection_.start_idx + selection_.count < gap_position_) {
       text += WideStringView(content_.data() + selection_.start_idx,
-                             selection_.end_idx - selection_.start_idx + 1);
+                             selection_.count);
       return text;
     }
 
+    // Pre-gap text
     text += WideStringView(content_.data() + selection_.start_idx,
                            gap_position_ - selection_.start_idx);
-    text += WideStringView(
-        content_.data() + gap_position_ + gap_size_,
-        selection_.end_idx - (gap_position_ - selection_.start_idx) + 1);
+
+    if (selection_.count - (gap_position_ - selection_.start_idx) > 0) {
+      // Post-gap text
+      text += WideStringView(
+          content_.data() + gap_position_ + gap_size_,
+          selection_.count - (gap_position_ - selection_.start_idx));
+    }
+
     return text;
   }
 
+  // Fully right of gap
   text += WideStringView(content_.data() + gap_size_ + selection_.start_idx,
-                         selection_.end_idx - selection_.start_idx + 1);
+                         selection_.count);
   return text;
 }
 
@@ -645,8 +651,7 @@
   if (!has_selection_)
     return L"";
 
-  return Delete(selection_.start_idx,
-                selection_.end_idx - selection_.start_idx + 1, add_operation);
+  return Delete(selection_.start_idx, selection_.count, add_operation);
 }
 
 WideString CFDE_TextEditEngine::Delete(size_t start_idx,
@@ -965,6 +970,9 @@
 
 std::pair<size_t, size_t> CFDE_TextEditEngine::BoundsForWordAt(
     size_t idx) const {
+  if (idx > text_length_)
+    return {0, 0};
+
   CFDE_TextEditEngine::Iterator iter(this);
   iter.SetAt(idx);
   iter.FindNextBreakPos(true);
@@ -972,7 +980,7 @@
 
   iter.FindNextBreakPos(false);
   size_t end_idx = iter.GetAt();
-  return {start_idx, end_idx};
+  return {start_idx, end_idx - start_idx + 1};
 }
 
 CFDE_TextEditEngine::Iterator::Iterator(const CFDE_TextEditEngine* engine)
diff --git a/xfa/fde/cfde_texteditengine.h b/xfa/fde/cfde_texteditengine.h
index 750b62a..40f12e5 100644
--- a/xfa/fde/cfde_texteditengine.h
+++ b/xfa/fde/cfde_texteditengine.h
@@ -140,12 +140,12 @@
   size_t GetIndexAtEndOfLine(size_t pos) { return 0; }
 
   void SelectAll();
-  void SetSelection(size_t start_idx, size_t end_idx);
+  void SetSelection(size_t start_idx, size_t count);
   void ClearSelection();
   bool HasSelection() const { return has_selection_; }
-  // Returns <start, end> indices of the selection.
+  // Returns <start_idx, count> of the selection.
   std::pair<size_t, size_t> GetSelection() const {
-    return {selection_.start_idx, selection_.end_idx};
+    return {selection_.start_idx, selection_.count};
   }
   WideString GetSelectedText() const;
   WideString DeleteSelectedText(
@@ -159,7 +159,7 @@
   size_t GetWidthOfChar(size_t idx);
   // Non-const so we can force a Layout() if needed.
   size_t GetIndexForPoint(const CFX_PointF& point);
-  // <start_idx, end_idx>
+  // <start_idx, count>
   std::pair<size_t, size_t> BoundsForWordAt(size_t idx) const;
 
   // Returns <bidi level, character rect>
@@ -197,7 +197,7 @@
 
   struct Selection {
     size_t start_idx;
-    size_t end_idx;
+    size_t count;
   };
 
   CFX_RectF contents_bounding_box_;
diff --git a/xfa/fde/cfde_texteditengine_unittest.cpp b/xfa/fde/cfde_texteditengine_unittest.cpp
index 48ed647..51940f2 100644
--- a/xfa/fde/cfde_texteditengine_unittest.cpp
+++ b/xfa/fde/cfde_texteditengine_unittest.cpp
@@ -244,10 +244,10 @@
 
   engine()->SelectAll();
   size_t start_idx;
-  size_t end_idx;
-  std::tie(start_idx, end_idx) = engine()->GetSelection();
+  size_t count;
+  std::tie(start_idx, count) = engine()->GetSelection();
   EXPECT_EQ(0U, start_idx);
-  EXPECT_EQ(10U, end_idx);
+  EXPECT_EQ(11U, count);
 
   // Selection before gap.
   EXPECT_STREQ(L"Hello World", engine()->GetSelectedText().c_str());
@@ -272,7 +272,7 @@
   EXPECT_STREQ(L"", engine()->GetText().c_str());
 
   engine()->Insert(0, L"Hello World");
-  engine()->SetSelection(5, 9);
+  engine()->SetSelection(5, 5);
   EXPECT_STREQ(L" Worl", engine()->DeleteSelectedText().c_str());
   EXPECT_FALSE(engine()->HasSelection());
   EXPECT_STREQ(L"Hellod", engine()->GetText().c_str());
@@ -419,112 +419,155 @@
 
 TEST_F(CFDE_TextEditEngineTest, BoundsForWordAt) {
   size_t start_idx;
-  size_t end_idx;
+  size_t count;
 
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(100);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(100);
   EXPECT_EQ(0U, start_idx);
-  EXPECT_EQ(0U, end_idx);
-  engine()->SetSelection(start_idx, end_idx);
+  EXPECT_EQ(0U, count);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"", engine()->GetSelectedText().c_str());
 
   engine()->Clear();
   engine()->Insert(0, L"Hello");
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(0);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(0);
   EXPECT_EQ(0U, start_idx);
-  EXPECT_EQ(4U, end_idx);
-  engine()->SetSelection(start_idx, end_idx);
+  EXPECT_EQ(5U, count);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"Hello", engine()->GetSelectedText().c_str());
 
   engine()->Clear();
   engine()->Insert(0, L"Hello World");
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(100);
-  EXPECT_EQ(11U, start_idx);
-  EXPECT_EQ(11U, end_idx);
-  engine()->SetSelection(start_idx, end_idx);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(100);
+  EXPECT_EQ(0U, start_idx);
+  EXPECT_EQ(0U, count);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"", engine()->GetSelectedText().c_str());
 
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(0);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(0);
   EXPECT_EQ(0U, start_idx);
-  EXPECT_EQ(4U, end_idx);
-  engine()->SetSelection(start_idx, end_idx);
+  EXPECT_EQ(5U, count);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"Hello", engine()->GetSelectedText().c_str());
 
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(1);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(1);
   EXPECT_EQ(0U, start_idx);
-  EXPECT_EQ(4U, end_idx);
-  engine()->SetSelection(start_idx, end_idx);
+  EXPECT_EQ(5U, count);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"Hello", engine()->GetSelectedText().c_str());
 
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(4);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(4);
   EXPECT_EQ(0U, start_idx);
-  EXPECT_EQ(4U, end_idx);
-  engine()->SetSelection(start_idx, end_idx);
+  EXPECT_EQ(5U, count);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"Hello", engine()->GetSelectedText().c_str());
 
   // Select the space
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(5);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(5);
   EXPECT_EQ(5U, start_idx);
-  EXPECT_EQ(5U, end_idx);
-  engine()->SetSelection(start_idx, end_idx);
-  EXPECT_STREQ(L"", engine()->GetSelectedText().c_str());
+  EXPECT_EQ(1U, count);
+  engine()->SetSelection(start_idx, count);
+  EXPECT_STREQ(L" ", engine()->GetSelectedText().c_str());
 
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(6);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(6);
   EXPECT_EQ(6U, start_idx);
-  EXPECT_EQ(10U, end_idx);
-  engine()->SetSelection(start_idx, end_idx);
+  EXPECT_EQ(5U, count);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"World", engine()->GetSelectedText().c_str());
 
   engine()->Clear();
   engine()->Insert(0, L"123 456 789");
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(5);
-  engine()->SetSelection(start_idx, end_idx);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(5);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"456", engine()->GetSelectedText().c_str());
 
   engine()->Clear();
   engine()->Insert(0, L"123def789");
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(5);
-  engine()->SetSelection(start_idx, end_idx);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(5);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"123def789", engine()->GetSelectedText().c_str());
 
   engine()->Clear();
   engine()->Insert(0, L"abc456ghi");
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(5);
-  engine()->SetSelection(start_idx, end_idx);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(5);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"abc456ghi", engine()->GetSelectedText().c_str());
 
   engine()->Clear();
   engine()->Insert(0, L"hello, world");
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(0);
-  engine()->SetSelection(start_idx, end_idx);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(0);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"hello", engine()->GetSelectedText().c_str());
 
   engine()->Clear();
   engine()->Insert(0, L"hello, world");
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(5);
-  engine()->SetSelection(start_idx, end_idx);
-  EXPECT_STREQ(L"", engine()->GetSelectedText().c_str());
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(5);
+  engine()->SetSelection(start_idx, count);
+  EXPECT_STREQ(L",", engine()->GetSelectedText().c_str());
 
   engine()->Clear();
   engine()->Insert(0, L"np-complete");
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(6);
-  engine()->SetSelection(start_idx, end_idx);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(6);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"complete", engine()->GetSelectedText().c_str());
 
   engine()->Clear();
   engine()->Insert(0, L"(123) 456-7890");
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(0);
-  engine()->SetSelection(start_idx, end_idx);
-  EXPECT_STREQ(L"", engine()->GetSelectedText().c_str());
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(0);
+  engine()->SetSelection(start_idx, count);
+  EXPECT_STREQ(L"(", engine()->GetSelectedText().c_str());
 
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(1);
-  engine()->SetSelection(start_idx, end_idx);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(1);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"123", engine()->GetSelectedText().c_str());
 
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(7);
-  engine()->SetSelection(start_idx, end_idx);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(7);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"456", engine()->GetSelectedText().c_str());
 
-  std::tie(start_idx, end_idx) = engine()->BoundsForWordAt(11);
-  engine()->SetSelection(start_idx, end_idx);
+  std::tie(start_idx, count) = engine()->BoundsForWordAt(11);
+  engine()->SetSelection(start_idx, count);
   EXPECT_STREQ(L"7890", engine()->GetSelectedText().c_str());
+
+  // Tests from:
+  // http://unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.html#samples
+  struct bounds {
+    size_t start;
+    size_t end;
+  };
+  struct {
+    const wchar_t* str;
+    std::vector<const wchar_t*> results;
+  } tests[] = {
+      // {L"\r\na\n\u0308", {L"\r\n", L"a", L"\n", L"\u0308"}},
+      // {L"a\u0308", {L"a\u0308"}},
+      // {L" \u200d\u0646", {L" \u200d", L"\u0646"}},
+      // {L"\u0646\u200d ", {L"\u0646\u200d", L" "}},
+      {L"AAA", {L"AAA"}},
+      {L"A:A", {L"A:A"}},
+      {L"A::A", {L"A", L":", L":", L"A"}},
+      // {L"\u05d0'", {L"\u05d0'"}},
+      // {L"\u05d0\"\u05d0", {L"\u05d0\"\u05d0"}},
+      {L"A00A", {L"A00A"}},
+      {L"0,0", {L"0,0"}},
+      {L"0,,0", {L"0", L",", L",", L"0"}},
+      {L"\u3031\u3031", {L"\u3031\u3031"}},
+      {L"A_0_\u3031_", {L"A_0_\u3031_"}},
+      {L"A__A", {L"A__A"}},
+      // {L"\u200d\u2640", {L"\u200d\u2640"}},
+      // {L"a\u0308\u200b\u0308b", {L"a\u0308\u200b\u0308b"}},
+  };
+
+  for (auto t : tests) {
+    engine()->Clear();
+    engine()->Insert(0, t.str);
+
+    size_t idx = 0;
+    for (const auto* res : t.results) {
+      std::tie(start_idx, count) = engine()->BoundsForWordAt(idx);
+      engine()->SetSelection(start_idx, count);
+      EXPECT_STREQ(res, engine()->GetSelectedText().c_str())
+          << "Input: '" << t.str << "'";
+      idx += count;
+    }
+  }
 }
diff --git a/xfa/fwl/cfwl_datetimepicker.h b/xfa/fwl/cfwl_datetimepicker.h
index 276fea4..97c5cd1 100644
--- a/xfa/fwl/cfwl_datetimepicker.h
+++ b/xfa/fwl/cfwl_datetimepicker.h
@@ -53,7 +53,7 @@
   WideString GetEditText() const;
 
   bool HasSelection() const { return m_pEdit->HasSelection(); }
-  // Returns <start, end> indices of the selection.
+  // Returns <start, count> of the selection.
   std::pair<size_t, size_t> GetSelection() const {
     return m_pEdit->GetSelection();
   }
diff --git a/xfa/fwl/cfwl_edit.cpp b/xfa/fwl/cfwl_edit.cpp
index 1bec150..426d46e 100644
--- a/xfa/fwl/cfwl_edit.cpp
+++ b/xfa/fwl/cfwl_edit.cpp
@@ -481,10 +481,10 @@
   bool bShowSel = !!(m_pProperties->m_dwStates & FWL_WGTSTATE_Focused);
   if (bShowSel && m_EdtEngine.HasSelection()) {
     size_t sel_start;
-    size_t sel_end;
-    std::tie(sel_start, sel_end) = m_EdtEngine.GetSelection();
-    std::vector<CFX_RectF> rects = m_EdtEngine.GetCharacterRectsInRange(
-        sel_start, sel_end - sel_start + 1);
+    size_t count;
+    std::tie(sel_start, count) = m_EdtEngine.GetSelection();
+    std::vector<CFX_RectF> rects =
+        m_EdtEngine.GetCharacterRectsInRange(sel_start, count);
 
     CXFA_Path path;
     for (auto& rect : rects) {
@@ -1237,11 +1237,11 @@
 void CFWL_Edit::OnButtonDoubleClick(CFWL_MessageMouse* pMsg) {
   size_t click_idx = m_EdtEngine.GetIndexForPoint(DeviceToEngine(pMsg->m_pos));
   size_t start_idx;
-  size_t end_idx;
-  std::tie(start_idx, end_idx) = m_EdtEngine.BoundsForWordAt(click_idx);
+  size_t count;
+  std::tie(start_idx, count) = m_EdtEngine.BoundsForWordAt(click_idx);
 
-  m_EdtEngine.SetSelection(start_idx, end_idx);
-  m_CursorPosition = end_idx;
+  m_EdtEngine.SetSelection(start_idx, count);
+  m_CursorPosition = start_idx + count;
   RepaintRect(m_rtEngine);
 }
 
@@ -1260,10 +1260,13 @@
     SetCursorPosition(length);
 
   size_t sel_start;
-  size_t sel_end;
-  std::tie(sel_start, sel_end) = m_EdtEngine.GetSelection();
-  m_EdtEngine.SetSelection(std::min(sel_start, m_CursorPosition),
-                           std::max(sel_end, m_CursorPosition));
+  size_t count;
+  std::tie(sel_start, count) = m_EdtEngine.GetSelection();
+  size_t original_end = sel_start + count;
+  sel_start = std::min(sel_start, m_CursorPosition);
+  m_EdtEngine.SetSelection(
+      std::min(sel_start, m_CursorPosition),
+      std::max(original_end, m_CursorPosition) - sel_start);
 }
 
 void CFWL_Edit::OnKeyDown(CFWL_MessageKey* pMsg) {
@@ -1273,8 +1276,8 @@
   size_t sel_start = m_CursorPosition;
   if (m_EdtEngine.HasSelection()) {
     size_t start_idx;
-    size_t end_idx;
-    std::tie(start_idx, end_idx) = m_EdtEngine.GetSelection();
+    size_t count;
+    std::tie(start_idx, count) = m_EdtEngine.GetSelection();
     sel_start = start_idx;
   }
 
diff --git a/xfa/fwl/cfwl_edit.h b/xfa/fwl/cfwl_edit.h
index e85baaa..cfebb25 100644
--- a/xfa/fwl/cfwl_edit.h
+++ b/xfa/fwl/cfwl_edit.h
@@ -74,7 +74,7 @@
   void SelectAll();
   void ClearSelection();
   bool HasSelection() const;
-  // Returns <start, end> indices of the selection.
+  // Returns <start, count> of the selection.
   std::pair<size_t, size_t> GetSelection() const;
 
   int32_t GetLimit() const;
diff --git a/xfa/fxfa/cxfa_fftextedit.cpp b/xfa/fxfa/cxfa_fftextedit.cpp
index e27971d..82b6f63 100644
--- a/xfa/fxfa/cxfa_fftextedit.cpp
+++ b/xfa/fxfa/cxfa_fftextedit.cpp
@@ -299,8 +299,9 @@
     CFWL_DateTimePicker* pDateTime = (CFWL_DateTimePicker*)pEdit;
     eParam.m_wsNewText = pDateTime->GetEditText();
     if (pDateTime->HasSelection()) {
-      std::tie(eParam.m_iSelStart, eParam.m_iSelEnd) =
-          pDateTime->GetSelection();
+      size_t count;
+      std::tie(eParam.m_iSelStart, count) = pDateTime->GetSelection();
+      eParam.m_iSelEnd = eParam.m_iSelStart + count;
     }
   } else {
     eParam.m_wsNewText = pEdit->GetText();