Bounds check in GetWordBreakProperty()

wchar_t might be wider than 16 bits on non-Windows platforms, so a code
point above 0xFFFF would index past the end of the code point property
table.

- Make tables static and provide accessor functions.

Bug: chromium:1043510
Change-Id: Ib573971692e8c35d299ed0fb376039878827ef7d
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/65411
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/testing/resources/javascript/xfa_specific/bug_1043510.evt b/testing/resources/javascript/xfa_specific/bug_1043510.evt
new file mode 100644
index 0000000..891a49d
--- /dev/null
+++ b/testing/resources/javascript/xfa_specific/bug_1043510.evt
@@ -0,0 +1 @@
+mousedoubleclick,left,0,0
\ No newline at end of file
diff --git a/testing/resources/javascript/xfa_specific/bug_1043510.pdf b/testing/resources/javascript/xfa_specific/bug_1043510.pdf
new file mode 100644
index 0000000..24a3cf1
--- /dev/null
+++ b/testing/resources/javascript/xfa_specific/bug_1043510.pdf
Binary files differ
diff --git a/xfa/fde/cfde_texteditengine.cpp b/xfa/fde/cfde_texteditengine.cpp
index 66c24d1..3b9c1ad 100644
--- a/xfa/fde/cfde_texteditengine.cpp
+++ b/xfa/fde/cfde_texteditengine.cpp
@@ -98,20 +98,6 @@
   DeleteOperation delete_op_;
 };
 
-bool CheckStateChangeForWordBreak(WordBreakProperty from,
-                                  WordBreakProperty to) {
-  ASSERT(static_cast<int>(from) < 13);
-
-  return !!(gs_FX_WordBreak_Table[static_cast<int>(from)] &
-            static_cast<uint16_t>(1 << static_cast<int>(to)));
-}
-
-WordBreakProperty GetWordBreakProperty(wchar_t wcCodePoint) {
-  uint8_t dwProperty = gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1];
-  return static_cast<WordBreakProperty>((wcCodePoint & 1) ? (dwProperty & 0x0F)
-                                                          : (dwProperty >> 4));
-}
-
 int GetBreakFlagsFor(WordBreakProperty current, WordBreakProperty next) {
   if (current == WordBreakProperty::kMidLetter) {
     if (next == WordBreakProperty::kALetter)
@@ -1275,17 +1261,17 @@
   WordBreakProperty ePreType = WordBreakProperty::kNone;
   if (!IsEOF(!bPrev)) {
     Next(!bPrev);
-    ePreType = GetWordBreakProperty(GetChar());
+    ePreType = FX_GetWordBreakProperty(GetChar());
     Next(bPrev);
   }
 
-  WordBreakProperty eCurType = GetWordBreakProperty(GetChar());
+  WordBreakProperty eCurType = FX_GetWordBreakProperty(GetChar());
   bool bFirst = true;
   while (!IsEOF(bPrev)) {
     Next(bPrev);
 
-    WordBreakProperty eNextType = GetWordBreakProperty(GetChar());
-    bool wBreak = CheckStateChangeForWordBreak(eCurType, eNextType);
+    WordBreakProperty eNextType = FX_GetWordBreakProperty(GetChar());
+    bool wBreak = FX_CheckStateChangeForWordBreak(eCurType, eNextType);
     if (wBreak) {
       if (IsEOF(bPrev)) {
         Next(!bPrev);
@@ -1310,7 +1296,7 @@
         }
 
         Next(bPrev);
-        eNextType = GetWordBreakProperty(GetChar());
+        eNextType = FX_GetWordBreakProperty(GetChar());
         if (BreakFlagsChanged(nFlags, eNextType)) {
           Next(!bPrev);
           Next(!bPrev);
diff --git a/xfa/fde/cfde_wordbreak_data.cpp b/xfa/fde/cfde_wordbreak_data.cpp
index 3c4864b..9d85efd 100644
--- a/xfa/fde/cfde_wordbreak_data.cpp
+++ b/xfa/fde/cfde_wordbreak_data.cpp
@@ -6,6 +6,9 @@
 
 #include "xfa/fde/cfde_wordbreak_data.h"
 
+#include "core/fxcrt/fx_memory.h"
+#include "core/fxcrt/fx_system.h"
+
 namespace {
 
 enum WordBreakValue : uint16_t {
@@ -64,9 +67,7 @@
                   (1 << static_cast<int>(WordBreakProperty::kExtendNumLet)),
               "WordBreakValue must match");
 
-}  // namespace
-
-const uint16_t gs_FX_WordBreak_Table[] = {
+const uint16_t kWordBreakTable[] = {
     // WordBreakProperty::kNone
     0xFFFF,
 
@@ -114,7 +115,7 @@
                             kWordBreakValueExtendNumLet)),
 };
 
-const uint8_t gs_FX_WordBreak_CodePointProperties[(0xFFFF - 1) / 2 + 1] = {
+const uint8_t kCodePointProperties[(0xFFFF - 1) / 2 + 1] = {
     0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x90, 0xA0,
     0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0x89, 0x00, 0x00, 0x07, 0x77, 0x77, 0x77,
@@ -2847,3 +2848,22 @@
     0x00, 0x77, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x05, 0x55, 0x00, 0x00,
 };
+
+}  // namespace
+
+bool FX_CheckStateChangeForWordBreak(WordBreakProperty from,
+                                     WordBreakProperty to) {
+  ASSERT(static_cast<int>(from) < 13);
+  return !!(kWordBreakTable[static_cast<int>(from)] &
+            static_cast<uint16_t>(1 << static_cast<int>(to)));
+}
+
+WordBreakProperty FX_GetWordBreakProperty(wchar_t wcCodePoint) {
+  size_t index = static_cast<size_t>(wcCodePoint) / 2;
+  if (index >= FX_ArraySize(kCodePointProperties))
+    return WordBreakProperty::kNone;
+
+  uint8_t dwProperty = kCodePointProperties[index];
+  return static_cast<WordBreakProperty>((wcCodePoint & 1) ? (dwProperty & 0x0F)
+                                                          : (dwProperty >> 4));
+}
diff --git a/xfa/fde/cfde_wordbreak_data.h b/xfa/fde/cfde_wordbreak_data.h
index 28e26cc..ca07b20 100644
--- a/xfa/fde/cfde_wordbreak_data.h
+++ b/xfa/fde/cfde_wordbreak_data.h
@@ -25,7 +25,8 @@
   kExtendNumLet,
 };
 
-extern const uint16_t gs_FX_WordBreak_Table[];
-extern const uint8_t gs_FX_WordBreak_CodePointProperties[];
+bool FX_CheckStateChangeForWordBreak(WordBreakProperty from,
+                                     WordBreakProperty to);
+WordBreakProperty FX_GetWordBreakProperty(wchar_t wcCodePoint);
 
 #endif  // XFA_FDE_CFDE_WORDBREAK_DATA_H_