Add checks of index operations on string classes

Specifically, the index passed to GetAt(), SetAt() and operator[] is now
asserted to be in bounds.

BUG=chromium:752480, pdfium:828

Change-Id: I9e94d58c98a8eaaaae53cd0e3ffe2123ea17d8c4
Reviewed-on: https://pdfium-review.googlesource.com/10651
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_cmapparser.cpp b/core/fpdfapi/font/cpdf_cmapparser.cpp
index 0f9bca6..cbf6939 100644
--- a/core/fpdfapi/font/cpdf_cmapparser.cpp
+++ b/core/fpdfapi/font/cpdf_cmapparser.cpp
@@ -35,7 +35,7 @@
 CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) {
   if (word.GetLength() <= 2)
     return CFX_ByteStringC();
-  return CFX_ByteStringC(&word[1], word.GetLength() - 2);
+  return word.Mid(1, word.GetLength() - 2);  // Drop first and last char.
 }
 
 }  // namespace
@@ -137,6 +137,8 @@
 
 // Static.
 uint32_t CPDF_CMapParser::CMap_GetCode(const CFX_ByteStringC& word) {
+  if (word.IsEmpty())
+    return 0;
   pdfium::base::CheckedNumeric<uint32_t> num = 0;
   if (word.GetAt(0) == '<') {
     for (int i = 1; i < word.GetLength() && std::isxdigit(word.GetAt(i)); ++i) {
diff --git a/core/fpdfdoc/cpdf_variabletext.cpp b/core/fpdfdoc/cpdf_variabletext.cpp
index 1fc3294..74a5e3a 100644
--- a/core/fpdfdoc/cpdf_variabletext.cpp
+++ b/core/fpdfdoc/cpdf_variabletext.cpp
@@ -430,18 +430,16 @@
     switch (word) {
       case 0x0D:
         if (m_bMultiLine) {
-          if (swText.GetAt(i + 1) == 0x0A)
-            i += 1;
-
+          if (i + 1 < sz && swText.GetAt(i + 1) == 0x0A)
+            i++;
           wp.AdvanceSection();
           AddSection(wp, secinfo);
         }
         break;
       case 0x0A:
         if (m_bMultiLine) {
-          if (swText.GetAt(i + 1) == 0x0D)
-            i += 1;
-
+          if (i + 1 < sz && swText.GetAt(i + 1) == 0x0D)
+            i++;
           wp.AdvanceSection();
           AddSection(wp, secinfo);
         }
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index 8f0e376..1470ad7 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -945,9 +945,9 @@
     if (item.m_CharCode == static_cast<uint32_t>(-1))
       continue;
     CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
-    wchar_t wChar = wstrItem.GetAt(0);
-    if ((wstrItem.IsEmpty() || wChar == 0) && item.m_CharCode)
-      wChar = (wchar_t)item.m_CharCode;
+    wchar_t wChar = !wstrItem.IsEmpty() ? wstrItem[0] : 0;
+    if (wChar == 0)
+      wChar = item.m_CharCode;
     if (wChar)
       str += wChar;
   }
@@ -1222,9 +1222,11 @@
   if (strCurText.IsEmpty())
     strCurText = m_TextBuf.AsStringC();
   FX_STRSIZE nCount = strCurText.GetLength();
+  if (nCount < 1)
+    return false;
   int nIndex = nCount - 1;
   wchar_t wcTmp = strCurText.GetAt(nIndex);
-  while (wcTmp == 0x20 && nIndex <= nCount - 1 && nIndex >= 0)
+  while (wcTmp == 0x20 && nIndex > 0 && nIndex <= nCount - 1)
     wcTmp = strCurText.GetAt(--nIndex);
   if (0x2D == wcTmp || 0xAD == wcTmp) {
     if (--nIndex > 0) {
@@ -1353,6 +1355,8 @@
   }
   CFX_WideString PrevStr =
       m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode);
+  if (PrevStr.IsEmpty())
+    return GenerateCharacter::None;
   wchar_t preChar = PrevStr.GetAt(PrevStr.GetLength() - 1);
   CFX_Matrix matrix = pObj->GetTextMatrix();
   matrix.Concat(formMatrix);
diff --git a/core/fxcrt/cfx_bytestring.cpp b/core/fxcrt/cfx_bytestring.cpp
index c29d24a..0aba3be 100644
--- a/core/fxcrt/cfx_bytestring.cpp
+++ b/core/fxcrt/cfx_bytestring.cpp
@@ -694,14 +694,10 @@
   return nCount;
 }
 
-void CFX_ByteString::SetAt(FX_STRSIZE nIndex, char ch) {
-  if (!m_pData) {
-    return;
-  }
-  ASSERT(nIndex >= 0);
-  ASSERT(nIndex < m_pData->m_nDataLength);
+void CFX_ByteString::SetAt(FX_STRSIZE index, char c) {
+  ASSERT(index >= 0 && index < GetLength());
   ReallocBeforeWrite(m_pData->m_nDataLength);
-  m_pData->m_String[nIndex] = ch;
+  m_pData->m_String[index] = c;
 }
 
 CFX_WideString CFX_ByteString::UTF8Decode() const {
diff --git a/core/fxcrt/cfx_bytestring.h b/core/fxcrt/cfx_bytestring.h
index cf688b4..df31751 100644
--- a/core/fxcrt/cfx_bytestring.h
+++ b/core/fxcrt/cfx_bytestring.h
@@ -107,15 +107,14 @@
   const CFX_ByteString& operator+=(const CFX_ByteString& str);
   const CFX_ByteString& operator+=(const CFX_ByteStringC& bstrc);
 
-  uint8_t GetAt(FX_STRSIZE nIndex) const {
-    return m_pData ? m_pData->m_String[nIndex] : 0;
+  uint8_t GetAt(FX_STRSIZE index) const {
+    ASSERT(index >= 0 && index < GetLength());
+    return m_pData->m_String[index];
   }
 
-  uint8_t operator[](FX_STRSIZE nIndex) const {
-    return m_pData ? m_pData->m_String[nIndex] : 0;
-  }
+  uint8_t operator[](FX_STRSIZE index) const { return GetAt(index); }
 
-  void SetAt(FX_STRSIZE nIndex, char ch);
+  void SetAt(FX_STRSIZE index, char c);
   FX_STRSIZE Insert(FX_STRSIZE index, char ch);
   FX_STRSIZE InsertAtFront(char ch) { return Insert(0, ch); }
   FX_STRSIZE InsertAtBack(char ch) { return Insert(GetLength(), ch); }
diff --git a/core/fxcrt/cfx_bytestring_unittest.cpp b/core/fxcrt/cfx_bytestring_unittest.cpp
index b79a765..680a37e 100644
--- a/core/fxcrt/cfx_bytestring_unittest.cpp
+++ b/core/fxcrt/cfx_bytestring_unittest.cpp
@@ -11,13 +11,52 @@
 #include "testing/gtest/include/gtest/gtest.h"
 #include "third_party/base/stl_util.h"
 
+TEST(fxcrt, ByteStringGetAt) {
+  CFX_ByteString short_string("a");
+  CFX_ByteString longer_string("abc");
+  CFX_ByteString embedded_nul_string("ab\0c", 4);
+
+#ifndef NDEBUG
+  EXPECT_DEATH({ short_string.GetAt(-1); }, ".*");
+#endif
+  EXPECT_EQ('a', short_string.GetAt(0));
+#ifndef NDEBUG
+  EXPECT_DEATH({ short_string.GetAt(1); }, ".*");
+#endif
+  EXPECT_EQ('c', longer_string.GetAt(2));
+  EXPECT_EQ('b', embedded_nul_string.GetAt(1));
+  EXPECT_EQ('\0', embedded_nul_string.GetAt(2));
+  EXPECT_EQ('c', embedded_nul_string.GetAt(3));
+}
+
 TEST(fxcrt, ByteStringOperatorSubscript) {
-  // CFX_ByteString includes the NUL terminator for non-empty strings.
   CFX_ByteString abc("abc");
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc[-1]; }, ".*");
+#endif
   EXPECT_EQ('a', abc[0]);
   EXPECT_EQ('b', abc[1]);
   EXPECT_EQ('c', abc[2]);
-  EXPECT_EQ(0, abc[3]);
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc[3]; }, ".*");
+#endif
+}
+
+TEST(fxcrt, ByteStringSetAt) {
+  // SetAt() asserts that the index is within [0, GetLength()).
+  CFX_ByteString abc("abc");
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc.SetAt(-1, 'd'); }, ".*");
+#endif
+  abc.SetAt(0, 'd');
+  EXPECT_EQ("dbc", abc);
+  abc.SetAt(1, 'e');
+  EXPECT_EQ("dec", abc);
+  abc.SetAt(2, 'f');
+  EXPECT_EQ("def", abc);
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc.SetAt(3, 'g'); }, ".*");
+#endif
 }
 
 TEST(fxcrt, ByteStringOperatorLT) {
@@ -905,24 +944,35 @@
 }
 
 TEST(fxcrt, ByteStringCGetAt) {
-  CFX_ByteString short_string("a");
-  CFX_ByteString longer_string("abc");
-  CFX_ByteString embedded_nul_string("ab\0c", 4);
+  CFX_ByteStringC short_string("a");
+  CFX_ByteStringC longer_string("abc");
+  CFX_ByteStringC embedded_nul_string("ab\0c", 4);
 
-  EXPECT_EQ('a', short_string.GetAt(0));
-  EXPECT_EQ('c', longer_string.GetAt(2));
-  EXPECT_EQ('b', embedded_nul_string.GetAt(1));
-  EXPECT_EQ('\0', embedded_nul_string.GetAt(2));
-  EXPECT_EQ('c', embedded_nul_string.GetAt(3));
+#ifndef NDEBUG
+  EXPECT_DEATH({ short_string.GetAt(-1); }, ".*");
+#endif
+  EXPECT_EQ('a', static_cast<char>(short_string.GetAt(0)));
+#ifndef NDEBUG
+  EXPECT_DEATH({ short_string.GetAt(1); }, ".*");
+#endif
+  EXPECT_EQ('c', static_cast<char>(longer_string.GetAt(2)));
+  EXPECT_EQ('b', static_cast<char>(embedded_nul_string.GetAt(1)));
+  EXPECT_EQ('\0', static_cast<char>(embedded_nul_string.GetAt(2)));
+  EXPECT_EQ('c', static_cast<char>(embedded_nul_string.GetAt(3)));
 }
 
 TEST(fxcrt, ByteStringCOperatorSubscript) {
   // CFX_ByteStringC includes the NUL terminator for non-empty strings.
   CFX_ByteStringC abc("abc");
-  EXPECT_EQ('a', abc[0]);
-  EXPECT_EQ('b', abc[1]);
-  EXPECT_EQ('c', abc[2]);
-  EXPECT_EQ(0, abc[3]);
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc[-1]; }, ".*");
+#endif
+  EXPECT_EQ('a', static_cast<char>(abc[0]));
+  EXPECT_EQ('b', static_cast<char>(abc[1]));
+  EXPECT_EQ('c', static_cast<char>(abc[2]));
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc[3]; }, ".*");
+#endif
 }
 
 TEST(fxcrt, ByteStringCOperatorLT) {
diff --git a/core/fxcrt/cfx_string_c_template.h b/core/fxcrt/cfx_string_c_template.h
index 6a95a05..bc0fe1e 100644
--- a/core/fxcrt/cfx_string_c_template.h
+++ b/core/fxcrt/cfx_string_c_template.h
@@ -119,8 +119,13 @@
   FX_STRSIZE GetLength() const { return m_Length; }
   bool IsEmpty() const { return m_Length == 0; }
 
-  UnsignedType GetAt(FX_STRSIZE index) const { return m_Ptr.Get()[index]; }
+  UnsignedType GetAt(FX_STRSIZE index) const {
+    ASSERT(index >= 0 && index < GetLength());
+    return m_Ptr.Get()[index];
+  }
+
   CharType CharAt(FX_STRSIZE index) const {
+    ASSERT(index >= 0 && index < GetLength());
     return static_cast<CharType>(m_Ptr.Get()[index]);
   }
 
@@ -159,9 +164,7 @@
     return CFX_StringCTemplate(m_Ptr.Get() + m_Length - count, count);
   }
 
-  const UnsignedType& operator[](size_t index) const {
-    return m_Ptr.Get()[index];
-  }
+  UnsignedType operator[](FX_STRSIZE index) const { return GetAt(index); }
 
   bool operator<(const CFX_StringCTemplate& that) const {
     int result = FXSYS_cmp(reinterpret_cast<const CharType*>(m_Ptr.Get()),
diff --git a/core/fxcrt/cfx_widestring.cpp b/core/fxcrt/cfx_widestring.cpp
index a1fa7ec..c2e1e48 100644
--- a/core/fxcrt/cfx_widestring.cpp
+++ b/core/fxcrt/cfx_widestring.cpp
@@ -841,14 +841,10 @@
   return nCount;
 }
 
-void CFX_WideString::SetAt(FX_STRSIZE nIndex, wchar_t ch) {
-  if (!m_pData) {
-    return;
-  }
-  ASSERT(nIndex >= 0);
-  ASSERT(nIndex < m_pData->m_nDataLength);
+void CFX_WideString::SetAt(FX_STRSIZE index, wchar_t c) {
+  ASSERT(index >= 0 && index < GetLength());
   ReallocBeforeWrite(m_pData->m_nDataLength);
-  m_pData->m_String[nIndex] = ch;
+  m_pData->m_String[index] = c;
 }
 
 // static
diff --git a/core/fxcrt/cfx_widestring.h b/core/fxcrt/cfx_widestring.h
index ccb1e75..b49e898 100644
--- a/core/fxcrt/cfx_widestring.h
+++ b/core/fxcrt/cfx_widestring.h
@@ -101,15 +101,14 @@
 
   bool operator<(const CFX_WideString& str) const;
 
-  wchar_t GetAt(FX_STRSIZE nIndex) const {
-    return m_pData ? m_pData->m_String[nIndex] : 0;
+  wchar_t GetAt(FX_STRSIZE index) const {
+    ASSERT(index >= 0 && index < GetLength());
+    return m_pData->m_String[index];
   }
 
-  wchar_t operator[](FX_STRSIZE nIndex) const {
-    return m_pData ? m_pData->m_String[nIndex] : 0;
-  }
+  wchar_t operator[](FX_STRSIZE index) const { return GetAt(index); }
 
-  void SetAt(FX_STRSIZE nIndex, wchar_t ch);
+  void SetAt(FX_STRSIZE index, wchar_t c);
 
   int Compare(const wchar_t* str) const;
   int Compare(const CFX_WideString& str) const;
diff --git a/core/fxcrt/cfx_widestring_unittest.cpp b/core/fxcrt/cfx_widestring_unittest.cpp
index 6e7b63c..a237632 100644
--- a/core/fxcrt/cfx_widestring_unittest.cpp
+++ b/core/fxcrt/cfx_widestring_unittest.cpp
@@ -10,13 +10,51 @@
 
 #include "testing/gtest/include/gtest/gtest.h"
 
+TEST(fxcrt, WideStringGetAt) {
+  CFX_WideString short_string(L"a");
+  CFX_WideString longer_string(L"abc");
+  CFX_WideString embedded_nul_string(L"ab\0c", 4);
+
+#ifndef NDEBUG
+  EXPECT_DEATH({ short_string.GetAt(-1); }, ".*");
+#endif
+  EXPECT_EQ(L'a', short_string.GetAt(0));
+#ifndef NDEBUG
+  EXPECT_DEATH({ short_string.GetAt(1); }, ".*");
+#endif
+  EXPECT_EQ(L'c', longer_string.GetAt(2));
+  EXPECT_EQ(L'b', embedded_nul_string.GetAt(1));
+  EXPECT_EQ(L'\0', embedded_nul_string.GetAt(2));
+  EXPECT_EQ(L'c', embedded_nul_string.GetAt(3));
+}
+
 TEST(fxcrt, WideStringOperatorSubscript) {
-  // CFX_WideString includes the NUL terminator for non-empty strings.
   CFX_WideString abc(L"abc");
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc[-1]; }, ".*");
+#endif
   EXPECT_EQ(L'a', abc[0]);
   EXPECT_EQ(L'b', abc[1]);
   EXPECT_EQ(L'c', abc[2]);
-  EXPECT_EQ(L'\0', abc[3]);
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc[3]; }, ".*");  // Index == length: first invalid one.
+#endif
+}
+
+TEST(fxcrt, WideStringSetAt) {
+  CFX_WideString abc(L"abc");
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc.SetAt(-1, L'd'); }, ".*");
+#endif
+  abc.SetAt(0, L'd');
+  EXPECT_EQ(L"dbc", abc);
+  abc.SetAt(1, L'e');
+  EXPECT_EQ(L"dec", abc);
+  abc.SetAt(2, L'f');
+  EXPECT_EQ(L"def", abc);
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc.SetAt(3, L'g'); }, ".*");
+#endif
 }
 
 TEST(fxcrt, WideStringOperatorLT) {
@@ -742,13 +780,35 @@
   EXPECT_EQ(nullptr, cleared_string.raw_str());
 }
 
+TEST(fxcrt, WideStringCGetAt) {
+  CFX_WideStringC short_string(L"a");
+  CFX_WideStringC longer_string(L"abc");
+  CFX_WideStringC embedded_nul_string(L"ab\0c", 4);
+
+#ifndef NDEBUG
+  EXPECT_DEATH({ short_string.GetAt(-1); }, ".*");
+#endif
+  EXPECT_EQ(L'a', static_cast<wchar_t>(short_string.GetAt(0)));
+#ifndef NDEBUG
+  EXPECT_DEATH({ short_string.GetAt(1); }, ".*");
+#endif
+  EXPECT_EQ(L'c', static_cast<wchar_t>(longer_string.GetAt(2)));
+  EXPECT_EQ(L'b', static_cast<wchar_t>(embedded_nul_string.GetAt(1)));
+  EXPECT_EQ(L'\0', static_cast<wchar_t>(embedded_nul_string.GetAt(2)));
+  EXPECT_EQ(L'c', static_cast<wchar_t>(embedded_nul_string.GetAt(3)));
+}
+
 TEST(fxcrt, WideStringCOperatorSubscript) {
-  // CFX_WideStringC includes the NUL terminator for non-empty strings.
   CFX_WideStringC abc(L"abc");
-  EXPECT_EQ(L'a', abc.CharAt(0));
-  EXPECT_EQ(L'b', abc.CharAt(1));
-  EXPECT_EQ(L'c', abc.CharAt(2));
-  EXPECT_EQ(L'\0', abc.CharAt(3));
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc[-1]; }, ".*");
+#endif
+  EXPECT_EQ(L'a', static_cast<wchar_t>(abc[0]));
+  EXPECT_EQ(L'b', static_cast<wchar_t>(abc[1]));
+  EXPECT_EQ(L'c', static_cast<wchar_t>(abc[2]));
+#ifndef NDEBUG
+  EXPECT_DEATH({ abc[3]; }, ".*");  // Index == length: first invalid one.
+#endif
 }
 
 TEST(fxcrt, WideStringCOperatorLT) {
diff --git a/fxbarcode/oned/BC_OnedCode128Writer.cpp b/fxbarcode/oned/BC_OnedCode128Writer.cpp
index 644933d..95a95b6 100644
--- a/fxbarcode/oned/BC_OnedCode128Writer.cpp
+++ b/fxbarcode/oned/BC_OnedCode128Writer.cpp
@@ -195,7 +195,7 @@
     if (std::isdigit(ch)) {
       patternIndex = FXSYS_atoi(contents.Mid(position, 2).c_str());
       ++position;
-      if (std::isdigit(contents[position]))
+      if (position < contents.GetLength() && std::isdigit(contents[position]))
         ++position;
     } else {
       patternIndex = static_cast<int32_t>(ch);
diff --git a/xfa/fxfa/fm2js/cxfa_fmexpression.cpp b/xfa/fxfa/fm2js/cxfa_fmexpression.cpp
index a74239f..c6b5814 100644
--- a/xfa/fxfa/fm2js/cxfa_fmexpression.cpp
+++ b/xfa/fxfa/fm2js/cxfa_fmexpression.cpp
@@ -60,7 +60,7 @@
     javascript << L"(\n";
   }
   javascript << L"function ";
-  if (m_wsName.GetAt(0) == L'!') {
+  if (!m_wsName.IsEmpty() && m_wsName[0] == L'!') {
     CFX_WideString tempName =
         EXCLAMATION_IN_IDENTIFIER + m_wsName.Right(m_wsName.GetLength() - 1);
     javascript << tempName;