Rewrite some unsafe string code prior to adding UNSAFE_BUFFERS.

This is a set of file changes extracted from
  https://pdfium-review.googlesource.com/c/pdfium/+/116070

that re-implement string methods with span-based code rather than
wrapping the existing expressions in UNSAFE_BUFFERS. These changes can
be landed first.

Change-Id: I14e61aa0164db1da8e985f243e8cbf01fdbd05a7
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/116831
Reviewed-by: Thomas Sepez <tsepez@google.com>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fxcrt/bytestring.cpp b/core/fxcrt/bytestring.cpp
index 1b78d08..667c777 100644
--- a/core/fxcrt/bytestring.cpp
+++ b/core/fxcrt/bytestring.cpp
@@ -271,25 +271,22 @@
 }
 
 bool ByteString::EqualNoCase(ByteStringView str) const {
-  if (!m_pData)
+  if (!m_pData) {
     return str.IsEmpty();
-
-  size_t len = str.GetLength();
-  if (m_pData->m_nDataLength != len)
+  }
+  if (m_pData->m_nDataLength != str.GetLength()) {
     return false;
-
-  const uint8_t* pThis = (const uint8_t*)m_pData->m_String;
-  const uint8_t* pThat = str.raw_str();
-  for (size_t i = 0; i < len; i++) {
-    if ((*pThis) != (*pThat)) {
-      uint8_t this_char = tolower(*pThis);
-      uint8_t that_char = tolower(*pThat);
-      if (this_char != that_char) {
-        return false;
-      }
+  }
+  pdfium::span<const uint8_t> this_span = pdfium::as_bytes(m_pData->span());
+  pdfium::span<const uint8_t> that_span = str.raw_span();
+  while (!this_span.empty()) {
+    uint8_t this_char = this_span.front();
+    uint8_t that_char = that_span.front();
+    if (this_char != that_char && tolower(this_char) != tolower(that_char)) {
+      return false;
     }
-    pThis++;
-    pThat++;
+    this_span = this_span.subspan(1);
+    that_span = that_span.subspan(1);
   }
   return true;
 }
@@ -366,6 +363,7 @@
 void ByteString::TrimWhitespaceBack() {
   TrimBack(kTrimChars);
 }
+
 std::ostream& operator<<(std::ostream& os, const ByteString& str) {
   return os.write(str.c_str(), str.GetLength());
 }
diff --git a/core/fxcrt/cfx_seekablestreamproxy.cpp b/core/fxcrt/cfx_seekablestreamproxy.cpp
index 92ff676..edf33d0 100644
--- a/core/fxcrt/cfx_seekablestreamproxy.cpp
+++ b/core/fxcrt/cfx_seekablestreamproxy.cpp
@@ -16,6 +16,7 @@
 #include "core/fxcrt/data_vector.h"
 #include "core/fxcrt/fx_extension.h"
 #include "core/fxcrt/fx_safe_types.h"
+#include "core/fxcrt/span_util.h"
 #include "third_party/base/check.h"
 #include "third_party/base/check_op.h"
 
@@ -67,26 +68,19 @@
   return {iSrcNum, iDstNum};
 }
 
+void UTF16ToWChar(pdfium::span<wchar_t> buffer) {
 #if defined(WCHAR_T_IS_32_BIT)
-static_assert(sizeof(wchar_t) > 2, "wchar_t is too small");
-
-void UTF16ToWChar(void* pBuffer, size_t iLength) {
-  DCHECK(pBuffer);
-  DCHECK_GT(iLength, 0u);
-
-  uint16_t* pSrc = static_cast<uint16_t*>(pBuffer);
-  wchar_t* pDst = static_cast<wchar_t*>(pBuffer);
-
+  auto src = fxcrt::reinterpret_span<uint16_t>(buffer);
   // Perform self-intersecting copy in reverse order.
-  for (size_t i = iLength; i > 0; --i)
-    pDst[i - 1] = static_cast<wchar_t>(pSrc[i - 1]);
-}
+  for (size_t i = buffer.size(); i > 0; --i) {
+    buffer[i - 1] = static_cast<wchar_t>(src[i - 1]);
+  }
 #endif  // defined(WCHAR_T_IS_32_BIT)
+}
 
-void SwapByteOrder(uint16_t* pStr, size_t iLength) {
-  while (iLength-- > 0) {
-    uint16_t wch = *pStr;
-    *pStr++ = (wch >> 8) | (wch << 8);
+void SwapByteOrder(pdfium::span<uint16_t> str) {
+  for (auto& wch : str) {
+    wch = (wch >> 8) | (wch << 8);
   }
 }
 
@@ -191,13 +185,10 @@
     size_t iBytes = size * 2;
     size_t iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
     size = iLen / 2;
-    if (m_wCodePage == FX_CodePage::kUTF16BE)
-      SwapByteOrder(reinterpret_cast<uint16_t*>(pStr), size);
-
-#if defined(WCHAR_T_IS_32_BIT)
-    if (size > 0)
-      UTF16ToWChar(pStr, size);
-#endif
+    if (m_wCodePage == FX_CodePage::kUTF16BE) {
+      SwapByteOrder({reinterpret_cast<uint16_t*>(pStr), size});
+    }
+    UTF16ToWChar({pStr, size});
     return size;
   }
 
diff --git a/core/fxcrt/fx_string.cpp b/core/fxcrt/fx_string.cpp
index b6bdb5a..94ee466 100644
--- a/core/fxcrt/fx_string.cpp
+++ b/core/fxcrt/fx_string.cpp
@@ -8,6 +8,7 @@
 
 #include <stdint.h>
 
+#include <array>
 #include <iterator>
 
 #include "build/build_config.h"
@@ -54,7 +55,7 @@
     byte_size = 4;
   }
 
-  static constexpr uint8_t kPrefix[] = {0xc0, 0xe0, 0xf0};
+  static constexpr std::array<uint8_t, 3> kPrefix = {{0xc0, 0xe0, 0xf0}};
   int order = 1 << ((byte_size - 1) * 6);
   buffer += kPrefix[byte_size - 2] | (code_point / order);
   for (int i = 0; i < byte_size - 1; i++) {
diff --git a/core/fxcrt/string_template.cpp b/core/fxcrt/string_template.cpp
index 45339b5..50cdf2d 100644
--- a/core/fxcrt/string_template.cpp
+++ b/core/fxcrt/string_template.cpp
@@ -53,7 +53,7 @@
   }
   DCHECK_EQ(m_pData->m_nRefs, 1);
   m_pData->m_nDataLength = nNewLength;
-  m_pData->m_String[nNewLength] = 0;
+  m_pData->capacity_span()[nNewLength] = 0;
   if (m_pData->m_nAllocLength - nNewLength >= 32) {
     // Over arbitrary threshold, so pay the price to relocate.  Force copy to
     // always occur by holding a second reference to the string.
@@ -64,40 +64,29 @@
 
 template <typename T>
 size_t StringTemplate<T>::Remove(T chRemove) {
-  if (IsEmpty()) {
-    return 0;
-  }
-
-  T* pstrSource = m_pData->m_String;
-  T* pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
-  while (pstrSource < pstrEnd) {
-    if (*pstrSource == chRemove) {
-      break;
+  size_t count = 0;
+  for (const auto& ch : span()) {
+    if (ch == chRemove) {
+      count++;
     }
-    pstrSource++;
   }
-  if (pstrSource == pstrEnd) {
+  if (count == 0) {
     return 0;
   }
-
-  ptrdiff_t copied = pstrSource - m_pData->m_String;
   ReallocBeforeWrite(m_pData->m_nDataLength);
-  pstrSource = m_pData->m_String + copied;
-  pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
-
-  T* pstrDest = pstrSource;
-  while (pstrSource < pstrEnd) {
-    if (*pstrSource != chRemove) {
-      *pstrDest = *pstrSource;
-      pstrDest++;
+  auto src_span = m_pData->span();
+  auto dst_span = m_pData->span();
+  // Compact in place; dst never overtakes src, so a forward copy is safe.
+  while (!src_span.empty()) {
+    if (src_span[0] != chRemove) {
+      dst_span[0] = src_span[0];
+      dst_span = dst_span.subspan(1);
     }
-    pstrSource++;
+    src_span = src_span.subspan(1);
   }
-
-  *pstrDest = 0;
-  size_t nCount = static_cast<size_t>(pstrSource - pstrDest);
-  m_pData->m_nDataLength -= nCount;
-  return nCount;
+  m_pData->m_nDataLength -= count;
+  m_pData->capacity_span()[m_pData->m_nDataLength] = 0;
+  return count;
 }
 
 template <typename T>
@@ -110,7 +99,7 @@
   ReallocBeforeWrite(new_length);
   fxcrt::spanmove(m_pData->capacity_span().subspan(index + 1),
                   m_pData->capacity_span().subspan(index, new_length - index));
-  m_pData->m_String[index] = ch;
+  m_pData->capacity_span()[index] = ch;
   m_pData->m_nDataLength = new_length;
   return new_length;
 }
@@ -174,7 +163,7 @@
   }
   size_t nLength = m_pData->m_nDataLength;
   while (nLength--) {
-    if (m_pData->m_String[nLength] == ch) {
+    if (m_pData->span()[nLength] == ch) {
       return nLength;
     }
   }
@@ -263,7 +252,7 @@
   while (pos < len) {
     size_t i = 0;
     while (i < targets.GetLength() &&
-           targets.CharAt(i) != m_pData->m_String[pos]) {
+           targets.CharAt(i) != m_pData->span()[pos]) {
       i++;
     }
     if (i == targets.GetLength()) {
@@ -297,7 +286,7 @@
   while (pos) {
     size_t i = 0;
     while (i < targets.GetLength() &&
-           targets.CharAt(i) != m_pData->m_String[pos - 1]) {
+           targets.CharAt(i) != m_pData->span()[pos - 1]) {
       i++;
     }
     if (i == targets.GetLength()) {
@@ -307,8 +296,8 @@
   }
   if (pos < m_pData->m_nDataLength) {
     ReallocBeforeWrite(m_pData->m_nDataLength);
-    m_pData->m_String[pos] = 0;
     m_pData->m_nDataLength = pos;
+    m_pData->capacity_span()[m_pData->m_nDataLength] = 0;
   }
 }
 
@@ -329,7 +318,7 @@
   } else {
     pNewData->m_nDataLength = 0;
   }
-  pNewData->m_String[pNewData->m_nDataLength] = 0;
+  pNewData->capacity_span()[pNewData->m_nDataLength] = 0;
   m_pData = std::move(pNewData);
 }