Remove FX_UTF8Decode() in favor of WideString::FromUTF8()

One UTF-8 to WideString API is enough, and most places use the latter.

Pure code move (plus caller and test updates); no behavior change.

Change-Id: Iaa78f09ff7a87f72e1ae4b0747262a906dcfcc4f
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/114090
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Auto-Submit: Nico Weber <thakis@chromium.org>
diff --git a/core/fxcrt/cfx_fileaccess_windows.cpp b/core/fxcrt/cfx_fileaccess_windows.cpp
index bef9f58..303fcd1 100644
--- a/core/fxcrt/cfx_fileaccess_windows.cpp
+++ b/core/fxcrt/cfx_fileaccess_windows.cpp
@@ -26,7 +26,7 @@
   if (m_hFile)
     return false;
 
-  WideString wname = FX_UTF8Decode(fileName);
+  WideString wname = WideString::FromUTF8(fileName);
   m_hFile = ::CreateFileW(wname.c_str(), GENERIC_READ,
                           FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr,
                           OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
diff --git a/core/fxcrt/fx_string.cpp b/core/fxcrt/fx_string.cpp
index 37b6692..2f13775 100644
--- a/core/fxcrt/fx_string.cpp
+++ b/core/fxcrt/fx_string.cpp
@@ -64,31 +64,6 @@
   }
 }
 
-// Appends a Unicode code point to a `WideString` using either UTF-16 or UTF-32,
-// depending on the platform's definition of `wchar_t`.
-//
-// TODO(crbug.com/pdfium/2031): Always use UTF-16.
-// TODO(crbug.com/pdfium/2041): Migrate to `WideString`.
-void AppendCodePointToWideString(char32_t code_point, WideString& buffer) {
-  if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
-    // Invalid code point above U+10FFFF.
-    return;
-  }
-
-#if defined(WCHAR_T_IS_16_BIT)
-  if (code_point < pdfium::kMinimumSupplementaryCodePoint) {
-    buffer += static_cast<wchar_t>(code_point);
-  } else {
-    // Encode as UTF-16 surrogate pair.
-    pdfium::SurrogatePair surrogate_pair(code_point);
-    buffer += surrogate_pair.high();
-    buffer += surrogate_pair.low();
-  }
-#else
-  buffer += static_cast<wchar_t>(code_point);
-#endif  // defined(WCHAR_T_IS_16_BIT)
-}
-
 }  // namespace
 
 ByteString FX_UTF8Encode(WideStringView wsStr) {
@@ -99,41 +74,6 @@
   return buffer;
 }
 
-WideString FX_UTF8Decode(ByteStringView bsStr) {
-  WideString buffer;
-
-  int remaining = 0;
-  char32_t code_point = 0;
-  for (char byte : bsStr) {
-    uint8_t code_unit = static_cast<uint8_t>(byte);
-    if (code_unit < 0x80) {
-      remaining = 0;
-      AppendCodePointToWideString(code_unit, buffer);
-    } else if (code_unit < 0xc0) {
-      if (remaining > 0) {
-        --remaining;
-        code_point = (code_point << 6) | (code_unit & 0x3f);
-        if (remaining == 0) {
-          AppendCodePointToWideString(code_point, buffer);
-        }
-      }
-    } else if (code_unit < 0xe0) {
-      remaining = 1;
-      code_point = code_unit & 0x1f;
-    } else if (code_unit < 0xf0) {
-      remaining = 2;
-      code_point = code_unit & 0x0f;
-    } else if (code_unit < 0xf8) {
-      remaining = 3;
-      code_point = code_unit & 0x07;
-    } else {
-      remaining = 0;
-    }
-  }
-
-  return buffer;
-}
-
 namespace {
 
 constexpr float kFractionScalesFloat[] = {
diff --git a/core/fxcrt/fx_string.h b/core/fxcrt/fx_string.h
index 49351c1..c99e423 100644
--- a/core/fxcrt/fx_string.h
+++ b/core/fxcrt/fx_string.h
@@ -21,7 +21,6 @@
 }
 
 ByteString FX_UTF8Encode(WideStringView wsStr);
-WideString FX_UTF8Decode(ByteStringView bsStr);
 
 float StringToFloat(ByteStringView str);
 float StringToFloat(WideStringView wsStr);
diff --git a/core/fxcrt/fx_string_unittest.cpp b/core/fxcrt/fx_string_unittest.cpp
index ab7046e..2998658 100644
--- a/core/fxcrt/fx_string_unittest.cpp
+++ b/core/fxcrt/fx_string_unittest.cpp
@@ -64,96 +64,6 @@
 }
 #endif  // defined(WCHAR_T_IS_16_BIT)
 
-TEST(fxstring, FXUTF8Decode) {
-  EXPECT_EQ(L"", FX_UTF8Decode(ByteStringView()));
-  EXPECT_EQ(
-      L"x"
-      L"\u0080"
-      L"\u00ff"
-      L"\ud7ff"
-      L"\ue000"
-      L"\uff2c"
-      L"\uffff"
-      L"y",
-      FX_UTF8Decode("x"
-                    "\u0080"
-                    "\u00ff"
-                    "\ud7ff"
-                    "\ue000"
-                    "\uff2c"
-                    "\uffff"
-                    "y"));
-}
-
-TEST(fxstring, FXUTF8DecodeSupplementary) {
-  EXPECT_EQ(
-      L"\U00010000"
-      L"\U0001f3a8"
-      L"\U0010ffff",
-      FX_UTF8Decode("\U00010000"
-                    "🎨"
-                    "\U0010ffff"));
-}
-
-TEST(fxstring, FXUTF8DecodeErrorRecovery) {
-  EXPECT_EQ(L"(A)", FX_UTF8Decode("(\xc2\x41)")) << "Invalid continuation";
-  EXPECT_EQ(L"()", FX_UTF8Decode("(\xc2\xc2)")) << "Invalid continuation";
-  EXPECT_EQ(L"()", FX_UTF8Decode("(\xc2\xff\x80)")) << "Invalid continuation";
-  EXPECT_EQ(L"()", FX_UTF8Decode("(\x80\x80)")) << "Invalid leading";
-  EXPECT_EQ(L"()", FX_UTF8Decode("(\xff\x80\x80)")) << "Invalid leading";
-  EXPECT_EQ(L"()", FX_UTF8Decode("(\xf8\x80\x80\x80\x80)"))
-      << "Invalid leading";
-  EXPECT_EQ(L"()", FX_UTF8Decode("(\xf8\x88\x80\x80\x80)"))
-      << "Invalid leading";
-  EXPECT_EQ(L"()", FX_UTF8Decode("(\xf4\x90\x80\x80)"))
-      << "Code point greater than U+10FFFF";
-}
-
-TEST(fxstring, FXUTF8EncodeDecodeConsistency) {
-  WideString wstr;
-  wstr.Reserve(0x10000);
-  for (char32_t w = 0; w < pdfium::kMinimumSupplementaryCodePoint; ++w) {
-    if (pdfium::IsHighSurrogate(w) || pdfium::IsLowSurrogate(w)) {
-      // Skip UTF-16 surrogates.
-      continue;
-    }
-    wstr += static_cast<wchar_t>(w);
-  }
-  ASSERT_EQ(0xf800u, wstr.GetLength());
-
-  ByteString bstr = FX_UTF8Encode(wstr.AsStringView());
-  WideString wstr2 = FX_UTF8Decode(bstr.AsStringView());
-  EXPECT_EQ(wstr, wstr2);
-}
-
-TEST(fxstring, FXUTF8EncodeDecodeConsistencyUnpairedHighSurrogates) {
-  WideString wstr;
-  wstr.Reserve(0x400);
-  for (wchar_t w = pdfium::kMinimumHighSurrogateCodeUnit;
-       w <= pdfium::kMaximumHighSurrogateCodeUnit; ++w) {
-    wstr += w;
-  }
-  ASSERT_EQ(0x400u, wstr.GetLength());
-
-  ByteString bstr = FX_UTF8Encode(wstr.AsStringView());
-  WideString wstr2 = FX_UTF8Decode(bstr.AsStringView());
-  EXPECT_EQ(wstr, wstr2);
-}
-
-TEST(fxstring, FXUTF8EncodeDecodeConsistencyUnpairedLowSurrogates) {
-  WideString wstr;
-  wstr.Reserve(0x400);
-  for (wchar_t w = pdfium::kMinimumLowSurrogateCodeUnit;
-       w <= pdfium::kMaximumLowSurrogateCodeUnit; ++w) {
-    wstr += w;
-  }
-  ASSERT_EQ(0x400u, wstr.GetLength());
-
-  ByteString bstr = FX_UTF8Encode(wstr.AsStringView());
-  WideString wstr2 = FX_UTF8Decode(bstr.AsStringView());
-  EXPECT_EQ(wstr, wstr2);
-}
-
 TEST(fxstring, ByteStringToFloat) {
   EXPECT_FLOAT_EQ(0.0f, StringToFloat(""));
   EXPECT_FLOAT_EQ(0.0f, StringToFloat("0"));
diff --git a/core/fxcrt/widestring.cpp b/core/fxcrt/widestring.cpp
index dca0a48..11261f1 100644
--- a/core/fxcrt/widestring.cpp
+++ b/core/fxcrt/widestring.cpp
@@ -298,6 +298,66 @@
   return str;
 }
 
+// Appends `code_point` to `buffer`. With a 16-bit `wchar_t` the code point is
+// written as UTF-16 (a surrogate pair when supplementary); otherwise it is
+// written as one `wchar_t`. Code points above U+10FFFF append nothing.
+// TODO(crbug.com/pdfium/2031): Always use UTF-16.
+// TODO(crbug.com/pdfium/2041): Migrate to `WideString`.
+void AppendCodePointToWideString(char32_t code_point, WideString& buffer) {
+  if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
+    // Invalid code point above U+10FFFF; leave `buffer` unchanged.
+    return;
+  }
+
+#if defined(WCHAR_T_IS_16_BIT)
+  if (code_point < pdfium::kMinimumSupplementaryCodePoint) {
+    buffer += static_cast<wchar_t>(code_point);  // Fits in one code unit.
+  } else {
+    // Encode as UTF-16 surrogate pair.
+    pdfium::SurrogatePair surrogate_pair(code_point);
+    buffer += surrogate_pair.high();
+    buffer += surrogate_pair.low();
+  }
+#else
+  buffer += static_cast<wchar_t>(code_point);  // One `wchar_t` per code point.
+#endif  // defined(WCHAR_T_IS_16_BIT)
+}
+
+WideString UTF8Decode(ByteStringView bsStr) {  // Lenient UTF-8 decoder.
+  WideString buffer;
+
+  int remaining = 0;  // Continuation bytes still expected for `code_point`.
+  char32_t code_point = 0;  // Bits accumulated for the current sequence.
+  for (char byte : bsStr) {
+    uint8_t code_unit = static_cast<uint8_t>(byte);
+    if (code_unit < 0x80) {  // ASCII; abandons any partial sequence.
+      remaining = 0;
+      AppendCodePointToWideString(code_unit, buffer);
+    } else if (code_unit < 0xc0) {  // Continuation byte (0x80..0xbf).
+      if (remaining > 0) {  // Otherwise a stray byte, which is ignored.
+        --remaining;
+        code_point = (code_point << 6) | (code_unit & 0x3f);
+        if (remaining == 0) {  // Sequence complete.
+          AppendCodePointToWideString(code_point, buffer);
+        }
+      }
+    } else if (code_unit < 0xe0) {  // Lead byte of a 2-byte sequence.
+      remaining = 1;
+      code_point = code_unit & 0x1f;
+    } else if (code_unit < 0xf0) {  // Lead byte of a 3-byte sequence.
+      remaining = 2;
+      code_point = code_unit & 0x0f;
+    } else if (code_unit < 0xf8) {  // Lead byte of a 4-byte sequence.
+      remaining = 3;
+      code_point = code_unit & 0x07;
+    } else {  // 0xf8..0xff: not a valid lead byte; drop any partial sequence.
+      remaining = 0;
+    }
+  }
+
+  return buffer;
+}
+
 }  // namespace
 
 namespace fxcrt {
@@ -971,7 +1031,7 @@
 
 // static
 WideString WideString::FromUTF8(ByteStringView str) {
-  return FX_UTF8Decode(str);
+  return UTF8Decode(str);
 }
 
 // static
diff --git a/core/fxcrt/widestring_unittest.cpp b/core/fxcrt/widestring_unittest.cpp
index 5c555ca..f60e11f 100644
--- a/core/fxcrt/widestring_unittest.cpp
+++ b/core/fxcrt/widestring_unittest.cpp
@@ -10,6 +10,7 @@
 
 #include "build/build_config.h"
 #include "core/fxcrt/fx_string.h"
+#include "core/fxcrt/utf16.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "third_party/base/containers/contains.h"
 #include "third_party/base/containers/span.h"
@@ -1136,6 +1137,99 @@
   EXPECT_EQ(0, iter - multi_str.rbegin());
 }
 
+TEST(WideString, FromUTF8) {  // Well-formed input within the BMP.
+  EXPECT_EQ(L"", WideString::FromUTF8(ByteStringView()));  // Empty input.
+  EXPECT_EQ(
+      L"x"
+      L"\u0080"  // First code point outside ASCII.
+      L"\u00ff"
+      L"\ud7ff"  // Last code point before the surrogate range.
+      L"\ue000"  // First code point after the surrogate range.
+      L"\uff2c"
+      L"\uffff"  // Last code point in the BMP.
+      L"y",
+      WideString::FromUTF8("x"
+                           "\u0080"
+                           "\u00ff"
+                           "\ud7ff"
+                           "\ue000"
+                           "\uff2c"
+                           "\uffff"
+                           "y"));
+}
+
+TEST(WideString, FromUTF8Supplementary) {  // Code points above the BMP.
+  EXPECT_EQ(
+      L"\U00010000"  // First supplementary code point.
+      L"\U0001f3a8"  // Same character as the emoji literal below.
+      L"\U0010ffff",  // Last valid code point.
+      WideString::FromUTF8("\U00010000"
+                           "🎨"
+                           "\U0010ffff"));
+}
+
+TEST(WideString, FromUTF8ErrorRecovery) {  // Malformed bytes are dropped.
+  EXPECT_EQ(L"(A)", WideString::FromUTF8("(\xc2\x41)"))  // Lead, then ASCII.
+      << "Invalid continuation";
+  EXPECT_EQ(L"()", WideString::FromUTF8("(\xc2\xc2)"))  // Lead, then lead.
+      << "Invalid continuation";
+  EXPECT_EQ(L"()", WideString::FromUTF8("(\xc2\xff\x80)"))
+      << "Invalid continuation";
+  EXPECT_EQ(L"()", WideString::FromUTF8("(\x80\x80)")) << "Invalid leading";
+  EXPECT_EQ(L"()", WideString::FromUTF8("(\xff\x80\x80)")) << "Invalid leading";
+  EXPECT_EQ(L"()", WideString::FromUTF8("(\xf8\x80\x80\x80\x80)"))
+      << "Invalid leading";
+  EXPECT_EQ(L"()", WideString::FromUTF8("(\xf8\x88\x80\x80\x80)"))
+      << "Invalid leading";
+  EXPECT_EQ(L"()", WideString::FromUTF8("(\xf4\x90\x80\x80)"))  // U+110000.
+      << "Code point greater than U+10FFFF";
+}
+
+TEST(WideString, UTF8EncodeDecodeConsistency) {  // Round trip over the BMP.
+  WideString wstr;
+  wstr.Reserve(0x10000);
+  for (char32_t w = 0; w < pdfium::kMinimumSupplementaryCodePoint; ++w) {
+    if (pdfium::IsHighSurrogate(w) || pdfium::IsLowSurrogate(w)) {
+      // Skip UTF-16 surrogates.
+      continue;
+    }
+    wstr += static_cast<wchar_t>(w);
+  }
+  ASSERT_EQ(0xf800u, wstr.GetLength());  // Presumably 0x10000 - 0x800 skipped.
+
+  ByteString bstr = FX_UTF8Encode(wstr.AsStringView());
+  WideString wstr2 = WideString::FromUTF8(bstr.AsStringView());
+  EXPECT_EQ(wstr, wstr2);  // Encode followed by decode must be lossless.
+}
+
+TEST(WideString, UTF8EncodeDecodeConsistencyUnpairedHighSurrogates) {
+  WideString wstr;
+  wstr.Reserve(0x400);
+  for (wchar_t w = pdfium::kMinimumHighSurrogateCodeUnit;
+       w <= pdfium::kMaximumHighSurrogateCodeUnit; ++w) {
+    wstr += w;  // Each high surrogate on its own, with no low surrogate.
+  }
+  ASSERT_EQ(0x400u, wstr.GetLength());  // One entry per high surrogate.
+
+  ByteString bstr = FX_UTF8Encode(wstr.AsStringView());
+  WideString wstr2 = WideString::FromUTF8(bstr.AsStringView());
+  EXPECT_EQ(wstr, wstr2);  // Unpaired surrogates must survive the round trip.
+}
+
+TEST(WideString, UTF8EncodeDecodeConsistencyUnpairedLowSurrogates) {
+  WideString wstr;
+  wstr.Reserve(0x400);
+  for (wchar_t w = pdfium::kMinimumLowSurrogateCodeUnit;
+       w <= pdfium::kMaximumLowSurrogateCodeUnit; ++w) {
+    wstr += w;  // Each low surrogate on its own, with no high surrogate.
+  }
+  ASSERT_EQ(0x400u, wstr.GetLength());  // One entry per low surrogate.
+
+  ByteString bstr = FX_UTF8Encode(wstr.AsStringView());
+  WideString wstr2 = WideString::FromUTF8(bstr.AsStringView());
+  EXPECT_EQ(wstr, wstr2);  // Unpaired surrogates must survive the round trip.
+}
+
 TEST(WideString, FromUTF16BE) {
   struct UTF16BEDecodeCase {
     ByteString in;