Move ByteString::FromUnicode() to WideString::ToDefANSI()

Turns out that "FromUnicode" is misleading in that, on linux, it simply
removes any characters beyond 0xFF and passes the rest unchanged, so
no unicode decoding actually takes place. On Windows, it passes it into
the system function specifying FX_CODEPAGE_DefANSI, converting it into
the so-called "default ANSI code plane", passing some characters,
converting others to '?' and still others to 'A'. Either way, nothing
resembling UTF8 comes out of this, so pick a better name.

These now immediately look suspicious, so a follow-up CL will see
which ones should really be WideString::UTF8Encode() instead.

Making this a normal method on a widestring rather than a static
method on a bytestring feels more natural; this is parallel to
the UTF8Encode and UTF16LE_Encode functions.

Add a test that shows these conversions.

Change-Id: Ia7551b47199eba61b5c328a97bfe9176ac8e583c
Reviewed-on: https://pdfium-review.googlesource.com/39690
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 7641dbd..2a43262 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -814,7 +814,7 @@
                                          bool bTranslateName) {
   LOGFONTA lfa;
   memcpy(&lfa, pLogFont, (char*)lfa.lfFaceName - (char*)&lfa);
-  ByteString face = ByteString::FromUnicode(pLogFont->lfFaceName);
+  ByteString face = WideString(pLogFont->lfFaceName).ToDefANSI();
   if (face.GetLength() >= LF_FACESIZE)
     return nullptr;
 
diff --git a/core/fpdfdoc/cpdf_filespec.cpp b/core/fpdfdoc/cpdf_filespec.cpp
index 2cc51e3..aa93b91 100644
--- a/core/fpdfdoc/cpdf_filespec.cpp
+++ b/core/fpdfdoc/cpdf_filespec.cpp
@@ -203,10 +203,9 @@
 
   WideString wsStr = EncodeFileName(wsFileName);
   if (m_pObj->IsString()) {
-    m_pWritableObj->SetString(ByteString::FromUnicode(wsStr));
+    m_pWritableObj->SetString(wsStr.ToDefANSI());
   } else if (CPDF_Dictionary* pDict = m_pWritableObj->AsDictionary()) {
-    pDict->SetNewFor<CPDF_String>(pdfium::stream::kF,
-                                  ByteString::FromUnicode(wsStr), false);
+    pDict->SetNewFor<CPDF_String>(pdfium::stream::kF, wsStr.ToDefANSI(), false);
     pDict->SetNewFor<CPDF_String>("UF", PDF_EncodeText(wsStr), false);
   }
 }
diff --git a/core/fpdfdoc/cpdf_interform.cpp b/core/fpdfdoc/cpdf_interform.cpp
index d7cee35..26dfbc1 100644
--- a/core/fpdfdoc/cpdf_interform.cpp
+++ b/core/fpdfdoc/cpdf_interform.cpp
@@ -1030,8 +1030,8 @@
   if (!pdf_path.IsEmpty()) {
     if (bSimpleFileSpec) {
       WideString wsFilePath = CPDF_FileSpec::EncodeFileName(pdf_path);
-      pMainDict->SetNewFor<CPDF_String>(
-          pdfium::stream::kF, ByteString::FromUnicode(wsFilePath), false);
+      pMainDict->SetNewFor<CPDF_String>(pdfium::stream::kF,
+                                        wsFilePath.ToDefANSI(), false);
       pMainDict->SetNewFor<CPDF_String>("UF", PDF_EncodeText(wsFilePath),
                                         false);
     } else {
diff --git a/core/fxcrt/bytestring.cpp b/core/fxcrt/bytestring.cpp
index 4d55c98..b6c1ce7 100644
--- a/core/fxcrt/bytestring.cpp
+++ b/core/fxcrt/bytestring.cpp
@@ -676,26 +676,6 @@
   return WideString(decoder.GetResult());
 }
 
-// static
-ByteString ByteString::FromUnicode(const WideString& wstr) {
-  int src_len = wstr.GetLength();
-  int dest_len =
-      FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, wstr.c_str(), src_len,
-                                nullptr, 0, nullptr, nullptr);
-  if (!dest_len)
-    return ByteString();
-
-  ByteString bstr;
-  {
-    // Span's lifetime must end before ReleaseBuffer() below.
-    pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
-    FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, wstr.c_str(), src_len,
-                              dest_buf.data(), dest_len, nullptr, nullptr);
-  }
-  bstr.ReleaseBuffer(dest_len);
-  return bstr;
-}
-
 int ByteString::Compare(const ByteStringView& str) const {
   if (!m_pData)
     return str.IsEmpty() ? 0 : -1;
diff --git a/core/fxcrt/bytestring.h b/core/fxcrt/bytestring.h
index 5722c49..5975acb 100644
--- a/core/fxcrt/bytestring.h
+++ b/core/fxcrt/bytestring.h
@@ -66,8 +66,6 @@
 
   void clear() { m_pData.Reset(); }
 
-  static ByteString FromUnicode(const WideString& str) WARN_UNUSED_RESULT;
-
   // Explicit conversion to C-style string.
   // Note: Any subsequent modification of |this| will invalidate the result.
   const char* c_str() const { return m_pData ? m_pData->m_String : ""; }
diff --git a/core/fxcrt/widestring.cpp b/core/fxcrt/widestring.cpp
index cde1973..7dd1c30 100644
--- a/core/fxcrt/widestring.cpp
+++ b/core/fxcrt/widestring.cpp
@@ -673,6 +673,25 @@
   return m_pData ? m_pData->m_nRefs : 0;
 }
 
+// static
+ByteString WideString::ToDefANSI() const {
+  int src_len = GetLength();
+  int dest_len = FXSYS_WideCharToMultiByte(
+      FX_CODEPAGE_DefANSI, 0, c_str(), src_len, nullptr, 0, nullptr, nullptr);
+  if (!dest_len)
+    return ByteString();
+
+  ByteString bstr;
+  {
+    // Span's lifetime must end before ReleaseBuffer() below.
+    pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
+    FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, c_str(), src_len,
+                              dest_buf.data(), dest_len, nullptr, nullptr);
+  }
+  bstr.ReleaseBuffer(dest_len);
+  return bstr;
+}
+
 ByteString WideString::UTF8Encode() const {
   return FX_UTF8Encode(AsStringView());
 }
diff --git a/core/fxcrt/widestring.h b/core/fxcrt/widestring.h
index b531292..dc5dd23 100644
--- a/core/fxcrt/widestring.h
+++ b/core/fxcrt/widestring.h
@@ -195,6 +195,7 @@
   size_t Replace(const WideStringView& pOld, const WideStringView& pNew);
   size_t Remove(wchar_t ch);
 
+  ByteString ToDefANSI() const;
   ByteString UTF8Encode() const;
 
   // This method will add \0\0 to the end of the string to represent the
diff --git a/core/fxcrt/widestring_unittest.cpp b/core/fxcrt/widestring_unittest.cpp
index 9017fe0..9d38aa4 100644
--- a/core/fxcrt/widestring_unittest.cpp
+++ b/core/fxcrt/widestring_unittest.cpp
@@ -999,6 +999,32 @@
   }
 }
 
+TEST(WideString, ToDefANSI) {
+  EXPECT_EQ("", WideString().ToDefANSI());
+#if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
+  const char* kResult =
+      "x"
+      "?"
+      "\xff"
+      "A"
+      "?"
+      "y";
+#else
+  const char* kResult =
+      "x"
+      "\x80"
+      "\xff"
+      "y";
+#endif
+  EXPECT_EQ(kResult, WideString(L"x"
+                                L"\u0080"
+                                L"\u00ff"
+                                L"\u0100"
+                                L"\u208c"
+                                L"y")
+                         .ToDefANSI());
+}
+
 TEST(WideStringView, FromVector) {
   std::vector<WideStringView::UnsignedType> null_vec;
   WideStringView null_string(null_vec);
diff --git a/core/fxge/win32/fx_win32_print.cpp b/core/fxge/win32/fx_win32_print.cpp
index f36fa843..1e2b2d4 100644
--- a/core/fxge/win32/fx_win32_print.cpp
+++ b/core/fxge/win32/fx_win32_print.cpp
@@ -650,7 +650,7 @@
     wsText += charpos.m_Unicode;
   }
   size_t len = totalLength;
-  ByteString text = ByteString::FromUnicode(wsText);
+  ByteString text = wsText.ToDefANSI();
   while (len > 0) {
     char buffer[1026];
     size_t send_len = std::min(len, static_cast<size_t>(1024));
diff --git a/fpdfsdk/cpdfsdk_interform.cpp b/fpdfsdk/cpdfsdk_interform.cpp
index 3cfc84d..6a4b591 100644
--- a/fpdfsdk/cpdfsdk_interform.cpp
+++ b/fpdfsdk/cpdfsdk_interform.cpp
@@ -506,10 +506,10 @@
       continue;
     WideString name;
     name = pField->GetUnicodeTextFor("T");
-    ByteString name_b = ByteString::FromUnicode(name);
+    ByteString name_b = name.ToDefANSI();
     ByteString csBValue = pField->GetStringFor("V");
     WideString csWValue = PDF_DecodeText(csBValue);
-    ByteString csValue_b = ByteString::FromUnicode(csWValue);
+    ByteString csValue_b = csWValue.ToDefANSI();
     fdfEncodedData << name_b << "=" << csValue_b;
     if (i != pFields->GetCount() - 1)
       fdfEncodedData << "&";
diff --git a/fxbarcode/BC_UtilCodingConvert.cpp b/fxbarcode/BC_UtilCodingConvert.cpp
index 77c52f2..96aeadd 100644
--- a/fxbarcode/BC_UtilCodingConvert.cpp
+++ b/fxbarcode/BC_UtilCodingConvert.cpp
@@ -12,7 +12,7 @@
 
 void CBC_UtilCodingConvert::UnicodeToLocale(const WideString& src,
                                             ByteString& dst) {
-  dst = ByteString::FromUnicode(src);
+  dst = src.ToDefANSI();
 }
 
 void CBC_UtilCodingConvert::LocaleToUtf8(const ByteString& src,
@@ -34,15 +34,13 @@
   for (uint8_t value : src)
     utf8 += value;
 
-  WideString unicode = WideString::FromUTF8(utf8.AsStringView());
-  dst = ByteString::FromUnicode(unicode);
+  dst = WideString::FromUTF8(utf8.AsStringView()).ToDefANSI();
 }
 
 void CBC_UtilCodingConvert::Utf8ToLocale(const uint8_t* src,
                                          int32_t count,
                                          ByteString& dst) {
-  WideString unicode = WideString::FromUTF8(ByteStringView(src, count));
-  dst = ByteString::FromUnicode(unicode);
+  dst = WideString::FromUTF8(ByteStringView(src, count)).ToDefANSI();
 }
 
 void CBC_UtilCodingConvert::UnicodeToUTF8(const WideString& src,
diff --git a/fxjs/cfxjse_class.cpp b/fxjs/cfxjse_class.cpp
index e7e54c8..9207fd5 100644
--- a/fxjs/cfxjse_class.cpp
+++ b/fxjs/cfxjse_class.cpp
@@ -112,8 +112,7 @@
     WideString err = JSFormatErrorString(pClassDescriptor->name, *szPropName,
                                          result.Error());
     v8::MaybeLocal<v8::String> str = v8::String::NewFromUtf8(
-        info.GetIsolate(), ByteString::FromUnicode(err).c_str(),
-        v8::NewStringType::kNormal);
+        info.GetIsolate(), err.ToDefANSI().c_str(), v8::NewStringType::kNormal);
     info.GetIsolate()->ThrowException(str.ToLocalChecked());
     return;
   }
diff --git a/fxjs/cjs_field.cpp b/fxjs/cjs_field.cpp
index 19e6712..cd97372 100644
--- a/fxjs/cjs_field.cpp
+++ b/fxjs/cjs_field.cpp
@@ -731,7 +731,7 @@
   if (!m_bCanSet)
     return CJS_Return(JSMessage::kReadOnlyError);
 
-  ByteString byte_str = ByteString::FromUnicode(pRuntime->ToWideString(vp));
+  ByteString byte_str = pRuntime->ToWideString(vp).ToDefANSI();
   if (m_bDelay) {
     AddDelay_String(FP_BORDERSTYLE, byte_str);
   } else {
@@ -2002,7 +2002,7 @@
 
   if (!m_bCanSet)
     return CJS_Return(JSMessage::kReadOnlyError);
-  if (ByteString::FromUnicode(pRuntime->ToWideString(vp)).IsEmpty())
+  if (pRuntime->ToWideString(vp).ToDefANSI().IsEmpty())
     return CJS_Return(JSMessage::kValueError);
   return CJS_Return();
 }
diff --git a/fxjs/cjs_global.cpp b/fxjs/cjs_global.cpp
index 567d548..1865f57 100644
--- a/fxjs/cjs_global.cpp
+++ b/fxjs/cjs_global.cpp
@@ -232,7 +232,7 @@
 
 CJS_Return CJS_Global::DelProperty(CJS_Runtime* pRuntime,
                                    const wchar_t* propname) {
-  auto it = m_MapGlobal.find(ByteString::FromUnicode(propname));
+  auto it = m_MapGlobal.find(WideString(propname).ToDefANSI());
   if (it == m_MapGlobal.end())
     return CJS_Return(JSMessage::kUnknownProperty);
 
@@ -242,7 +242,7 @@
 
 CJS_Return CJS_Global::GetProperty(CJS_Runtime* pRuntime,
                                    const wchar_t* propname) {
-  auto it = m_MapGlobal.find(ByteString::FromUnicode(propname));
+  auto it = m_MapGlobal.find(WideString(propname).ToDefANSI());
   if (it == m_MapGlobal.end())
     return CJS_Return();
 
@@ -272,7 +272,7 @@
 CJS_Return CJS_Global::SetProperty(CJS_Runtime* pRuntime,
                                    const wchar_t* propname,
                                    v8::Local<v8::Value> vp) {
-  ByteString sPropName = ByteString::FromUnicode(propname);
+  ByteString sPropName = WideString(propname).ToDefANSI();
   if (vp->IsNumber()) {
     return SetGlobalVariables(sPropName, JS_GlobalDataType::NUMBER,
                               pRuntime->ToDouble(vp), false, "",
@@ -284,10 +284,9 @@
                               v8::Local<v8::Object>(), false);
   }
   if (vp->IsString()) {
-    return SetGlobalVariables(
-        sPropName, JS_GlobalDataType::STRING, 0, false,
-        ByteString::FromUnicode(pRuntime->ToWideString(vp)),
-        v8::Local<v8::Object>(), false);
+    return SetGlobalVariables(sPropName, JS_GlobalDataType::STRING, 0, false,
+                              pRuntime->ToWideString(vp).ToDefANSI(),
+                              v8::Local<v8::Object>(), false);
   }
   if (vp->IsObject()) {
     return SetGlobalVariables(sPropName, JS_GlobalDataType::OBJECT, 0, false,
@@ -310,8 +309,7 @@
   if (params.size() != 2)
     return CJS_Return(JSMessage::kParamError);
 
-  auto it = m_MapGlobal.find(
-      ByteString::FromUnicode(pRuntime->ToWideString(params[0])));
+  auto it = m_MapGlobal.find(pRuntime->ToWideString(params[0]).ToDefANSI());
   if (it == m_MapGlobal.end() || it->second->bDeleted)
     return CJS_Return(JSMessage::kGlobalNotFoundError);
 
@@ -432,7 +430,7 @@
       continue;
     }
     if (v->IsString()) {
-      ByteString sValue = ByteString::FromUnicode(pRuntime->ToWideString(v));
+      ByteString sValue = pRuntime->ToWideString(v).ToDefANSI();
       CJS_KeyValue* pObjElement = new CJS_KeyValue;
       pObjElement->nType = JS_GlobalDataType::STRING;
       pObjElement->sKey = sKey;
diff --git a/fxjs/cjs_publicmethods.cpp b/fxjs/cjs_publicmethods.cpp
index a86ece3..f2cd3ff 100644
--- a/fxjs/cjs_publicmethods.cpp
+++ b/fxjs/cjs_publicmethods.cpp
@@ -327,7 +327,7 @@
     return pRuntime->ToArray(val);
 
   WideString wsStr = pRuntime->ToWideString(val);
-  ByteString t = ByteString::FromUnicode(wsStr);
+  ByteString t = wsStr.ToDefANSI();
   const char* p = t.c_str();
 
   int nIndex = 0;
@@ -882,7 +882,7 @@
     return CJS_Return(L"No event handler");
 
   WideString& Value = pEvent->Value();
-  ByteString strValue = StrTrim(ByteString::FromUnicode(Value));
+  ByteString strValue = StrTrim(Value.ToDefANSI());
   if (strValue.IsEmpty())
     return CJS_Return();
 
@@ -1078,7 +1078,7 @@
     return CJS_Return(JSMessage::kBadObjectError);
 
   WideString& Value = pEvent->Value();
-  ByteString strValue = StrTrim(ByteString::FromUnicode(Value));
+  ByteString strValue = StrTrim(Value.ToDefANSI());
   if (strValue.IsEmpty())
     return CJS_Return();
 
@@ -1683,7 +1683,7 @@
   if (pEvent->Value().IsEmpty())
     return CJS_Return();
 
-  double dEentValue = atof(ByteString::FromUnicode(pEvent->Value()).c_str());
+  double dEentValue = atof(pEvent->Value().ToDefANSI().c_str());
   bool bGreaterThan = pRuntime->ToBoolean(params[0]);
   double dGreaterThan = pRuntime->ToDouble(params[1]);
   bool bLessThan = pRuntime->ToBoolean(params[2]);
diff --git a/fxjs/cjs_runtime.cpp b/fxjs/cjs_runtime.cpp
index 9329a48..6042538 100644
--- a/fxjs/cjs_runtime.cpp
+++ b/fxjs/cjs_runtime.cpp
@@ -234,7 +234,7 @@
     v8::Local<v8::Value> value) {
   bool bAllowNaN = false;
   if (value->IsString()) {
-    ByteString bstr = ByteString::FromUnicode(ToWideString(value));
+    ByteString bstr = ToWideString(value).ToDefANSI();
     if (bstr.GetLength() == 0)
       return value;
     if (bstr == "NaN")
diff --git a/xfa/fgas/font/cfgas_fontmgr.cpp b/xfa/fgas/font/cfgas_fontmgr.cpp
index 5ab91b3..f173bf2 100644
--- a/xfa/fgas/font/cfgas_fontmgr.cpp
+++ b/xfa/fgas/font/cfgas_fontmgr.cpp
@@ -416,8 +416,7 @@
   }
   ByteString bsName;
   bool bFolder;
-  ByteString bsFolderSeparator =
-      ByteString::FromUnicode(WideString(kFolderSeparator));
+  ByteString bsFolderSeparator = WideString(kFolderSeparator).ToDefANSI();
   while (true) {
     if (!FX_GetNextFile(pCurHandle, &bsName, &bFolder)) {
       FX_CloseFolder(pCurHandle);
diff --git a/xfa/fgas/font/cfgas_gefont.cpp b/xfa/fgas/font/cfgas_gefont.cpp
index 740504f..b5cb796 100644
--- a/xfa/fgas/font/cfgas_gefont.cpp
+++ b/xfa/fgas/font/cfgas_gefont.cpp
@@ -76,7 +76,7 @@
     return false;
   ByteString csFontFamily;
   if (pszFontFamily)
-    csFontFamily = ByteString::FromUnicode(pszFontFamily);
+    csFontFamily = WideString(pszFontFamily).ToDefANSI();
 
   int32_t iWeight =
       FontStyleIsBold(dwFontStyles) ? FXFONT_FW_BOLD : FXFONT_FW_NORMAL;
diff --git a/xfa/fgas/font/cfgas_pdffontmgr.cpp b/xfa/fgas/font/cfgas_pdffontmgr.cpp
index 80d6196..4220d4d 100644
--- a/xfa/fgas/font/cfgas_pdffontmgr.cpp
+++ b/xfa/fgas/font/cfgas_pdffontmgr.cpp
@@ -80,7 +80,7 @@
   if (it != m_FontMap.end())
     return it->second;
 
-  ByteString bsPsName = ByteString::FromUnicode(WideString(wsFontFamily));
+  ByteString bsPsName = WideString(wsFontFamily).ToDefANSI();
   bool bBold = FontStyleIsBold(dwFontStyles);
   bool bItalic = FontStyleIsItalic(dwFontStyles);
   ByteString strFontName = PsNameToFontName(bsPsName, bBold, bItalic);
diff --git a/xfa/fxfa/parser/cxfa_node.cpp b/xfa/fxfa/parser/cxfa_node.cpp
index 7ff205c..8babe45 100644
--- a/xfa/fxfa/parser/cxfa_node.cpp
+++ b/xfa/fxfa/parser/cxfa_node.cpp
@@ -214,7 +214,7 @@
             pdfium::MakeRetain<CFX_MemoryStream>(pImageBuffer, iRead, false);
       }
     } else {
-      bsContent = ByteString::FromUnicode(wsImage);
+      bsContent = wsImage.ToDefANSI();
       pImageFileRead = pdfium::MakeRetain<CFX_MemoryStream>(
           const_cast<uint8_t*>(bsContent.raw_str()), bsContent.GetLength(),
           false);