Use CFX_WideString in CPDF_NameTree functions to strip BOM

PDFium doesn't strip BOMs during parsing, but we should strip BOMs when
retrieving parsed strings in CPDF_NameTree to ensure consistency and
appropriate function behavior. See the bug for more info.

As outlined in Bug=pdfium:593, the solution is to call GetUnicodeText()
instead of GetString(). I added a GetUnicodeTextAt() function in
CPDF_Array, which is symmetrical to GetUnicodeTextFor() in
CPDF_Dictionary.

I then changed the input variable types to CPDF_NameTree functions to
be CFX_WideString instead of CFX_ByteString, and modified all the
calls to them.

I also added a unit test for nametree, which would fail prior to this
change. Nametrees with non-unicode names are already tested by embedder
tests.

Bug=pdfium:820

Change-Id: Id69d7343632f83d1f5180348c0eea290f478183f
Reviewed-on: https://pdfium-review.googlesource.com/8091
Reviewed-by: dsinclair <dsinclair@chromium.org>
Commit-Queue: Jane Liu <janeliulwq@google.com>
diff --git a/BUILD.gn b/BUILD.gn
index e028820..6bf3947 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -1917,6 +1917,7 @@
     "core/fpdfdoc/cpdf_dest_unittest.cpp",
     "core/fpdfdoc/cpdf_filespec_unittest.cpp",
     "core/fpdfdoc/cpdf_formfield_unittest.cpp",
+    "core/fpdfdoc/cpdf_nametree_unittest.cpp",
     "core/fpdftext/cpdf_linkextract_unittest.cpp",
     "core/fxcodec/codec/fx_codec_a85_unittest.cpp",
     "core/fxcodec/codec/fx_codec_jpx_unittest.cpp",
diff --git a/core/fpdfapi/parser/cpdf_array.cpp b/core/fpdfapi/parser/cpdf_array.cpp
index ea4ca7e..1f2740e 100644
--- a/core/fpdfapi/parser/cpdf_array.cpp
+++ b/core/fpdfapi/parser/cpdf_array.cpp
@@ -105,6 +105,12 @@
   return m_Objects[i]->GetString();
 }
 
+CFX_WideString CPDF_Array::GetUnicodeTextAt(size_t i) const {
+  if (i >= m_Objects.size())
+    return CFX_WideString();
+  return m_Objects[i]->GetUnicodeText();
+}
+
 int CPDF_Array::GetIntegerAt(size_t i) const {
   if (i >= m_Objects.size())
     return 0;
diff --git a/core/fpdfapi/parser/cpdf_array.h b/core/fpdfapi/parser/cpdf_array.h
index bb17c0a..2590971 100644
--- a/core/fpdfapi/parser/cpdf_array.h
+++ b/core/fpdfapi/parser/cpdf_array.h
@@ -41,6 +41,7 @@
   CPDF_Object* GetObjectAt(size_t index) const;
   CPDF_Object* GetDirectObjectAt(size_t index) const;
   CFX_ByteString GetStringAt(size_t index) const;
+  CFX_WideString GetUnicodeTextAt(size_t index) const;
   int GetIntegerAt(size_t index) const;
   float GetNumberAt(size_t index) const;
   CPDF_Dictionary* GetDictAt(size_t index) const;
diff --git a/core/fpdfdoc/cpdf_action.cpp b/core/fpdfdoc/cpdf_action.cpp
index 88d0781..2357580 100644
--- a/core/fpdfdoc/cpdf_action.cpp
+++ b/core/fpdfdoc/cpdf_action.cpp
@@ -42,7 +42,7 @@
     return CPDF_Dest();
   if (pDest->IsString() || pDest->IsName()) {
     CPDF_NameTree name_tree(pDoc, "Dests");
-    return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString()));
+    return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText()));
   }
   if (CPDF_Array* pArray = pDest->AsArray())
     return CPDF_Dest(pArray);
diff --git a/core/fpdfdoc/cpdf_bookmark.cpp b/core/fpdfdoc/cpdf_bookmark.cpp
index 29303f1..e84001f 100644
--- a/core/fpdfdoc/cpdf_bookmark.cpp
+++ b/core/fpdfdoc/cpdf_bookmark.cpp
@@ -70,7 +70,8 @@
     return CPDF_Dest();
   if (pDest->IsString() || pDest->IsName()) {
     CPDF_NameTree name_tree(pDocument, "Dests");
-    return CPDF_Dest(name_tree.LookupNamedDest(pDocument, pDest->GetString()));
+    return CPDF_Dest(
+        name_tree.LookupNamedDest(pDocument, pDest->GetUnicodeText()));
   }
   if (CPDF_Array* pArray = pDest->AsArray())
     return CPDF_Dest(pArray);
diff --git a/core/fpdfdoc/cpdf_docjsactions.cpp b/core/fpdfdoc/cpdf_docjsactions.cpp
index 59dbccc..669ed70 100644
--- a/core/fpdfdoc/cpdf_docjsactions.cpp
+++ b/core/fpdfdoc/cpdf_docjsactions.cpp
@@ -20,7 +20,7 @@
 
 CPDF_Action CPDF_DocJSActions::GetJSActionAndName(
     int index,
-    CFX_ByteString* csName) const {
+    CFX_WideString* csName) const {
   ASSERT(m_pDocument);
   CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript");
   CPDF_Object* pAction = name_tree.LookupValueAndName(index, csName);
@@ -28,7 +28,7 @@
                                : CPDF_Action();
 }
 
-CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_ByteString& csName) const {
+CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_WideString& csName) const {
   ASSERT(m_pDocument);
   CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript");
   CPDF_Object* pAction = name_tree.LookupValue(csName);
@@ -36,7 +36,7 @@
                                : CPDF_Action();
 }
 
-int CPDF_DocJSActions::FindJSAction(const CFX_ByteString& csName) const {
+int CPDF_DocJSActions::FindJSAction(const CFX_WideString& csName) const {
   ASSERT(m_pDocument);
   CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript");
   return name_tree.GetIndex(csName);
diff --git a/core/fpdfdoc/cpdf_docjsactions.h b/core/fpdfdoc/cpdf_docjsactions.h
index 328b886..73c0a1e 100644
--- a/core/fpdfdoc/cpdf_docjsactions.h
+++ b/core/fpdfdoc/cpdf_docjsactions.h
@@ -19,9 +19,9 @@
   ~CPDF_DocJSActions();
 
   int CountJSActions() const;
-  CPDF_Action GetJSActionAndName(int index, CFX_ByteString* csName) const;
-  CPDF_Action GetJSAction(const CFX_ByteString& csName) const;
-  int FindJSAction(const CFX_ByteString& csName) const;
+  CPDF_Action GetJSActionAndName(int index, CFX_WideString* csName) const;
+  CPDF_Action GetJSAction(const CFX_WideString& csName) const;
+  int FindJSAction(const CFX_WideString& csName) const;
   CPDF_Document* GetDocument() const { return m_pDocument.Get(); }
 
  private:
diff --git a/core/fpdfdoc/cpdf_link.cpp b/core/fpdfdoc/cpdf_link.cpp
index b622094..f7aec40 100644
--- a/core/fpdfdoc/cpdf_link.cpp
+++ b/core/fpdfdoc/cpdf_link.cpp
@@ -28,7 +28,7 @@
 
   if (pDest->IsString() || pDest->IsName()) {
     CPDF_NameTree name_tree(pDoc, "Dests");
-    return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString()));
+    return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText()));
   }
   if (CPDF_Array* pArray = pDest->AsArray())
     return CPDF_Dest(pArray);
diff --git a/core/fpdfdoc/cpdf_nametree.cpp b/core/fpdfdoc/cpdf_nametree.cpp
index b3808bd..04cb1b9 100644
--- a/core/fpdfdoc/cpdf_nametree.cpp
+++ b/core/fpdfdoc/cpdf_nametree.cpp
@@ -9,13 +9,14 @@
 #include "core/fpdfapi/parser/cpdf_array.h"
 #include "core/fpdfapi/parser/cpdf_dictionary.h"
 #include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/fpdf_parser_decode.h"
 
 namespace {
 
 const int nMaxRecursion = 32;
 
 CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
-                            const CFX_ByteString& csName,
+                            const CFX_WideString& csName,
                             size_t& nIndex,
                             CPDF_Array** ppFind,
                             int nLevel = 0) {
@@ -24,15 +25,14 @@
 
   CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
   if (pLimits) {
-    CFX_ByteString csLeft = pLimits->GetStringAt(0);
-    CFX_ByteString csRight = pLimits->GetStringAt(1);
-    if (csLeft.Compare(csRight.AsStringC()) > 0) {
-      CFX_ByteString csTmp = csRight;
+    CFX_WideString csLeft = pLimits->GetUnicodeTextAt(0);
+    CFX_WideString csRight = pLimits->GetUnicodeTextAt(1);
+    if (csLeft.Compare(csRight) > 0) {
+      CFX_WideString csTmp = csRight;
       csRight = csLeft;
       csLeft = csTmp;
     }
-    if (csName.Compare(csLeft.AsStringC()) < 0 ||
-        csName.Compare(csRight.AsStringC()) > 0) {
+    if (csName.Compare(csLeft) < 0 || csName.Compare(csRight) > 0) {
       return nullptr;
     }
   }
@@ -41,8 +41,8 @@
   if (pNames) {
     size_t dwCount = pNames->GetCount() / 2;
     for (size_t i = 0; i < dwCount; i++) {
-      CFX_ByteString csValue = pNames->GetStringAt(i * 2);
-      int32_t iCompare = csValue.Compare(csName.AsStringC());
+      CFX_WideString csValue = pNames->GetUnicodeTextAt(i * 2);
+      int32_t iCompare = csValue.Compare(csName);
       if (iCompare <= 0) {
         if (ppFind)
           *ppFind = pNames;
@@ -78,7 +78,7 @@
 CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
                             size_t nIndex,
                             size_t& nCurIndex,
-                            CFX_ByteString* csName,
+                            CFX_WideString* csName,
                             CPDF_Array** ppFind,
                             int nLevel = 0) {
   if (nLevel > nMaxRecursion)
@@ -93,7 +93,7 @@
     }
     if (ppFind)
       *ppFind = pNames;
-    *csName = pNames->GetStringAt((nIndex - nCurIndex) * 2);
+    *csName = pNames->GetUnicodeTextAt((nIndex - nCurIndex) * 2);
     return pNames->GetDirectObjectAt((nIndex - nCurIndex) * 2 + 1);
   }
   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
@@ -158,7 +158,7 @@
   return m_pRoot ? ::CountNames(m_pRoot.Get()) : 0;
 }
 
-int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const {
+int CPDF_NameTree::GetIndex(const CFX_WideString& csName) const {
   if (!m_pRoot)
     return -1;
 
@@ -169,8 +169,8 @@
 }
 
 CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
-                                               CFX_ByteString* csName) const {
-  *csName = CFX_ByteString();
+                                               CFX_WideString* csName) const {
+  *csName = CFX_WideString();
   if (!m_pRoot)
     return nullptr;
 
@@ -178,7 +178,7 @@
   return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, csName, nullptr);
 }
 
-CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const {
+CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const {
   if (!m_pRoot)
     return nullptr;
 
@@ -187,13 +187,13 @@
 }
 
 CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
-                                           const CFX_ByteString& sName) {
+                                           const CFX_WideString& sName) {
   CPDF_Object* pValue = LookupValue(sName);
   if (!pValue) {
     CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests");
     if (!pDests)
       return nullptr;
-    pValue = pDests->GetDirectObjectFor(sName);
+    pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName));
   }
   if (!pValue)
     return nullptr;
diff --git a/core/fpdfdoc/cpdf_nametree.h b/core/fpdfdoc/cpdf_nametree.h
index 69000f3..a56f511 100644
--- a/core/fpdfdoc/cpdf_nametree.h
+++ b/core/fpdfdoc/cpdf_nametree.h
@@ -21,11 +21,11 @@
   CPDF_NameTree(CPDF_Document* pDoc, const CFX_ByteString& category);
   ~CPDF_NameTree();
 
-  CPDF_Object* LookupValueAndName(int nIndex, CFX_ByteString* csName) const;
-  CPDF_Object* LookupValue(const CFX_ByteString& csName) const;
-  CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_ByteString& sName);
+  CPDF_Object* LookupValueAndName(int nIndex, CFX_WideString* csName) const;
+  CPDF_Object* LookupValue(const CFX_WideString& csName) const;
+  CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_WideString& sName);
 
-  int GetIndex(const CFX_ByteString& csName) const;
+  int GetIndex(const CFX_WideString& csName) const;
   size_t GetCount() const;
   CPDF_Dictionary* GetRoot() const { return m_pRoot.Get(); }
 
diff --git a/core/fpdfdoc/cpdf_nametree_unittest.cpp b/core/fpdfdoc/cpdf_nametree_unittest.cpp
new file mode 100644
index 0000000..28af9e0
--- /dev/null
+++ b/core/fpdfdoc/cpdf_nametree_unittest.cpp
@@ -0,0 +1,35 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfdoc/cpdf_nametree.h"
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_string.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(cpdf_nametree, GetUnicodeNameWithBOM) {
+  // Set up the root dictionary with a Names array.
+  auto pRootDict = pdfium::MakeUnique<CPDF_Dictionary>();
+  CPDF_Array* pNames = pRootDict->SetNewFor<CPDF_Array>("Names");
+
+  // Add the key "1" (with BOM) and value 100 into the array.
+  std::ostringstream buf;
+  buf << static_cast<unsigned char>(254) << static_cast<unsigned char>(255)
+      << static_cast<unsigned char>(0) << static_cast<unsigned char>(49);
+  pNames->AddNew<CPDF_String>(CFX_ByteString(buf), true);
+  pNames->AddNew<CPDF_Number>(100);
+
+  // Check that the key is as expected.
+  CPDF_NameTree nameTree(pRootDict.get());
+  CFX_WideString storedName;
+  nameTree.LookupValueAndName(0, &storedName);
+  EXPECT_STREQ(L"1", storedName.c_str());
+
+  // Check that the correct value object can be obtained by looking up "1".
+  CFX_WideString matchName = L"1";
+  CPDF_Object* pObj = nameTree.LookupValue(matchName);
+  ASSERT_TRUE(pObj->IsNumber());
+  EXPECT_EQ(100, pObj->AsNumber()->GetInteger());
+}
diff --git a/fpdfsdk/cpdfsdk_formfillenvironment.cpp b/fpdfsdk/cpdfsdk_formfillenvironment.cpp
index b0ee903..de79475 100644
--- a/fpdfsdk/cpdfsdk_formfillenvironment.cpp
+++ b/fpdfsdk/cpdfsdk_formfillenvironment.cpp
@@ -603,10 +603,9 @@
   CPDF_DocJSActions docJS(pPDFDoc);
   int iCount = docJS.CountJSActions();
   for (int i = 0; i < iCount; i++) {
-    CFX_ByteString csJSName;
+    CFX_WideString csJSName;
     CPDF_Action jsAction = docJS.GetJSActionAndName(i, &csJSName);
-    GetActionHandler()->DoAction_JavaScript(
-        jsAction, CFX_WideString::FromLocal(csJSName.AsStringC()), this);
+    GetActionHandler()->DoAction_JavaScript(jsAction, csJSName, this);
   }
 }
 
diff --git a/fpdfsdk/fpdfattachment.cpp b/fpdfsdk/fpdfattachment.cpp
index 337ab35..7d8cce7 100644
--- a/fpdfsdk/fpdfattachment.cpp
+++ b/fpdfsdk/fpdfattachment.cpp
@@ -30,7 +30,7 @@
   if (static_cast<size_t>(index) >= nameTree.GetCount())
     return nullptr;
 
-  CFX_ByteString csName;
+  CFX_WideString csName;
   return nameTree.LookupValueAndName(index, &csName);
 }
 
diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp
index 06e72b3..7dbaf7f 100644
--- a/fpdfsdk/fpdfview.cpp
+++ b/fpdfsdk/fpdfview.cpp
@@ -1322,7 +1322,7 @@
     return nullptr;
 
   CPDF_NameTree name_tree(pDoc, "Dests");
-  return name_tree.LookupNamedDest(pDoc, name);
+  return name_tree.LookupNamedDest(pDoc, PDF_DecodeText(CFX_ByteString(name)));
 }
 
 #ifdef PDF_ENABLE_XFA
@@ -1398,6 +1398,7 @@
 
   CPDF_Object* pDestObj = nullptr;
   CFX_ByteString bsName;
+  CFX_WideString wsName;
   CPDF_NameTree nameTree(pDoc, "Dests");
   int count = nameTree.GetCount();
   if (index >= count) {
@@ -1421,8 +1422,9 @@
         break;
       i++;
     }
+    wsName = PDF_DecodeText(bsName);
   } else {
-    pDestObj = nameTree.LookupValueAndName(index, &bsName);
+    pDestObj = nameTree.LookupValueAndName(index, &wsName);
   }
   if (!pDestObj)
     return nullptr;
@@ -1434,7 +1436,6 @@
   if (!pDestObj->IsArray())
     return nullptr;
 
-  CFX_WideString wsName = PDF_DecodeText(bsName);
   CFX_ByteString utf16Name = wsName.UTF16LE_Encode();
   int len = utf16Name.GetLength();
   if (!buffer) {
diff --git a/fpdfsdk/javascript/Document.cpp b/fpdfsdk/javascript/Document.cpp
index d5b075d..ba4b2ae 100644
--- a/fpdfsdk/javascript/Document.cpp
+++ b/fpdfsdk/javascript/Document.cpp
@@ -1594,13 +1594,12 @@
     return false;
   }
   CFX_WideString wideName = params[0].ToCFXWideString(pRuntime);
-  CFX_ByteString utf8Name = wideName.UTF8Encode();
   CPDF_Document* pDocument = m_pFormFillEnv->GetPDFDocument();
   if (!pDocument)
     return false;
 
   CPDF_NameTree nameTree(pDocument, "Dests");
-  CPDF_Array* destArray = nameTree.LookupNamedDest(pDocument, utf8Name);
+  CPDF_Array* destArray = nameTree.LookupNamedDest(pDocument, wideName);
   if (!destArray)
     return false;
 
diff --git a/xfa/fxfa/cxfa_ffdoc.cpp b/xfa/fxfa/cxfa_ffdoc.cpp
index 01c570a..326fc7f 100644
--- a/xfa/fxfa/cxfa_ffdoc.cpp
+++ b/xfa/fxfa/cxfa_ffdoc.cpp
@@ -365,13 +365,12 @@
     return nullptr;
 
   CPDF_NameTree nametree(pXFAImages);
-  CFX_ByteString bsName = PDF_EncodeText(wsName.c_str(), wsName.GetLength());
-  CPDF_Object* pObject = nametree.LookupValue(bsName);
+  CPDF_Object* pObject = nametree.LookupValue(CFX_WideString(wsName));
   if (!pObject) {
     for (size_t i = 0; i < nametree.GetCount(); i++) {
-      CFX_ByteString bsTemp;
-      CPDF_Object* pTempObject = nametree.LookupValueAndName(i, &bsTemp);
-      if (bsTemp == bsName) {
+      CFX_WideString wsTemp;
+      CPDF_Object* pTempObject = nametree.LookupValueAndName(i, &wsTemp);
+      if (wsTemp == wsName) {
         pObject = pTempObject;
         break;
       }