Make an XML-tree be the input to CFXA_DocumentParser().

Rather than strings or streams. Down the road, we'll want the XML
tree to live longer than the CFXA_Document which current owns it.

Change-Id: Ica076d8da87ed0061809b1d3fb5203b334f48733
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/71876
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/fxjs/xfa/cjx_node.cpp b/fxjs/xfa/cjx_node.cpp
index 1c2b772..5521d0f 100644
--- a/fxjs/xfa/cjx_node.cpp
+++ b/fxjs/xfa/cjx_node.cpp
@@ -11,9 +11,11 @@
 #include <vector>
 
 #include "core/fxcrt/cfx_memorystream.h"
+#include "core/fxcrt/cfx_readonlymemorystream.h"
 #include "core/fxcrt/fx_codepage.h"
 #include "core/fxcrt/xml/cfx_xmldocument.h"
 #include "core/fxcrt/xml/cfx_xmlelement.h"
+#include "core/fxcrt/xml/cfx_xmlparser.h"
 #include "fxjs/js_resources.h"
 #include "fxjs/xfa/cfxjse_engine.h"
 #include "fxjs/xfa/cfxjse_value.h"
@@ -264,8 +266,11 @@
   if (params.size() >= 3)
     bOverwrite = runtime->ToBoolean(params[2]);
 
+  CFX_XMLParser xml_parser(
+      pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(expression.raw_span()));
+
   auto pParser = std::make_unique<CXFA_DocumentParser>(GetDocument());
-  CFX_XMLNode* pXMLNode = pParser->ParseXMLData(expression);
+  CFX_XMLNode* pXMLNode = pParser->ParseData(xml_parser.Parse());
   if (!pXMLNode)
     return CJS_Result::Success();
 
diff --git a/xfa/fxfa/cxfa_ffdoc.cpp b/xfa/fxfa/cxfa_ffdoc.cpp
index f45e09e..7f2a92f 100644
--- a/xfa/fxfa/cxfa_ffdoc.cpp
+++ b/xfa/fxfa/cxfa_ffdoc.cpp
@@ -19,6 +19,7 @@
 #include "core/fxcrt/xml/cfx_xmldocument.h"
 #include "core/fxcrt/xml/cfx_xmlelement.h"
 #include "core/fxcrt/xml/cfx_xmlnode.h"
+#include "core/fxcrt/xml/cfx_xmlparser.h"
 #include "core/fxge/dib/cfx_dibitmap.h"
 #include "fxjs/xfa/cjx_object.h"
 #include "third_party/base/ptr_util.h"
@@ -100,18 +101,23 @@
 }
 
 bool CXFA_FFDoc::ParseDoc(const RetainPtr<IFX_SeekableStream>& stream) {
-  CXFA_DocumentParser parser(m_pDocument.get());
-  bool parsed = parser.Parse(stream, XFA_PacketType::Xdp);
+  CFX_XMLParser xml_parser(stream);
+  std::unique_ptr<CFX_XMLDocument> xml_doc = xml_parser.Parse();
+  if (!xml_doc)
+    return false;
+
+  CXFA_DocumentParser doc_parser(m_pDocument.get());
+  bool parsed = doc_parser.Parse(std::move(xml_doc), XFA_PacketType::Xdp);
 
   // We have to set the XML document before we return so that we can clean
   // up in the OpenDoc method. If we don't, the XMLDocument will get free'd
   // when this method returns and UnownedPtrs get unhappy.
-  m_pXMLDoc = parser.GetXMLDoc();
+  m_pXMLDoc = doc_parser.GetXMLDoc();
 
   if (!parsed)
     return false;
 
-  m_pDocument->SetRoot(parser.GetRootNode());
+  m_pDocument->SetRoot(doc_parser.GetRootNode());
   return true;
 }
 
diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp
index e210b94..de23fe5 100644
--- a/xfa/fxfa/parser/cxfa_document_parser.cpp
+++ b/xfa/fxfa/parser/cxfa_document_parser.cpp
@@ -19,7 +19,6 @@
 #include "core/fxcrt/xml/cfx_xmlelement.h"
 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
 #include "core/fxcrt/xml/cfx_xmlnode.h"
-#include "core/fxcrt/xml/cfx_xmlparser.h"
 #include "core/fxcrt/xml/cfx_xmltext.h"
 #include "fxjs/xfa/cjx_object.h"
 #include "third_party/base/logging.h"
@@ -230,14 +229,9 @@
 
 CXFA_DocumentParser::~CXFA_DocumentParser() = default;
 
-bool CXFA_DocumentParser::Parse(
-    const RetainPtr<IFX_SeekableReadStream>& pStream,
-    XFA_PacketType ePacketID) {
-  xml_doc_ = LoadXML(pStream);
-  if (!xml_doc_)
-    return false;
-
-  CFX_XMLNode* root = GetDocumentNode(xml_doc_->GetRoot());
+bool CXFA_DocumentParser::Parse(std::unique_ptr<CFX_XMLDocument> pXML,
+                                XFA_PacketType ePacketID) {
+  CFX_XMLNode* root = ParseData(std::move(pXML));
   if (!root)
     return false;
 
@@ -245,27 +239,18 @@
   return !!m_pRootNode;
 }
 
-CFX_XMLNode* CXFA_DocumentParser::ParseXMLData(const ByteString& wsXML) {
-  auto pStream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(wsXML.raw_span());
-  xml_doc_ = LoadXML(pStream);
-  if (!xml_doc_)
+CFX_XMLNode* CXFA_DocumentParser::ParseData(
+    std::unique_ptr<CFX_XMLDocument> pXML) {
+  if (!pXML)
     return nullptr;
+
+  xml_doc_ = std::move(pXML);
+  xml_doc_->GetRoot()->InsertChildNode(
+      xml_doc_->CreateNode<CFX_XMLInstruction>(L"xml"), 0);
+
   return GetDocumentNode(xml_doc_->GetRoot());
 }
 
-std::unique_ptr<CFX_XMLDocument> CXFA_DocumentParser::LoadXML(
-    const RetainPtr<IFX_SeekableReadStream>& pStream) {
-  ASSERT(pStream);
-
-  CFX_XMLParser parser(pStream);
-  std::unique_ptr<CFX_XMLDocument> doc = parser.Parse();
-  if (doc) {
-    doc->GetRoot()->InsertChildNode(doc->CreateNode<CFX_XMLInstruction>(L"xml"),
-                                    0);
-  }
-  return doc;
-}
-
 void CXFA_DocumentParser::ConstructXFANode(CXFA_Node* pXFANode,
                                            CFX_XMLNode* pXMLNode) {
   XFA_PacketType ePacketID = pXFANode->GetPacketType();
diff --git a/xfa/fxfa/parser/cxfa_document_parser.h b/xfa/fxfa/parser/cxfa_document_parser.h
index a494b4f..bef976e 100644
--- a/xfa/fxfa/parser/cxfa_document_parser.h
+++ b/xfa/fxfa/parser/cxfa_document_parser.h
@@ -19,26 +19,20 @@
 class CXFA_Document;
 class CXFA_Node;
 class CFX_XMLInstruction;
-class IFX_SeekableReadStream;
 
 class CXFA_DocumentParser {
  public:
   explicit CXFA_DocumentParser(CXFA_Document* pFactory);
   ~CXFA_DocumentParser();
 
-  bool Parse(const RetainPtr<IFX_SeekableReadStream>& pStream,
-             XFA_PacketType ePacketID);
-
-  CFX_XMLNode* ParseXMLData(const ByteString& wsXML);
-  void ConstructXFANode(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode);
-
+  bool Parse(std::unique_ptr<CFX_XMLDocument> pXML, XFA_PacketType ePacketID);
+  CFX_XMLNode* ParseData(std::unique_ptr<CFX_XMLDocument> pXML);
   std::unique_ptr<CFX_XMLDocument> GetXMLDoc() { return std::move(xml_doc_); }
+
+  void ConstructXFANode(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode);
   CXFA_Node* GetRootNode() const;
 
  private:
-  std::unique_ptr<CFX_XMLDocument> LoadXML(
-      const RetainPtr<IFX_SeekableReadStream>& pStream);
-
   CXFA_Node* ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode,
                               XFA_PacketType ePacketID);
   CXFA_Node* ParseAsXDPPacket_XDP(CFX_XMLNode* pXMLDocumentNode);
diff --git a/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp b/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp
index bd7e1ee..bb98de1 100644
--- a/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp
+++ b/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp
@@ -6,6 +6,7 @@
 
 #include "core/fxcrt/cfx_readonlymemorystream.h"
 #include "core/fxcrt/xml/cfx_xmldocument.h"
+#include "core/fxcrt/xml/cfx_xmlparser.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "xfa/fxfa/parser/cxfa_document.h"
 
@@ -31,6 +32,24 @@
   std::unique_ptr<CXFA_DocumentParser> parser_;
 };
 
+TEST_F(CXFA_DocumentParserTest, EmptyInput) {
+  static const char kInput[] = "";
+  auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
+      pdfium::as_bytes(pdfium::make_span(kInput)));
+  CFX_XMLParser xml_parser(stream);
+  EXPECT_FALSE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
+  EXPECT_FALSE(GetParser()->GetRootNode());
+}
+
+TEST_F(CXFA_DocumentParserTest, BadInput) {
+  static const char kInput[] = "<<<>bar?>>>>>>>";
+  auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
+      pdfium::as_bytes(pdfium::make_span(kInput)));
+  CFX_XMLParser xml_parser(stream);
+  EXPECT_FALSE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
+  EXPECT_FALSE(GetParser()->GetRootNode());
+}
+
 TEST_F(CXFA_DocumentParserTest, XMLInstructionsScriptOff) {
   static const char kInput[] =
       "<config>\n"
@@ -41,7 +60,8 @@
 
   auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
       pdfium::as_bytes(pdfium::make_span(kInput)));
-  ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+  CFX_XMLParser xml_parser(stream);
+  ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
 
   CXFA_Node* root = GetParser()->GetRootNode();
   ASSERT_TRUE(root);
@@ -59,7 +79,8 @@
 
   auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
       pdfium::as_bytes(pdfium::make_span(kInput)));
-  ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+  CFX_XMLParser xml_parser(stream);
+  ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
 
   CXFA_Node* root = GetParser()->GetRootNode();
   ASSERT_TRUE(root);
@@ -76,7 +97,8 @@
 
   auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
       pdfium::as_bytes(pdfium::make_span(kInput)));
-  ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+  CFX_XMLParser xml_parser(stream);
+  ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
 
   CXFA_Node* root = GetParser()->GetRootNode();
   ASSERT_TRUE(root);
@@ -93,7 +115,8 @@
 
   auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
       pdfium::as_bytes(pdfium::make_span(kInput)));
-  ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+  CFX_XMLParser xml_parser(stream);
+  ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
 
   CXFA_Node* root = GetParser()->GetRootNode();
   ASSERT_TRUE(root);
@@ -113,7 +136,8 @@
 
   auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
       pdfium::as_bytes(pdfium::make_span(kInput)));
-  ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+  CFX_XMLParser xml_parser(stream);
+  ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
 
   CXFA_Node* root = GetParser()->GetRootNode();
   ASSERT_TRUE(root);