Make an XML-tree be the input to CFXA_DocumentParser().
Rather than strings or streams. Down the road, we'll want the XML
tree to live longer than the CFXA_Document which current owns it.
Change-Id: Ica076d8da87ed0061809b1d3fb5203b334f48733
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/71876
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/fxjs/xfa/cjx_node.cpp b/fxjs/xfa/cjx_node.cpp
index 1c2b772..5521d0f 100644
--- a/fxjs/xfa/cjx_node.cpp
+++ b/fxjs/xfa/cjx_node.cpp
@@ -11,9 +11,11 @@
#include <vector>
#include "core/fxcrt/cfx_memorystream.h"
+#include "core/fxcrt/cfx_readonlymemorystream.h"
#include "core/fxcrt/fx_codepage.h"
#include "core/fxcrt/xml/cfx_xmldocument.h"
#include "core/fxcrt/xml/cfx_xmlelement.h"
+#include "core/fxcrt/xml/cfx_xmlparser.h"
#include "fxjs/js_resources.h"
#include "fxjs/xfa/cfxjse_engine.h"
#include "fxjs/xfa/cfxjse_value.h"
@@ -264,8 +266,11 @@
if (params.size() >= 3)
bOverwrite = runtime->ToBoolean(params[2]);
+ CFX_XMLParser xml_parser(
+ pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(expression.raw_span()));
+
auto pParser = std::make_unique<CXFA_DocumentParser>(GetDocument());
- CFX_XMLNode* pXMLNode = pParser->ParseXMLData(expression);
+ CFX_XMLNode* pXMLNode = pParser->ParseData(xml_parser.Parse());
if (!pXMLNode)
return CJS_Result::Success();
diff --git a/xfa/fxfa/cxfa_ffdoc.cpp b/xfa/fxfa/cxfa_ffdoc.cpp
index f45e09e..7f2a92f 100644
--- a/xfa/fxfa/cxfa_ffdoc.cpp
+++ b/xfa/fxfa/cxfa_ffdoc.cpp
@@ -19,6 +19,7 @@
#include "core/fxcrt/xml/cfx_xmldocument.h"
#include "core/fxcrt/xml/cfx_xmlelement.h"
#include "core/fxcrt/xml/cfx_xmlnode.h"
+#include "core/fxcrt/xml/cfx_xmlparser.h"
#include "core/fxge/dib/cfx_dibitmap.h"
#include "fxjs/xfa/cjx_object.h"
#include "third_party/base/ptr_util.h"
@@ -100,18 +101,23 @@
}
bool CXFA_FFDoc::ParseDoc(const RetainPtr<IFX_SeekableStream>& stream) {
- CXFA_DocumentParser parser(m_pDocument.get());
- bool parsed = parser.Parse(stream, XFA_PacketType::Xdp);
+ CFX_XMLParser xml_parser(stream);
+ std::unique_ptr<CFX_XMLDocument> xml_doc = xml_parser.Parse();
+ if (!xml_doc)
+ return false;
+
+ CXFA_DocumentParser doc_parser(m_pDocument.get());
+ bool parsed = doc_parser.Parse(std::move(xml_doc), XFA_PacketType::Xdp);
// We have to set the XML document before we return so that we can clean
// up in the OpenDoc method. If we don't, the XMLDocument will get free'd
// when this method returns and UnownedPtrs get unhappy.
- m_pXMLDoc = parser.GetXMLDoc();
+ m_pXMLDoc = doc_parser.GetXMLDoc();
if (!parsed)
return false;
- m_pDocument->SetRoot(parser.GetRootNode());
+ m_pDocument->SetRoot(doc_parser.GetRootNode());
return true;
}
diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp
index e210b94..de23fe5 100644
--- a/xfa/fxfa/parser/cxfa_document_parser.cpp
+++ b/xfa/fxfa/parser/cxfa_document_parser.cpp
@@ -19,7 +19,6 @@
#include "core/fxcrt/xml/cfx_xmlelement.h"
#include "core/fxcrt/xml/cfx_xmlinstruction.h"
#include "core/fxcrt/xml/cfx_xmlnode.h"
-#include "core/fxcrt/xml/cfx_xmlparser.h"
#include "core/fxcrt/xml/cfx_xmltext.h"
#include "fxjs/xfa/cjx_object.h"
#include "third_party/base/logging.h"
@@ -230,14 +229,9 @@
CXFA_DocumentParser::~CXFA_DocumentParser() = default;
-bool CXFA_DocumentParser::Parse(
- const RetainPtr<IFX_SeekableReadStream>& pStream,
- XFA_PacketType ePacketID) {
- xml_doc_ = LoadXML(pStream);
- if (!xml_doc_)
- return false;
-
- CFX_XMLNode* root = GetDocumentNode(xml_doc_->GetRoot());
+bool CXFA_DocumentParser::Parse(std::unique_ptr<CFX_XMLDocument> pXML,
+ XFA_PacketType ePacketID) {
+ CFX_XMLNode* root = ParseData(std::move(pXML));
if (!root)
return false;
@@ -245,27 +239,18 @@
return !!m_pRootNode;
}
-CFX_XMLNode* CXFA_DocumentParser::ParseXMLData(const ByteString& wsXML) {
- auto pStream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(wsXML.raw_span());
- xml_doc_ = LoadXML(pStream);
- if (!xml_doc_)
+CFX_XMLNode* CXFA_DocumentParser::ParseData(
+ std::unique_ptr<CFX_XMLDocument> pXML) {
+ if (!pXML)
return nullptr;
+
+ xml_doc_ = std::move(pXML);
+ xml_doc_->GetRoot()->InsertChildNode(
+ xml_doc_->CreateNode<CFX_XMLInstruction>(L"xml"), 0);
+
return GetDocumentNode(xml_doc_->GetRoot());
}
-std::unique_ptr<CFX_XMLDocument> CXFA_DocumentParser::LoadXML(
- const RetainPtr<IFX_SeekableReadStream>& pStream) {
- ASSERT(pStream);
-
- CFX_XMLParser parser(pStream);
- std::unique_ptr<CFX_XMLDocument> doc = parser.Parse();
- if (doc) {
- doc->GetRoot()->InsertChildNode(doc->CreateNode<CFX_XMLInstruction>(L"xml"),
- 0);
- }
- return doc;
-}
-
void CXFA_DocumentParser::ConstructXFANode(CXFA_Node* pXFANode,
CFX_XMLNode* pXMLNode) {
XFA_PacketType ePacketID = pXFANode->GetPacketType();
diff --git a/xfa/fxfa/parser/cxfa_document_parser.h b/xfa/fxfa/parser/cxfa_document_parser.h
index a494b4f..bef976e 100644
--- a/xfa/fxfa/parser/cxfa_document_parser.h
+++ b/xfa/fxfa/parser/cxfa_document_parser.h
@@ -19,26 +19,20 @@
class CXFA_Document;
class CXFA_Node;
class CFX_XMLInstruction;
-class IFX_SeekableReadStream;
class CXFA_DocumentParser {
public:
explicit CXFA_DocumentParser(CXFA_Document* pFactory);
~CXFA_DocumentParser();
- bool Parse(const RetainPtr<IFX_SeekableReadStream>& pStream,
- XFA_PacketType ePacketID);
-
- CFX_XMLNode* ParseXMLData(const ByteString& wsXML);
- void ConstructXFANode(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode);
-
+ bool Parse(std::unique_ptr<CFX_XMLDocument> pXML, XFA_PacketType ePacketID);
+ CFX_XMLNode* ParseData(std::unique_ptr<CFX_XMLDocument> pXML);
std::unique_ptr<CFX_XMLDocument> GetXMLDoc() { return std::move(xml_doc_); }
+
+ void ConstructXFANode(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode);
CXFA_Node* GetRootNode() const;
private:
- std::unique_ptr<CFX_XMLDocument> LoadXML(
- const RetainPtr<IFX_SeekableReadStream>& pStream);
-
CXFA_Node* ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode,
XFA_PacketType ePacketID);
CXFA_Node* ParseAsXDPPacket_XDP(CFX_XMLNode* pXMLDocumentNode);
diff --git a/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp b/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp
index bd7e1ee..bb98de1 100644
--- a/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp
+++ b/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp
@@ -6,6 +6,7 @@
#include "core/fxcrt/cfx_readonlymemorystream.h"
#include "core/fxcrt/xml/cfx_xmldocument.h"
+#include "core/fxcrt/xml/cfx_xmlparser.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "xfa/fxfa/parser/cxfa_document.h"
@@ -31,6 +32,24 @@
std::unique_ptr<CXFA_DocumentParser> parser_;
};
+TEST_F(CXFA_DocumentParserTest, EmptyInput) {
+ static const char kInput[] = "";
+ auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
+ pdfium::as_bytes(pdfium::make_span(kInput)));
+ CFX_XMLParser xml_parser(stream);
+ EXPECT_FALSE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
+ EXPECT_FALSE(GetParser()->GetRootNode());
+}
+
+TEST_F(CXFA_DocumentParserTest, BadInput) {
+ static const char kInput[] = "<<<>bar?>>>>>>>";
+ auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
+ pdfium::as_bytes(pdfium::make_span(kInput)));
+ CFX_XMLParser xml_parser(stream);
+ EXPECT_FALSE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
+ EXPECT_FALSE(GetParser()->GetRootNode());
+}
+
TEST_F(CXFA_DocumentParserTest, XMLInstructionsScriptOff) {
static const char kInput[] =
"<config>\n"
@@ -41,7 +60,8 @@
auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
pdfium::as_bytes(pdfium::make_span(kInput)));
- ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+ CFX_XMLParser xml_parser(stream);
+ ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
CXFA_Node* root = GetParser()->GetRootNode();
ASSERT_TRUE(root);
@@ -59,7 +79,8 @@
auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
pdfium::as_bytes(pdfium::make_span(kInput)));
- ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+ CFX_XMLParser xml_parser(stream);
+ ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
CXFA_Node* root = GetParser()->GetRootNode();
ASSERT_TRUE(root);
@@ -76,7 +97,8 @@
auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
pdfium::as_bytes(pdfium::make_span(kInput)));
- ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+ CFX_XMLParser xml_parser(stream);
+ ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
CXFA_Node* root = GetParser()->GetRootNode();
ASSERT_TRUE(root);
@@ -93,7 +115,8 @@
auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
pdfium::as_bytes(pdfium::make_span(kInput)));
- ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+ CFX_XMLParser xml_parser(stream);
+ ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
CXFA_Node* root = GetParser()->GetRootNode();
ASSERT_TRUE(root);
@@ -113,7 +136,8 @@
auto stream = pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
pdfium::as_bytes(pdfium::make_span(kInput)));
- ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config));
+ CFX_XMLParser xml_parser(stream);
+ ASSERT_TRUE(GetParser()->Parse(xml_parser.Parse(), XFA_PacketType::Config));
CXFA_Node* root = GetParser()->GetRootNode();
ASSERT_TRUE(root);