Reset entity start when clearing text

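When we extract the text data we need to make sure we clear any
entity start position; otherwise a stale start index left over from an
unterminated '&' (e.g. "Test &<p>; thing") carries over into the next,
freshly cleared text buffer and our entity extraction will go badly.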

Bug: chromium:836661
Change-Id: Icbafdef912b1f5b495eafef426961c5df66cd3fd
Reviewed-on: https://pdfium-review.googlesource.com/31450
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp
index 685655e..eb79637 100644
--- a/core/fxcrt/xml/cfx_xmlparser.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser.cpp
@@ -586,6 +586,7 @@
 WideString CFX_XMLParser::GetTextData() {
   WideString ret(current_text_.data(), current_text_.size());
   current_text_.clear();
+  m_iEntityStart = -1;
   current_text_.reserve(kCurrentTextReserve);
   return ret;
 }
diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
index b5c9be5..73d6685 100644
--- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
@@ -569,3 +569,35 @@
   ASSERT_EQ(L"p", parser.GetTextData());
   ASSERT_EQ(FX_XmlSyntaxResult::Error, parser.DoSyntaxParse());
 }
+
+TEST(CFX_XMLParserTest, BadEntity) {
+  const char* input =
+      "<script>"
+      "Test &<p>; thing"
+      "</script>";
+
+  auto stream = MakeProxy(input);
+  auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
+  CFX_XMLTestParser parser(root.get(), stream);
+  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
+  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
+  ASSERT_EQ(L"script", parser.GetTextData());
+
+  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
+  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
+  ASSERT_EQ(L"Test &", parser.GetTextData());
+
+  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
+  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
+  ASSERT_EQ(L"p", parser.GetTextData());
+  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
+
+  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
+  ASSERT_EQ(L"; thing", parser.GetTextData());
+
+  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
+  ASSERT_EQ(L"script", parser.GetTextData());
+
+  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
+}