Add XFA fuzzer using FDP

Add fuzzer for XFA-related logic. The fuzzer uses FuzzedDataProvider
to generate XFA-trees and also has logic for XFA scripts. The fuzzer
has room for extensions.

Change-Id: I38e125235c4e64c37a8dc0fc5b648507bd465f9a
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/87510
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/AUTHORS b/AUTHORS
index 56b9e5f..c535e36 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -33,6 +33,7 @@
 # END individuals section.
 
 # BEGIN organizations section.
+Ada Logics Ltd. <*@adalogics.com>
 Collabora Ltd. <*@collabora.co.uk>
 DocsCorp Pty Ltd. <*@docscorp.com>
 Dropbox <*@dropbox.com>
diff --git a/testing/fuzzers/BUILD.gn b/testing/fuzzers/BUILD.gn
index 85846c3..f3e3e05 100644
--- a/testing/fuzzers/BUILD.gn
+++ b/testing/fuzzers/BUILD.gn
@@ -72,6 +72,9 @@
     "pdf_cpdf_tounicodemap_fuzzer",
     "pdf_nametree_fuzzer",
   ]
+  if (pdf_enable_xfa) {
+    fuzzer_list += [ "pdf_xfa_fdp_fuzzer" ]
+  }
 }
 
 # Note that this only compiles all the fuzzers, to prevent compile breakages.
@@ -457,6 +460,16 @@
       "../../third_party:pdfium_base",
     ]
   }
+  if (pdf_enable_xfa) {
+    pdfium_fuzzer("pdf_xfa_fdp_fuzzer") {
+      sources = [ "pdf_xfa_fdp_fuzzer.cc" ]
+      deps = [
+        ":fuzzer_helper",
+        "../../third_party:pdfium_base",
+      ]
+      public_fuzzer = true
+    }
+  }
 }
 
 pdfium_fuzzer("pdf_cmap_fuzzer") {
diff --git a/testing/fuzzers/pdf_xfa_fdp_fuzzer.cc b/testing/fuzzers/pdf_xfa_fdp_fuzzer.cc
new file mode 100644
index 0000000..d346e54
--- /dev/null
+++ b/testing/fuzzers/pdf_xfa_fdp_fuzzer.cc
@@ -0,0 +1,636 @@
+// Copyright 2021 The PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <fuzzer/FuzzedDataProvider.h>
+
+#include <string>
+#include <vector>
+
+#include "public/fpdf_formfill.h"
+#include "testing/fuzzers/pdfium_fuzzer_helper.h"
+#include "third_party/base/containers/adapters.h"
+#include "third_party/base/cxx17_backports.h"
+
+class PDFiumXFAFuzzer : public PDFiumFuzzerHelper {
+ public:
+  PDFiumXFAFuzzer() = default;
+  ~PDFiumXFAFuzzer() override = default;
+
+  int GetFormCallbackVersion() const override { return 2; }
+
+  // Succeeds only for full or foreground XFA forms that FPDF_LoadXFA() accepts;
+  // anything else would merely repeat what the non-XFA fuzzer already covers.
+  bool OnFormFillEnvLoaded(FPDF_DOCUMENT doc) override {
+    const int type = FPDF_GetFormType(doc);
+    const bool is_xfa =
+        type == FORMTYPE_XFA_FULL || type == FORMTYPE_XFA_FOREGROUND;
+    return is_xfa && FPDF_LoadXFA(doc);
+  }
+};
+
+// Picks one XFA script function name from a fixed list, using fuzzer input.
+std::string GenXfaScriptFuncName(FuzzedDataProvider* data_provider) {
+  static const char* const kXfaScriptFuncs[] = {
+      "Abs",       "Apr",        "At",           "Avg",          "Ceil",
+      "Choose",    "Concat",     "Count",        "Cterm",        "Date",
+      "Date2Num",  "DateFmt",    "Decode",       "Encode",       "Eval",
+      "Exists",    "Floor",      "Format",       "FV",           "Get",
+      "HasValue",  "If",         "Ipmt",         "IsoDate2Num",  "IsoTime2Num",
+      "Left",      "Len",        "LocalDateFmt", "LocalTimeFmt", "Lower",
+      "Ltrim",     "Max",        "Min",          "Mod",          "NPV",
+      "Num2Date",  "Num2GMTime", "Num2Time",     "Oneof",        "Parse",
+      "Pmt",       "Post",       "PPmt",         "Put",          "PV",
+      "Rate",      "Ref",        "Replace",      "Right",        "Round",
+      "Rtrim",     "Space",      "Str",          "Stuff",        "Substr",
+      "Sum",       "Term",       "Time",         "Time2Num",     "TimeFmt",
+      "Translate", "UnitType",   "UnitValue",    "Upper",        "Uuid",
+      "Within",    "WordNum",
+  };
+
+  const size_t last = pdfium::size(kXfaScriptFuncs) - 1;  // Inclusive bound.
+  const size_t pick = data_provider->ConsumeIntegralInRange<size_t>(0, last);
+  return kXfaScriptFuncs[pick];
+}
+
+std::string MaybeQuote(FuzzedDataProvider* data_provider, std::string body) {
+  // Quote the text when the value drawn from [0, 100] falls below 20.
+  const bool quote =
+      data_provider->ConsumeIntegralInRange<uint32_t>(0, 100) < 20;
+  return quote ? "\"" + body + "\"" : body;
+}
+
+// Picks one argument for an XFA script function; it may come back quoted.
+std::string GenXfaScriptParam(FuzzedDataProvider* data_provider) {
+  static const char* const kXfaFuncParams[] = {
+      "$",
+      "-0",
+      "04/13/2019",
+      ".05",
+      "-1",
+      "1",
+      " 1 | 0",
+      "10 * 10 * 10 * 9 * 123",
+      "1024",
+      "10 * a + 9",
+      "1.2131",
+      "[1,2,3]",
+      "%123",
+      "[1,2,3][0]",
+      "123124",
+      "123342123",
+      "13:13:13",
+      "13:13:13 GMT",
+      "19960315T20:20:20",
+      "1 and 1",
+      "1 and 2",
+      "2",
+      "20000201",
+      "2009-06-01T13:45:30",
+      "2009-06-15T01:45:30",
+      "2009-06-15T13:45:30-07:00",
+      "2009-06-15T13:45:30.5275000",
+      " 2 < 3 + 1",
+      "2 + 3 + 9",
+      "3",
+      "3 * 1",
+      "3 -9",
+      "5 < 5",
+      "-99",
+      "99",
+      "9999999",
+      "99999999999",
+      "A",
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
+      "ÁÂÃÄÅÆ",
+      "<a><b></b></a>",
+      "&Acirc;",
+      "&AElig;&Aacute;&Acirc;&Aacute;",
+      "Amount[*]",
+      "~!@#$%^&amp;*()_+",
+      "&amp;|",
+      "&apos",
+      "apr",
+      "april",
+      "B",
+      "<br>",
+      "C",
+      "de_DE",
+      "es_ES",
+      "feb",
+      "febuary",
+      "HH:MM:SS",
+      "<html>",
+      "html",
+      "HTML",
+      "jan",
+      "january",
+      "json",
+      "lkdjfglsdkfgj",
+      "mar",
+      "march",
+      "name[0]",
+      "name1",
+      "name2",
+      "name3",
+      "name4",
+      "name[*].numAmount",
+      "&quot;",
+      "Space",
+      "Str",
+      "url",
+      "xhtml",
+      "xml",
+      "XML&quot;",
+  };
+
+  const size_t last = pdfium::size(kXfaFuncParams) - 1;  // Inclusive bound.
+  const size_t pick = data_provider->ConsumeIntegralInRange<size_t>(0, last);
+  return MaybeQuote(data_provider, kXfaFuncParams[pick]);
+}
+
+// Picks one XFA element tag name from a fixed list, using fuzzer input.
+std::string GenXfaTag(FuzzedDataProvider* data_provider) {
+  static const char* const kXfaElemTags[] = {
+      "accessibleContent",
+      "acrobat",
+      "acrobat",
+      "acrobat7",
+      "ADBE_JSConsole",
+      "ADBE_JSDebugger",
+      "addSilentPrint",
+      "addViewerPreferences",
+      "adjustData",
+      "adobeExtensionLevel",
+      "agent",
+      "alwaysEmbed",
+      "amd",
+      "appearanceFilter",
+      "arc",
+      "area",
+      "assist",
+      "attributes",
+      "autoSave",
+      "barcode",
+      "base",
+      "batchOutput",
+      "behaviorOverride",
+      "bind",
+      "bindItems",
+      "bookend",
+      "boolean",
+      "border",
+      "break",
+      "breakAfter",
+      "breakBefore",
+      "button",
+      "cache",
+      "calculate",
+      "calendarSymbols",
+      "caption",
+      "certificate",
+      "certificates",
+      "change",
+      "checkButton",
+      "choiceList",
+      "color",
+      "comb",
+      "command",
+      "common",
+      "compress",
+      "compression",
+      "compressLogicalStructure",
+      "compressObjectStream",
+      "config",
+      "config",
+      "conformance",
+      "connect",
+      "connectionSet",
+      "connectString",
+      "contentArea",
+      "contentCopy",
+      "copies",
+      "corner",
+      "creator",
+      "currencySymbol",
+      "currencySymbols",
+      "currentPage",
+      "data",
+      "dataGroup",
+      "dataModel",
+      "dataValue",
+      "dataWindow",
+      "date",
+      "datePattern",
+      "datePatterns",
+      "dateTime",
+      "dateTimeEdit",
+      "dateTimeSymbols",
+      "day",
+      "dayNames",
+      "debug",
+      "decimal",
+      "defaultTypeface",
+      "defaultUi",
+      "delete",
+      "delta",
+      "deltas",
+      "desc",
+      "destination",
+      "digestMethod",
+      "digestMethods",
+      "documentAssembly",
+      "draw",
+      "driver",
+      "dSigData",
+      "duplexOption",
+      "dynamicRender",
+      "edge",
+      "effectiveInputPolicy",
+      "effectiveOutputPolicy",
+      "embed",
+      "encoding",
+      "encodings",
+      "encrypt",
+      "encryption",
+      "encryptionLevel",
+      "encryptionMethod",
+      "encryptionMethods",
+      "enforce",
+      "equate",
+      "equateRange",
+      "era",
+      "eraNames",
+      "event",
+      "eventPseudoModel",
+      "exclGroup",
+      "exclude",
+      "excludeNS",
+      "exData",
+      "execute",
+      "exObject",
+      "extras",
+      "field",
+      "fill",
+      "filter",
+      "flipLabel",
+      "float",
+      "font",
+      "fontInfo",
+      "form",
+      "format",
+      "formFieldFilling",
+      "groupParent",
+      "handler",
+      "hostPseudoModel",
+      "hyphenation",
+      "ifEmpty",
+      "image",
+      "imageEdit",
+      "includeXDPContent",
+      "incrementalLoad",
+      "incrementalMerge",
+      "insert",
+      "instanceManager",
+      "integer",
+      "interactive",
+      "issuers",
+      "items",
+      "jog",
+      "keep",
+      "keyUsage",
+      "labelPrinter",
+      "layout",
+      "layoutPseudoModel",
+      "level",
+      "line",
+      "linear",
+      "linearized",
+      "list",
+      "locale",
+      "localeSet",
+      "lockDocument",
+      "log",
+      "logPseudoModel",
+      "manifest",
+      "map",
+      "margin",
+      "mdp",
+      "medium",
+      "mediumInfo",
+      "meridiem",
+      "meridiemNames",
+      "message",
+      "messaging",
+      "mode",
+      "modifyAnnots",
+      "month",
+      "monthNames",
+      "msgId",
+      "nameAttr",
+      "neverEmbed",
+      "numberOfCopies",
+      "numberPattern",
+      "numberPatterns",
+      "numberSymbol",
+      "numberSymbols",
+      "numericEdit",
+      "object",
+      "occur",
+      "oid",
+      "oids",
+      "openAction",
+      "operation",
+      "output",
+      "outputBin",
+      "outputXSL",
+      "overflow",
+      "overprint",
+      "packet",
+      "packets",
+      "pageArea",
+      "pageOffset",
+      "pageRange",
+      "pageSet",
+      "pagination",
+      "paginationOverride",
+      "para",
+      "part",
+      "password",
+      "passwordEdit",
+      "pattern",
+      "pcl",
+      "pdf",
+      "pdfa",
+      "permissions",
+      "pickTrayByPDFSize",
+      "picture",
+      "plaintextMetadata",
+      "presence",
+      "present",
+      "present",
+      "print",
+      "printerName",
+      "printHighQuality",
+      "printScaling",
+      "producer",
+      "proto",
+      "ps",
+      "psMap",
+      "query",
+      "radial",
+      "range",
+      "reason",
+      "reasons",
+      "record",
+      "recordSet",
+      "rectangle",
+      "ref",
+      "relevant",
+      "rename",
+      "renderPolicy",
+      "rootElement",
+      "runScripts",
+      "script",
+      "scriptModel",
+      "select",
+      "setProperty",
+      "severity",
+      "signature",
+      "signatureProperties",
+      "signaturePseudoModel",
+      "signData",
+      "signing",
+      "silentPrint",
+      "soapAction",
+      "soapAddress",
+      "solid",
+      "source",
+      "sourceSet",
+      "speak",
+      "staple",
+      "startNode",
+      "startPage",
+      "stipple",
+      "subform",
+      "subform",
+      "subformSet",
+      "subjectDN",
+      "subjectDNs",
+      "submit",
+      "submitFormat",
+      "submitUrl",
+      "subsetBelow",
+      "suppressBanner",
+      "tagged",
+      "template",
+      "template",
+      "templateCache",
+      "#text",
+      "text",
+      "textedit",
+      "textEdit",
+      "threshold",
+      "time",
+      "timePattern",
+      "timePatterns",
+      "timeStamp",
+      "to",
+      "toolTip",
+      "trace",
+      "transform",
+      "traversal",
+      "traverse",
+      "treeList",
+      "type",
+      "typeface",
+      "typefaces",
+      "ui",
+      "update",
+      "uri",
+      "user",
+      "validate",
+      "validate",
+      "validateApprovalSignatures",
+      "validationMessaging",
+      "value",
+      "variables",
+      "version",
+      "versionControl",
+      "viewerPreferences",
+      "webClient",
+      "whitespace",
+      "window",
+      "wsdlAddress",
+      "wsdlConnection",
+      "xdc",
+      "xdp",
+      "xfa",
+      "#xHTML",
+      "#xml",
+      "xmlConnection",
+      "xsdConnection",
+      "xsl",
+      "zpl",
+  };
+
+  const size_t last = pdfium::size(kXfaElemTags) - 1;  // Inclusive bound.
+  const size_t pick = data_provider->ConsumeIntegralInRange<size_t>(0, last);
+  return kXfaElemTags[pick];
+}
+
+// Picks one XFA attribute value from a fixed list; it may come back quoted.
+std::string GenXfaTagValue(FuzzedDataProvider* data_provider) {
+  static const char* const kXfaTagVals[] = {
+      "0",         "0pt",          "-1",
+      "123",       "1pt",          "203.2mm",
+      "22.1404mm", "255",          "256",
+      "321",       "5431.21mm",    "6.35mm",
+      "8in",       "8pt",          "application/x-javascript",
+      "bold",      "bold",         "consumeData",
+      "en_US",     "form1",        "initialize",
+      "italic",    "middle",       "name2",
+      "name3",     "name4",        "name5",
+      "Page1",     "RadioList[0]", "subform_1",
+      "tb",        "Verdana",      "Verdana",
+  };
+
+  const size_t last = pdfium::size(kXfaTagVals) - 1;  // Inclusive bound.
+  const size_t pick = data_provider->ConsumeIntegralInRange<size_t>(0, last);
+  return MaybeQuote(data_provider, kXfaTagVals[pick]);
+}
+
+// Picks one XFA attribute name from a fixed list, using fuzzer input.
+std::string GenXfaTagName(FuzzedDataProvider* data_provider) {
+  static const char* const kXfaTagNames[] = {
+      "activity",    "activity",    "baselineShift",
+      "contentType", "h",           "id",
+      "layout",      "layout",      "leftInset",
+      "locale",      "long",        "marginLeft",
+      "marginRight", "marginRight", "mergeMode",
+      "name",        "ref",         "scriptTest",
+      "short",       "size",        "spaceAbove",
+      "spaceBelow",  "startNew",    "stock",
+      "textIndent",  "timeStamp",   "typeface",
+      "uuid",        "vAlign",      "value",
+      "w",           "weight",      "x",
+      "y",
+  };
+  const size_t last = pdfium::size(kXfaTagNames) - 1;  // Inclusive bound.
+  const size_t pick = data_provider->ConsumeIntegralInRange<size_t>(0, last);
+  return kXfaTagNames[pick];
+}
+
+// Builds a minimal XFA script consisting of exactly one function call.
+//
+// Fuzzer input is consumed in this order: the function name, the argument
+// count (0 to 3, inclusive), then each argument from left to right. The result
+// has the form `Name(arg,arg,arg)`: arguments are joined by a bare "," and
+// nothing follows the closing parenthesis.
+std::string GenXfacript(FuzzedDataProvider* data_provider) {
+  std::string script = GenXfaScriptFuncName(data_provider);
+  script += "(";
+
+  // A count of zero skips the loop entirely and yields `Name()`.
+  const int arg_count = data_provider->ConsumeIntegralInRange(0, 3);
+  for (int arg = 0; arg < arg_count; ++arg) {
+    // Commas go between arguments only, never before the first one.
+    if (arg != 0) {
+      script += ",";
+    }
+    script += GenXfaScriptParam(data_provider);
+  }
+
+  script += ")";
+  return script;
+}
+
+// Builds `name = value`; drawn name-first, which `+` alone would not ensure.
+std::string getXfaElemAttributes(FuzzedDataProvider* data_provider) {
+  const std::string name = GenXfaTagName(data_provider);
+  return name + " = " + GenXfaTagValue(data_provider);
+}
+
+// Creates an XFA structure wrapped in <xdp> tags.
+// The body holds 1 to 3 chains of 1 to 6 nested elements. Each start tag is
+// written as `<tag attr attr ...>`: the element name comes first and every
+// attribute is preceded by a space, so adjacent tokens never run together.
+std::string GenXfaTree(FuzzedDataProvider* data_provider) {
+  std::string xfa_string = "<xdp xmlns=\"http://ns.adobe.com/xdp/\">";
+
+  const int chain_count = data_provider->ConsumeIntegralInRange(1, 3);
+  for (int chain = 0; chain < chain_count; ++chain) {
+    const int elem_count = data_provider->ConsumeIntegralInRange(1, 6);
+    std::vector<std::string> open_tags;
+    open_tags.reserve(elem_count);
+    for (int i = 0; i < elem_count; ++i) {
+      const std::string tag = GenXfaTag(data_provider);
+      xfa_string += "<" + tag;
+
+      // A draw of 71..100 out of 1..100 (30% of cases) adds 1 to 5 attributes.
+      if (data_provider->ConsumeIntegralInRange(1, 100) > 70) {
+        const int attr_count = data_provider->ConsumeIntegralInRange(1, 5);
+        for (int attr = 0; attr < attr_count; ++attr) {
+          xfa_string += " " + getXfaElemAttributes(data_provider);
+        }
+      }
+      xfa_string += ">";
+
+      // Only <script> elements get a body: a generated script.
+      if (tag == "script") {
+        xfa_string += GenXfacript(data_provider);
+      }
+
+      // Remember the tag so it can be closed once the chain is complete.
+      open_tags.push_back(tag);
+    }
+    for (const std::string& tag : pdfium::base::Reversed(open_tags)) {
+      xfa_string += "</" + tag + ">";
+    }
+  }
+  xfa_string += "</xdp>";
+  return xfa_string;
+}
+
+const char kSimplePdfTemplate[] = R"(%PDF-1.7
+1 0 obj
+<</Type /Catalog /Pages 2 0 R /AcroForm <</XFA 30 0 R>> /NeedsRendering true>>
+endobj
+2 0 obj
+<</Type /Pages /Kids [3 0 R] /Count 1>>
+endobj
+3 0 obj
+<</Type /Page /Parent 2 0 R /MediaBox [0 0 3 3]>>
+endobj
+30 0 obj
+<</Length $1>>
+stream
+$2
+endstream
+endobj
+trailer
+<</Root 1 0 R /Size 31>>
+%%EOF)";  // Placeholders: $1 = stream length, $2 = XFA stream body.
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  FuzzedDataProvider provider(data, size);
+  const std::string xfa_tree = GenXfaTree(&provider);
+
+  // The declared stream length also covers the newline before "endstream".
+  const std::string stream_length = std::to_string(xfa_tree.size() + 1);
+
+  // Build the PDF by substituting both placeholders of the template.
+  std::string pdf(kSimplePdfTemplate);
+  pdf.replace(pdf.find("$1"), 2, stream_length);
+  pdf.replace(pdf.find("$2"), 2, xfa_tree);
+
+#ifdef PDFIUM_FUZZER_DUMP
+  for (const char byte : pdf) {
+    putc(byte, stdout);
+  }
+#endif
+
+  PDFiumXFAFuzzer fuzzer;
+  fuzzer.RenderPdf(pdf.c_str(), pdf.size());
+  return 0;
+}