// Source blob: d346e54e13a723a36c010dbc1f350ec07f075d69
// Copyright 2021 The PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <fuzzer/FuzzedDataProvider.h>
#include <string>
#include <vector>
#include "public/fpdf_formfill.h"
#include "testing/fuzzers/pdfium_fuzzer_helper.h"
#include "third_party/base/containers/adapters.h"
#include "third_party/base/cxx17_backports.h"
// Fuzzer helper specialization for XFA documents. It reports form callback
// version 2 and, once the form-fill environment is loaded, accepts only
// documents whose form type is one of the XFA types and whose XFA data loads.
class PDFiumXFAFuzzer : public PDFiumFuzzerHelper {
 public:
  // Public so the fuzzer entry point can create an instance on the stack.
  PDFiumXFAFuzzer() = default;
  ~PDFiumXFAFuzzer() override = default;

  // Form callback version reported to the helper.
  int GetFormCallbackVersion() const override { return 2; }

  // Return false if XFA doesn't load as otherwise we're duplicating the work
  // done by the non-xfa fuzzer.
  bool OnFormFillEnvLoaded(FPDF_DOCUMENT doc) override {
    const int form_type = FPDF_GetFormType(doc);
    if (form_type != FORMTYPE_XFA_FULL &&
        form_type != FORMTYPE_XFA_FOREGROUND) {
      return false;
    }
    return FPDF_LoadXFA(doc);
  }
};
// Possible names of an XFA script function.
//
// Draws one index in [0, table size - 1] from |data_provider| and returns the
// function name stored at that index.
std::string GenXfaScriptFuncName(FuzzedDataProvider* data_provider) {
  static const char* const kXfaScriptFuncs[] = {
      "Abs",       "Apr",      "At",           "Avg",          "Ceil",
      "Choose",    "Concat",   "Count",        "Cterm",        "Date",
      "Date2Num",  "DateFmt",  "Decode",       "Encode",       "Eval",
      "Exists",    "Floor",    "Format",       "FV",           "Get",
      "HasValue",  "If",       "Ipmt",         "IsoDate2Num",  "IsoTime2Num",
      "Left",      "Len",      "LocalDateFmt", "LocalTimeFmt", "Lower",
      "Ltrim",     "Max",      "Min",          "Mod",          "NPV",
      "Num2Date",  "Num2GMTime", "Num2Time",   "Oneof",        "Parse",
      "Pmt",       "Post",     "PPmt",         "Put",          "PV",
      "Rate",      "Ref",      "Replace",      "Right",        "Round",
      "Rtrim",     "Space",    "Str",          "Stuff",        "Substr",
      "Sum",       "Term",     "Time",         "Time2Num",     "TimeFmt",
      "Translate", "UnitType", "UnitValue",    "Upper",        "Uuid",
      "Within",    "WordNum",
  };

  const size_t elem_selector = data_provider->ConsumeIntegralInRange<size_t>(
      0, pdfium::size(kXfaScriptFuncs) - 1);
  return kXfaScriptFuncs[elem_selector];
}
// Returns |body| either unchanged or wrapped in double quotes.
//
// One integer in [0, 100] is drawn from |data_provider|; the quoted form is
// returned when that integer is below 20.
std::string MaybeQuote(FuzzedDataProvider* data_provider, std::string body) {
  const bool add_quotes =
      data_provider->ConsumeIntegralInRange<uint32_t>(0, 100) < 20;
  if (add_quotes) {
    return "\"" + body + "\"";
  }
  return body;
}
// Possible arguments to a XFA script function.
//
// Draws one index into the table below from |data_provider| and passes the
// selected expression through MaybeQuote(), which may wrap it in quotes.
std::string GenXfaScriptParam(FuzzedDataProvider* data_provider) {
  static const char* const kXfaFuncParams[] = {
      " 1 | 0",
      "10 * 10 * 10 * 9 * 123",
      "10 * a + 9",
      "13:13:13 GMT",
      "1 and 1",
      "1 and 2",
      " 2 < 3 + 1",
      "2 + 3 + 9",
      "3 * 1",
      "3 -9",
      "5 < 5",
  };

  const size_t elem_selector = data_provider->ConsumeIntegralInRange<size_t>(
      0, pdfium::size(kXfaFuncParams) - 1);
  return MaybeQuote(data_provider, kXfaFuncParams[elem_selector]);
}
// Possible XFA tags.
//
// Draws one index into the table below from |data_provider| and returns the
// element name stored there.
//
// NOTE(review): the reviewed copy of this file had no entries in this table,
// which leaves a zero-length array and makes the `size - 1` upper bound wrap
// around. The names below are standard XFA template element names, and
// "script" must stay in the list because GenXfaTree() compares against it.
// Reconcile with the canonical tag list before landing.
std::string GenXfaTag(FuzzedDataProvider* data_provider) {
  static const char* const kXfaElemTags[] = {
      "bind",       "border",      "button",   "calculate", "caption",
      "checkButton", "choiceList", "contentArea", "draw",   "event",
      "exclGroup",  "field",       "font",     "items",     "margin",
      "pageArea",   "pageSet",     "para",     "script",    "subform",
      "template",   "text",        "textEdit", "ui",        "validate",
      "value",
  };

  const size_t elem_selector = data_provider->ConsumeIntegralInRange<size_t>(
      0, pdfium::size(kXfaElemTags) - 1);
  return kXfaElemTags[elem_selector];
}
// Possible XFA attribute values.
//
// Draws one index into the table below from |data_provider| and passes the
// selected value through MaybeQuote(), which may wrap it in quotes. Repeated
// entries are kept as they are: every slot is equally likely, so a repeated
// value is selected more often.
std::string GenXfaTagValue(FuzzedDataProvider* data_provider) {
  static const char* const kXfaTagVals[] = {
      "0",         "0pt",          "-1",
      "123",       "1pt",          "203.2mm",
      "22.1404mm", "255",          "256",
      "321",       "5431.21mm",    "6.35mm",
      "8in",       "8pt",          "application/x-javascript",
      "bold",      "bold",         "consumeData",
      "en_US",     "form1",        "initialize",
      "italic",    "middle",       "name2",
      "name3",     "name4",        "name5",
      "Page1",     "RadioList[0]", "subform_1",
      "tb",        "Verdana",      "Verdana",
  };

  const size_t elem_selector = data_provider->ConsumeIntegralInRange<size_t>(
      0, pdfium::size(kXfaTagVals) - 1);
  return MaybeQuote(data_provider, kXfaTagVals[elem_selector]);
}
// Possible XFA attribute names.
//
// Draws one index into the table below from |data_provider| and returns the
// attribute name stored there. Repeated entries are kept as they are: every
// slot is equally likely, so a repeated name is selected more often.
std::string GenXfaTagName(FuzzedDataProvider* data_provider) {
  static const char* const kXfaTagNames[] = {
      "activity",    "activity",    "baselineShift",
      "contentType", "h",           "id",
      "layout",      "layout",      "leftInset",
      "locale",      "long",        "marginLeft",
      "marginRight", "marginRight", "mergeMode",
      "name",        "ref",         "scriptTest",
      "short",       "size",        "spaceAbove",
      "spaceBelow",  "startNew",    "stock",
      // Was spelled "tetIndent"; the XFA attribute is named textIndent.
      "textIndent",  "timeStamp",   "typeface",
      "uuid",        "vAlign",      "value",
      "w",           "weight",      "x",
  };

  const size_t elem_selector = data_provider->ConsumeIntegralInRange<size_t>(
      0, pdfium::size(kXfaTagNames) - 1);
  return kXfaTagNames[elem_selector];
}
// Will create a simple XFA script that calls a single function.
//
// The result has the form `Name(arg,arg,...)`: a function name from
// GenXfaScriptFuncName() followed by zero to three comma-separated arguments,
// each from GenXfaScriptParam(). The argument count is drawn from
// |data_provider| after the name and before the arguments.
std::string GenXfacript(FuzzedDataProvider* data_provider) {
  std::string xfa_string = GenXfaScriptFuncName(data_provider);
  xfa_string += "(";

  const int num_params = data_provider->ConsumeIntegralInRange(0, 3);
  for (int param = 0; param < num_params; ++param) {
    // Separator goes between arguments only, never before the first one.
    if (param > 0)
      xfa_string += ",";
    xfa_string += GenXfaScriptParam(data_provider);
  }

  xfa_string += ")";
  return xfa_string;
}
// Will create a single XFA attribute, with both lhs and rhs, in the form
// `name = value`.
std::string getXfaElemAttributes(FuzzedDataProvider* data_provider) {
  // Draw the name first and the value second in separate statements. The
  // operands of `+` are not sequenced relative to each other, so calling both
  // generators inside one expression would let the compiler decide which of
  // them consumes fuzz input first.
  std::string attribute = GenXfaTagName(data_provider);
  const std::string value = GenXfaTagValue(data_provider);
  attribute += " = ";
  attribute += value;
  return attribute;
}
// Creates an XFA structure wrapped in <xdp> tags.
//
// Between one and three groups are generated. Each group opens one to six
// nested elements and then closes them in reverse order. An opening tag has
// the form `<tag attr = value ...>`; an element named "script" also gets a
// generated script call as its body.
//
// NOTE(review): the xmlns value on the root element is empty in this file;
// confirm whether a namespace URI is expected there.
std::string GenXfaTree(FuzzedDataProvider* data_provider) {
  std::string xfa_string = "<xdp xmlns=\"\">";

  const int stack_iterations = data_provider->ConsumeIntegralInRange(1, 3);
  for (int si = 0; si < stack_iterations; si++) {
    const int elem_count = data_provider->ConsumeIntegralInRange(1, 6);
    std::vector<std::string> xml_stack;
    xml_stack.reserve(static_cast<size_t>(elem_count));

    for (int i = 0; i < elem_count; i++) {
      const std::string tag = GenXfaTag(data_provider);

      // The element name comes directly after '<'; attributes follow it.
      xfa_string += "<";
      xfa_string += tag;

      // in 30% of cases, add attributes
      if (data_provider->ConsumeIntegralInRange(1, 100) > 70) {
        int attribute_count = data_provider->ConsumeIntegralInRange(1, 5);
        for (; attribute_count > 0; attribute_count--) {
          // Whitespace separates each attribute from what precedes it.
          xfa_string += " ";
          xfa_string += getXfaElemAttributes(data_provider);
        }
      }
      xfa_string += ">";

      // If needed, add a body to the tag
      if (tag == "script") {
        xfa_string += GenXfacript(data_provider);
      }

      // Push the tag to the stack so we can close it when done
      xml_stack.push_back(tag);
    }

    // Close the elements of this group, innermost first.
    for (const std::string& open_tag : pdfium::base::Reversed(xml_stack)) {
      xfa_string += "</" + open_tag + ">";
    }
  }

  xfa_string += "</xdp>";
  return xfa_string;
}
// Skeleton PDF into which the generated XFA data is spliced.
//
// The catalog's /AcroForm /XFA entry points at object 30, a stream object.
// Two placeholders are substituted by the fuzzer entry point:
//   $1 - value of the stream's /Length entry.
//   $2 - the stream data, i.e. the generated XFA string.
// The template contains no cross-reference table.
const char kSimplePdfTemplate[] = R"(%PDF-1.7
1 0 obj
<</Type /Catalog /Pages 2 0 R /AcroForm <</XFA 30 0 R>> /NeedsRendering true>>
endobj
2 0 obj
<</Type /Pages /Kids [3 0 R] /Count 1>>
endobj
3 0 obj
<</Type /Page /Parent 2 0 R /MediaBox [0 0 3 3]>>
endobj
30 0 obj
<</Length $1>>
stream
$2
endstream
endobj
trailer
<</Root 1 0 R /Size 31>>
%%EOF)";
// libFuzzer entry point.
//
// Uses the fuzz input to generate an XFA tree, splices it into
// kSimplePdfTemplate, and hands the resulting PDF to PDFiumXFAFuzzer for
// rendering. Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  FuzzedDataProvider data_provider(data, size);
  const std::string xfa_string = GenXfaTree(&data_provider);

  // Add 1 for newline before endstream.
  const std::string xfa_stream_len = std::to_string(xfa_string.size() + 1);

  // Compose the fuzzer. The length placeholder is filled in before the XFA
  // data is inserted, so the "$1" search only ever sees template text.
  std::string xfa_final_str(kSimplePdfTemplate);
  xfa_final_str.replace(xfa_final_str.find("$1"), 2, xfa_stream_len);
  xfa_final_str.replace(xfa_final_str.find("$2"), 2, xfa_string);

  // Dumping every generated document to stdout is only useful when debugging
  // the generator, so it is compiled in on request rather than on every run.
#ifdef PDFIUM_FUZZER_DUMP
  for (const char ch : xfa_final_str) {
    putc(ch, stdout);
  }
#endif

  PDFiumXFAFuzzer fuzzer;
  fuzzer.RenderPdf(xfa_final_str.c_str(), xfa_final_str.size());
  return 0;
}