Add targeted XFA fuzzer with a few preconditions
Add an XFA fuzzer that filters on the fuzz input. The filtering is done
to increase the chances of exploring XFA-related logic and to keep the
fuzzer from exploring unrelated code, e.g. v8.
Bug: chromium:1276950
Change-Id: I5d9776a16784f0970143daa396096b30d74964e6
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/87630
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/testing/fuzzers/BUILD.gn b/testing/fuzzers/BUILD.gn
index f3e3e05..f9c1c9f 100644
--- a/testing/fuzzers/BUILD.gn
+++ b/testing/fuzzers/BUILD.gn
@@ -73,7 +73,10 @@
"pdf_nametree_fuzzer",
]
if (pdf_enable_xfa) {
- fuzzer_list += [ "pdf_xfa_fdp_fuzzer" ]
+ fuzzer_list += [
+ "pdf_xfa_fdp_fuzzer",
+ "pdf_xfa_raw_fuzzer",
+ ]
}
}
@@ -91,6 +94,10 @@
}
}
+# Header-only target holding the PDF template strings that the XFA fuzzers
+# list in their deps.
+source_set("fuzzer_pdf_templates") {
+ sources = [ "pdf_fuzzer_templates.h" ]
+}
+
source_set("fuzzer_init") {
testonly = true
sources = [ "pdf_fuzzer_init.cc" ]
@@ -465,6 +472,16 @@
sources = [ "pdf_xfa_fdp_fuzzer.cc" ]
deps = [
":fuzzer_helper",
+ ":fuzzer_pdf_templates",
+ "../../third_party:pdfium_base",
+ ]
+ public_fuzzer = true
+ }
+ pdfium_fuzzer("pdf_xfa_raw_fuzzer") {
+ sources = [ "pdf_xfa_raw_fuzzer.cc" ]
+ deps = [
+ ":fuzzer_helper",
+ ":fuzzer_pdf_templates",
"../../third_party:pdfium_base",
]
public_fuzzer = true
diff --git a/testing/fuzzers/pdf_fuzzer_templates.h b/testing/fuzzers/pdf_fuzzer_templates.h
new file mode 100644
index 0000000..d5cdfd9
--- /dev/null
+++ b/testing/fuzzers/pdf_fuzzer_templates.h
@@ -0,0 +1,30 @@
+// Copyright 2021 The PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// File for holding strings representing PDF templates that are used by fuzzers.
+
+#ifndef TESTING_FUZZERS_PDF_FUZZER_TEMPLATES_H_
+#define TESTING_FUZZERS_PDF_FUZZER_TEMPLATES_H_
+
+// Minimal single-page PDF whose catalog /AcroForm /XFA entry references
+// object 30, a stream object. The template is not usable as-is; it contains
+// two placeholders that must be substituted first:
+//   $1 - the value of the stream's /Length entry.
+//   $2 - the stream contents (followed in the template by a newline and
+//        "endstream").
+constexpr char kSimplePdfTemplate[] = R"(%PDF-1.7
+1 0 obj
+<</Type /Catalog /Pages 2 0 R /AcroForm <</XFA 30 0 R>> /NeedsRendering true>>
+endobj
+2 0 obj
+<</Type /Pages /Kids [3 0 R] /Count 1>>
+endobj
+3 0 obj
+<</Type /Page /Parent 2 0 R /MediaBox [0 0 3 3]>>
+endobj
+30 0 obj
+<</Length $1>>
+stream
+$2
+endstream
+endobj
+trailer
+<</Root 1 0 R /Size 31>>
+%%EOF)";
+
+#endif // TESTING_FUZZERS_PDF_FUZZER_TEMPLATES_H_
diff --git a/testing/fuzzers/pdf_xfa_fdp_fuzzer.cc b/testing/fuzzers/pdf_xfa_fdp_fuzzer.cc
index d346e54..7f2707c 100644
--- a/testing/fuzzers/pdf_xfa_fdp_fuzzer.cc
+++ b/testing/fuzzers/pdf_xfa_fdp_fuzzer.cc
@@ -8,6 +8,7 @@
#include <vector>
#include "public/fpdf_formfill.h"
+#include "testing/fuzzers/pdf_fuzzer_templates.h"
#include "testing/fuzzers/pdfium_fuzzer_helper.h"
#include "third_party/base/containers/adapters.h"
#include "third_party/base/cxx17_backports.h"
@@ -592,26 +593,6 @@
return xfa_string;
}
-const char kSimplePdfTemplate[] = R"(%PDF-1.7
-1 0 obj
-<</Type /Catalog /Pages 2 0 R /AcroForm <</XFA 30 0 R>> /NeedsRendering true>>
-endobj
-2 0 obj
-<</Type /Pages /Kids [3 0 R] /Count 1>>
-endobj
-3 0 obj
-<</Type /Page /Parent 2 0 R /MediaBox [0 0 3 3]>>
-endobj
-30 0 obj
-<</Length $1>>
-stream
-$2
-endstream
-endobj
-trailer
-<</Root 1 0 R /Size 31>>
-%%EOF)";
-
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
FuzzedDataProvider data_provider(data, size);
std::string xfa_string = GenXfaTree(&data_provider);
diff --git a/testing/fuzzers/pdf_xfa_raw_fuzzer.cc b/testing/fuzzers/pdf_xfa_raw_fuzzer.cc
new file mode 100644
index 0000000..5bf096d
--- /dev/null
+++ b/testing/fuzzers/pdf_xfa_raw_fuzzer.cc
@@ -0,0 +1,101 @@
+// Copyright 2021 The PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <fuzzer/FuzzedDataProvider.h>
+
+#include <cctype>
+#include <cstdio>
+#include <cstring>
+#include <string>
+
+#include "public/fpdf_formfill.h"
+#include "testing/fuzzers/pdf_fuzzer_templates.h"
+#include "testing/fuzzers/pdfium_fuzzer_helper.h"
+
+// Fuzzer helper that only proceeds with documents that carry an XFA form and
+// for which FPDF_LoadXFA() succeeds.
+class PDFiumXFAFuzzer : public PDFiumFuzzerHelper {
+ public:
+  PDFiumXFAFuzzer() = default;
+  ~PDFiumXFAFuzzer() override = default;
+
+  int GetFormCallbackVersion() const override { return 2; }
+
+  // Reports success only when `doc` has an XFA form type and its XFA data
+  // loads. Anything else would merely duplicate the work done by the non-xfa
+  // fuzzer.
+  bool OnFormFillEnvLoaded(FPDF_DOCUMENT doc) override {
+    const int type = FPDF_GetFormType(doc);
+    const bool is_xfa =
+        type == FORMTYPE_XFA_FULL || type == FORMTYPE_XFA_FOREGROUND;
+    // Short-circuit keeps FPDF_LoadXFA() from running on non-XFA documents.
+    return is_xfa && FPDF_LoadXFA(doc);
+  }
+};
+
+// Returns true when the `size` bytes at `data` are acceptable as XFA fuzz
+// input, i.e. when all of the following hold:
+//  - the input is at most 2048 bytes long;
+//  - every byte is a printable or whitespace character;
+//  - the text "script" does not occur anywhere in the input;
+//  - every '<' is closed by a '>' before the next '<', every '>' closes a
+//    '<', and each such pair encloses at least one character.
+// An empty input is accepted.
+bool IsValidForFuzzing(const uint8_t* data, size_t size) {
+  constexpr size_t kMaxInputSize = 2048;
+  constexpr char kScript[] = "script";
+  constexpr size_t kScriptLen = sizeof(kScript) - 1;
+
+  if (size > kMaxInputSize) {
+    return false;
+  }
+
+  const char* ptr = reinterpret_cast<const char*>(data);
+  bool is_open = false;
+  size_t tag_size = 0;
+  for (size_t i = 0; i < size; i++) {
+    // The <cctype> classifiers have undefined behavior for negative arguments
+    // other than EOF, so classify the byte as an unsigned value rather than
+    // as a (possibly signed) char.
+    const unsigned char byte = data[i];
+    if (!std::isspace(byte) && !std::isprint(byte)) {
+      return false;
+    }
+
+    // Reject any input containing "script", so that this fuzzer stays away
+    // from v8 code. This deliberately over-approximates: some inputs that
+    // contain "script" would be valid fuzz cases, but a narrower match such as
+    // "<script" could be bypassed through whitespace or through other tags,
+    // e.g. "Javascript", and end up triggering large explorations of v8 code.
+    // `size - i` cannot underflow because i < size; using >= also catches a
+    // match that ends on the final input byte.
+    if (size - i >= kScriptLen &&
+        std::memcmp(ptr + i, kScript, kScriptLen) == 0) {
+      return false;
+    }
+
+    if (ptr[i] == '<') {
+      // A '<' inside an already open tag is not allowed.
+      if (is_open) {
+        return false;
+      }
+      is_open = true;
+      tag_size = 0;
+    } else if (ptr[i] == '>') {
+      // A '>' must close an open tag that has at least one character in it.
+      if (!is_open || tag_size == 0) {
+        return false;
+      }
+      is_open = false;
+    } else if (is_open) {
+      tag_size++;
+    }
+  }
+  // The last bracket must have been closed.
+  return !is_open;
+}
+
+// libFuzzer entry point. Wraps accepted fuzz input in an <xdp> root element,
+// embeds it as the XFA stream of kSimplePdfTemplate and renders the resulting
+// PDF. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  // Drop inputs the filter rejects to reduce the state space exploration.
+  if (!IsValidForFuzzing(data, size))
+    return 0;
+
+  std::string xdp_payload = "<xdp xmlns=\"http://ns.adobe.com/xdp/\">";
+  xdp_payload.append(reinterpret_cast<const char*>(data), size);
+  xdp_payload.append("</xdp>");
+
+  // The stream length also counts the newline that the template places
+  // between the payload and "endstream".
+  const std::string length_text = std::to_string(xdp_payload.size() + 1);
+
+  // Compose the PDF. Both placeholders are present in the template, so the
+  // find() calls below always locate them.
+  std::string pdf(kSimplePdfTemplate);
+  pdf.replace(pdf.find("$1"), 2, length_text);
+  pdf.replace(pdf.find("$2"), 2, xdp_payload);
+
+#ifdef PDFIUM_FUZZER_DUMP
+  // Echo the generated PDF to stdout.
+  for (char c : pdf)
+    putc(c, stdout);
+#endif
+
+  PDFiumXFAFuzzer fuzzer;
+  fuzzer.RenderPdf(pdf.c_str(), pdf.size());
+  return 0;
+}