Filter out bad font base encodings when flattening

FPDFPage_Flatten() draws annotations into the content stream. From the
perspective of a PDF reader that only reads the content stream, the
flattened PDF potentially uses more fonts than the original PDF. If the
PDF reader is strict about validating fonts, it can then detect
malformed fonts and reject them.

Avoid a case of this by removing font encodings that are out-of-spec
during the flattening process. Then PDF readers that are strict about
validating font encodings can fall back to default values instead of
choking on the bad values.

Update FPDFFlattenEmbedderTest.FlatWithBadFont to reflect this change
and fulfill the TODO related to this issue.

Bug: 344775293
Change-Id: I1d01dd233caffcaecd6a9d1f391d1bf56da075bb
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/120851
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_flatten.cpp b/fpdfsdk/fpdf_flatten.cpp
index b5da321..a2792d6 100644
--- a/fpdfsdk/fpdf_flatten.cpp
+++ b/fpdfsdk/fpdf_flatten.cpp
@@ -15,6 +15,7 @@
 
 #include "constants/annotation_common.h"
 #include "constants/annotation_flags.h"
+#include "constants/font_encodings.h"
 #include "constants/page_object.h"
 #include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
 #include "core/fpdfapi/page/cpdf_page.h"
@@ -245,6 +246,47 @@
   return CFX_Matrix(a, 0.0f, 0.0f, d, e, f);
 }
 
+bool IsValidBaseEncoding(ByteString base_encoding) {
+  // ISO 32000-1:2008 spec, table 114.
+  // ISO 32000-2:2020 spec, table 112.
+  return base_encoding == pdfium::font_encodings::kWinAnsiEncoding ||
+         base_encoding == pdfium::font_encodings::kMacRomanEncoding ||
+         base_encoding == pdfium::font_encodings::kMacExpertEncoding;
+}
+
+void SanitizeFont(RetainPtr<CPDF_Dictionary> font_dict) {
+  if (!font_dict) {
+    return;
+  }
+
+  RetainPtr<CPDF_Dictionary> encoding_dict =
+      font_dict->GetMutableDictFor("Encoding");
+  if (encoding_dict) {
+    if (!IsValidBaseEncoding(encoding_dict->GetNameFor("BaseEncoding"))) {
+      font_dict->RemoveFor("Encoding");
+    }
+  }
+}
+
+void SanitizeFontResources(RetainPtr<CPDF_Dictionary> font_resource_dict) {
+  if (!font_resource_dict) {
+    return;
+  }
+
+  CPDF_DictionaryLocker locker(font_resource_dict);
+  for (auto it : locker) {
+    SanitizeFont(ToDictionary(it.second->GetMutableDirect()));
+  }
+}
+
+void SanitizeResources(RetainPtr<CPDF_Dictionary> resources_dict) {
+  if (!resources_dict) {
+    return;
+  }
+
+  SanitizeFontResources(resources_dict->GetMutableDictFor("Font"));
+}
+
 }  // namespace
 
 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
@@ -391,6 +433,7 @@
     RetainPtr<CPDF_Dictionary> ap_stream_dict = ap_stream->GetMutableDict();
     ap_stream_dict->SetNewFor<CPDF_Name>("Type", "XObject");
     ap_stream_dict->SetNewFor<CPDF_Name>("Subtype", "Form");
+    SanitizeResources(ap_stream_dict->GetMutableDictFor("Resources"));
 
     RetainPtr<CPDF_Dictionary> pXObject =
         pNewXORes->GetOrCreateDictFor("XObject");
diff --git a/fpdfsdk/fpdf_flatten_embeddertest.cpp b/fpdfsdk/fpdf_flatten_embeddertest.cpp
index 5815912..2bbc75c 100644
--- a/fpdfsdk/fpdf_flatten_embeddertest.cpp
+++ b/fpdfsdk/fpdf_flatten_embeddertest.cpp
@@ -11,6 +11,7 @@
 #include "testing/gtest/include/gtest/gtest.h"
 
 using testing::HasSubstr;
+using testing::Not;
 
 namespace {
 
@@ -53,8 +54,7 @@
   EXPECT_EQ(FLATTEN_SUCCESS, FPDFPage_Flatten(page, FLAT_PRINT));
   EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
 
-  // TODO(crbug.com/344775293): HasSubstr() should be negated with Not().
-  EXPECT_THAT(GetString(), HasSubstr("/PDFDocEncoding"));
+  EXPECT_THAT(GetString(), Not(HasSubstr("/PDFDocEncoding")));
   UnloadPage(page);
 }