Filter out bad font base encodings when flattening

FPDFPage_Flatten() draws annotations into the content stream. From the perspective of a PDF reader that only reads the content stream, the flattened PDF potentially uses more fonts than the original PDF. If the PDF reader is strict about validating fonts, it can then detect malformed fonts and reject them.

Avoid a case of this by removing font encodings that are out-of-spec during the flattening process. Then PDF readers that are strict about validating font encodings can fall back to default values instead of choking on the bad values.

Update FPDFFlattenEmbedderTest.FlatWithBadFont to reflect this change and fulfill the TODO related to this issue.

Bug: 344775293
Change-Id: I1d01dd233caffcaecd6a9d1f391d1bf56da075bb
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/120851
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Thomas Sepez <tsepez@google.com>
Commit-Queue: Lei Zhang <thestig@chromium.org>

commit: 74c148c348846bd3014c2f818e8ca64858db67bd [log] [tgz]
author: Lei Zhang <thestig@chromium.org> Wed Jun 26 21:56:52 2024 +0000
committer: Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com> Wed Jun 26 21:56:52 2024 +0000
tree: ac66f0c3658c83ed3d4fb00bbfc01a4ece4817cf
parent: 00da087ac31a2ef4630eeefc7e7cd2260df98f33 [diff]
diff --git a/fpdfsdk/fpdf_flatten.cpp b/fpdfsdk/fpdf_flatten.cpp
index b5da321..a2792d6 100644
--- a/fpdfsdk/fpdf_flatten.cpp
+++ b/fpdfsdk/fpdf_flatten.cpp

@@ -15,6 +15,7 @@
 
 #include "constants/annotation_common.h"
 #include "constants/annotation_flags.h"
+#include "constants/font_encodings.h"
 #include "constants/page_object.h"
 #include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
 #include "core/fpdfapi/page/cpdf_page.h"
@@ -245,6 +246,47 @@
   return CFX_Matrix(a, 0.0f, 0.0f, d, e, f);
 }
 
+// Returns true if `base_encoding` is acceptable as the /BaseEncoding value of
+// a font encoding dictionary: either empty, or one of the predefined encoding
+// names compared against below.
+bool IsValidBaseEncoding(ByteString base_encoding) {
+  // ISO 32000-1:2008 spec, table 114.
+  // ISO 32000-2:2020 spec, table 112.
+  //
+  // Since /BaseEncoding is optional, `base_encoding` can be empty. Treat that
+  // as valid so that an encoding dictionary carrying only /Differences is not
+  // considered malformed.
+  return base_encoding.IsEmpty() ||
+         base_encoding == pdfium::font_encodings::kWinAnsiEncoding ||
+         base_encoding == pdfium::font_encodings::kMacRomanEncoding ||
+         base_encoding == pdfium::font_encodings::kMacExpertEncoding;
+}
+
+// Drops the "Encoding" entry from `font_dict` when that entry is a dictionary
+// whose "BaseEncoding" name is rejected by IsValidBaseEncoding(). Does nothing
+// when `font_dict` is null or no "Encoding" dictionary is obtained from it.
+void SanitizeFont(RetainPtr<CPDF_Dictionary> font_dict) {
+  if (!font_dict) {
+    return;
+  }
+
+  RetainPtr<CPDF_Dictionary> encoding =
+      font_dict->GetMutableDictFor("Encoding");
+  if (!encoding) {
+    return;
+  }
+
+  const ByteString base_encoding = encoding->GetNameFor("BaseEncoding");
+  if (IsValidBaseEncoding(base_encoding)) {
+    return;
+  }
+
+  // The whole encoding entry is removed, not just its base encoding.
+  font_dict->RemoveFor("Encoding");
+}
+
+// Runs SanitizeFont() on every entry of `font_resource_dict`. Does nothing
+// when `font_resource_dict` is null.
+void SanitizeFontResources(RetainPtr<CPDF_Dictionary> font_resource_dict) {
+  if (!font_resource_dict) {
+    return;
+  }
+
+  CPDF_DictionaryLocker locker(font_resource_dict);
+  // Bind each entry by reference; copying the key/value pair on every
+  // iteration is unnecessary work.
+  for (const auto& entry : locker) {
+    // NOTE(review): entries that are not dictionaries presumably come back
+    // from ToDictionary() as null, which SanitizeFont() ignores - confirm.
+    SanitizeFont(ToDictionary(entry.second->GetMutableDirect()));
+  }
+}
+
+// Sanitizes the "Font" sub-dictionary of `resources_dict` via
+// SanitizeFontResources(). A null `resources_dict` is ignored.
+void SanitizeResources(RetainPtr<CPDF_Dictionary> resources_dict) {
+  if (resources_dict) {
+    SanitizeFontResources(resources_dict->GetMutableDictFor("Font"));
+  }
+}
+
 }  // namespace
 
 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
@@ -391,6 +433,7 @@
     RetainPtr<CPDF_Dictionary> ap_stream_dict = ap_stream->GetMutableDict();
     ap_stream_dict->SetNewFor<CPDF_Name>("Type", "XObject");
     ap_stream_dict->SetNewFor<CPDF_Name>("Subtype", "Form");
+    SanitizeResources(ap_stream_dict->GetMutableDictFor("Resources"));
 
     RetainPtr<CPDF_Dictionary> pXObject =
         pNewXORes->GetOrCreateDictFor("XObject");

diff --git a/fpdfsdk/fpdf_flatten_embeddertest.cpp b/fpdfsdk/fpdf_flatten_embeddertest.cpp
index 5815912..2bbc75c 100644
--- a/fpdfsdk/fpdf_flatten_embeddertest.cpp
+++ b/fpdfsdk/fpdf_flatten_embeddertest.cpp

@@ -11,6 +11,7 @@
 #include "testing/gtest/include/gtest/gtest.h"
 
 using testing::HasSubstr;
+using testing::Not;
 
 namespace {
 
@@ -53,8 +54,7 @@
   EXPECT_EQ(FLATTEN_SUCCESS, FPDFPage_Flatten(page, FLAT_PRINT));
   EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
 
-  // TODO(crbug.com/344775293): HasSubstr() should be negated with Not().
-  EXPECT_THAT(GetString(), HasSubstr("/PDFDocEncoding"));
+  EXPECT_THAT(GetString(), Not(HasSubstr("/PDFDocEncoding")));
   UnloadPage(page);
 }
commit	74c148c348846bd3014c2f818e8ca64858db67bd	[log] [tgz]
author	Lei Zhang <thestig@chromium.org>	Wed Jun 26 21:56:52 2024 +0000
committer	Pdfium LUCI CQ <pdfium-scoped@luci-project-accounts.iam.gserviceaccount.com>	Wed Jun 26 21:56:52 2024 +0000
tree	ac66f0c3658c83ed3d4fb00bbfc01a4ece4817cf
parent	00da087ac31a2ef4630eeefc7e7cd2260df98f33 [diff]