// core/fpdfapi/edit/cpdf_fontsubsetter.cpp - pdfium - Git at Google

 // Copyright 2026 The PDFium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "core/fpdfapi/edit/cpdf_fontsubsetter.h"

 #include <hb-subset.h>
 #include <hb.h>
 #include <stdint.h>

 #include <algorithm>
 #include <array>
 #include <map>
 #include <memory>
 #include <set>
 #include <utility>
 #include <vector>

 #include "core/fpdfapi/edit/cpdf_font_util.h"
 #include "core/fpdfapi/font/cpdf_font.h"
 #include "core/fpdfapi/page/cpdf_page.h"
 #include "core/fpdfapi/page/cpdf_textobject.h"
 #include "core/fpdfapi/parser/cpdf_array.h"
 #include "core/fpdfapi/parser/cpdf_dictionary.h"
 #include "core/fpdfapi/parser/cpdf_document.h"
 #include "core/fpdfapi/parser/cpdf_name.h"
 #include "core/fpdfapi/parser/cpdf_number.h"
 #include "core/fpdfapi/parser/cpdf_reference.h"
 #include "core/fpdfapi/parser/cpdf_stream.h"
 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
 #include "core/fxcrt/byteorder.h"
 #include "core/fxcrt/bytestring.h"
 #include "core/fxcrt/check.h"
 #include "core/fxcrt/compiler_specific.h"
 #include "core/fxcrt/data_vector.h"
 #include "core/fxcrt/fx_extension.h"
 #include "core/fxcrt/fx_random.h"
 #include "core/fxcrt/retain_ptr.h"
 #include "core/fxcrt/span.h"
 #include "core/fxcrt/widestring.h"
 #include "core/fxge/cfx_fontmapper.h"
 #include "core/fxge/fx_font.h"

 namespace {

 // Stateless deleter for std::unique_ptr: hands the held pointer to
 // `DestroyFunction`, whatever pointee type it is instantiated with.
 template <auto DestroyFunction>
 struct HBDeleter {
   template <typename HBObject>
   void operator()(HBObject* object) const {
     DestroyFunction(object);
   }
 };

 // Owning smart-pointer aliases for HarfBuzz handles. When one of these is
 // destroyed or reset while holding a pointer, the matching hb_*_destroy()
 // function named in the alias is called on that pointer.
 using ScopedHBBlob = std::unique_ptr<hb_blob_t, HBDeleter<hb_blob_destroy>>;
 using ScopedHBFace = std::unique_ptr<hb_face_t, HBDeleter<hb_face_destroy>>;
 using ScopedHBSubsetInput =
     std::unique_ptr<hb_subset_input_t, HBDeleter<hb_subset_input_destroy>>;

 // Uses HarfBuzz to build a subset of the font program in `font_data` that
 // covers the glyph IDs in `gids`, and returns the bytes of the subsetted font.
 //
 // The subset is requested with HB_SUBSET_FLAGS_RETAIN_GIDS and
 // HB_SUBSET_FLAGS_NOTDEF_OUTLINE. `doc` is not used by this function.
 //
 // Returns an empty vector when `font_data` is too large for HarfBuzz's 32-bit
 // blob length, when any HarfBuzz step fails, or when the subset has no data.
 DataVector<uint8_t> GenerateFontSubset(CPDF_Document* doc,
                                        pdfium::span<const uint8_t> font_data,
                                        const std::set<uint32_t>& gids) {
   // hb_blob_create_or_fail() takes a 32-bit length. Reject data that does not
   // fit rather than letting the narrowing cast silently truncate it.
   const uint32_t font_data_size = static_cast<uint32_t>(font_data.size());
   if (font_data_size != font_data.size()) {
     return {};
   }

   // Wrap the data. No destroy callback is registered, so `font_data` stays
   // owned by the caller.
   ScopedHBBlob blob(hb_blob_create_or_fail(
       reinterpret_cast<const char*>(font_data.data()), font_data_size,
       HB_MEMORY_MODE_READONLY, nullptr, nullptr));
   if (!blob) {
     return {};
   }

   ScopedHBSubsetInput input(hb_subset_input_create_or_fail());
   if (!input) {
     return {};
   }

   hb_subset_input_set_flags(input.get(), HB_SUBSET_FLAGS_RETAIN_GIDS |
                                              HB_SUBSET_FLAGS_NOTDEF_OUTLINE);

   // Request exactly the glyphs the caller collected.
   hb_set_t* glyphs = hb_subset_input_glyph_set(input.get());
   for (uint32_t gid : gids) {
     hb_set_add(glyphs, gid);
   }

   ScopedHBFace face(hb_face_create(blob.get(), 0));
   ScopedHBFace subset_face(hb_subset_or_fail(face.get(), input.get()));
   if (!subset_face) {
     return {};
   }

   ScopedHBBlob subset_blob(hb_face_reference_blob(subset_face.get()));
   unsigned int out_len = 0;
   const char* out_data = hb_blob_get_data(subset_blob.get(), &out_len);
   if (!out_data || out_len == 0) {
     return {};
   }

   // SAFETY: hb_blob_get_data() reports the length of the buffer it returns in
   // `out_len`, so [out_data, out_data + out_len) is the blob's data.
   return DataVector<uint8_t>(out_data, UNSAFE_BUFFERS(out_data + out_len));
 }

 // Builds the name to use for a subsetted copy of `base_font_name`.
 //
 // Per ISO 32000-1:2008, section 9.6.4 "Font Subsets", a subset font name must
 // begin with a tag of six uppercase letters followed by a plus sign (+). The
 // result is kSubsettedFontPrefixLength random uppercase letters, then "+",
 // then the base name. MaybeRemoveSubsettedFontPrefix() is applied to the base
 // name first so that an existing subset tag is replaced rather than stacked.
 ByteString GenerateFontSubsetName(ByteString base_font_name) {
   MaybeRemoveSubsettedFontPrefix(base_font_name);

   // One random number per tag letter.
   std::array<uint32_t, kSubsettedFontPrefixLength> tag_seeds;
   FX_Random::Fill(tag_seeds);

   ByteString tagged_name;
   tagged_name.Reserve(kSubsettedFontPrefixLength +
                       base_font_name.GetLength() + 1);
   for (uint32_t seed : tag_seeds) {
     // Map each random number onto 'A'..'Z'.
     tagged_name += static_cast<char>('A' + (seed % 26));
   }
   tagged_name += "+";
   tagged_name += base_font_name;
   return tagged_name;
 }

 }  // namespace

 // Stores `doc` in `doc_`; the constructor does no other work.
 CPDF_FontSubsetter::CPDF_FontSubsetter(CPDF_Document* doc) : doc_(doc) {}

 // Defaulted destructor, defined out of line in this file.
 CPDF_FontSubsetter::~CPDF_FontSubsetter() = default;

 // Builds replacement objects that swap each eligible embedded font for a
 // subset containing only the glyphs used by the document's page text.
 //
 // `new_obj_nums` lists the object numbers of the font dictionaries that may be
 // subsetted. An empty span yields an empty map and leaves `candidates_`
 // untouched. The span is searched with a binary search while candidates are
 // collected, so it is expected to be sorted.
 //
 // The returned map is keyed by the object number being overridden. For each
 // candidate whose font program subsets successfully it holds replacements for:
 //   - the font file stream (the subsetted bytes),
 //   - the root font dictionary (new /BaseFont),
 //   - the CID font dictionary and its /W array, when present,
 //   - the font descriptor (new /FontName), and
 //   - the /ToUnicode stream, when present.
 // Candidates for which subsetting produces no data are left out entirely.
 std::map<uint32_t, RetainPtr<const CPDF_Object>>
 CPDF_FontSubsetter::GenerateObjectOverrides(
     pdfium::span<const uint32_t> new_obj_nums) {
   std::map<uint32_t, RetainPtr<const CPDF_Object>> overrides;
   if (new_obj_nums.empty()) {
     return overrides;
   }

   candidates_.clear();
   CollectSubsetCandidates(new_obj_nums);

   for (auto& [font_file_obj_num, candidate] : candidates_) {
     // Decode the embedded font program and try to subset it.
     auto font_file_acc =
         pdfium::MakeRetain<CPDF_StreamAcc>(candidate.font_stream);
     font_file_acc->LoadAllDataFiltered();
     auto font_file_bytes = font_file_acc->GetSpan();

     DataVector<uint8_t> subset_bytes =
         GenerateFontSubset(doc_, font_file_bytes, candidate.used_gids);
     if (subset_bytes.empty()) {
       continue;
     }

     // OpenType fonts containing CFF data start with an "OTTO" tag. OpenType
     // data is big-endian, hence the MSB-first read.
     bool is_opentype_cff = false;
     if (font_file_bytes.size() > 4) {
       is_opentype_cff =
           fxcrt::GetUInt32MSBFirst(font_file_bytes.first<4>()) ==
           CFX_FontMapper::MakeTag('O', 'T', 'T', 'O');
     }

     // Replacement font file stream. See ISO 32000-1:2008 section 9.9
     // "Embedded Font Programs" for the OpenType CFF entries.
     auto font_file_dict = pdfium::MakeRetain<CPDF_Dictionary>();
     if (is_opentype_cff) {
       font_file_dict->SetNewFor<CPDF_Name>("Subtype", "OpenType");
     } else {
       // Only Type 1 and TrueType fonts require a Length1 entry.
       font_file_dict->SetNewFor<CPDF_Number>(
           "Length1", static_cast<int>(subset_bytes.size()));
     }
     overrides[font_file_obj_num] = pdfium::MakeRetain<CPDF_Stream>(
         std::move(subset_bytes), std::move(font_file_dict));

     // Replacement root font dictionary: a clone carrying the subset name.
     RetainPtr<CPDF_Dictionary> root_font_copy =
         ToDictionary(candidate.root_font->Clone());
     root_font_copy->SetNewFor<CPDF_Name>("BaseFont",
                                          candidate.subset_font_name);
     overrides[candidate.root_font->GetObjNum()] = root_font_copy;

     // Replacement CID font dictionary, for composite fonts only.
     if (candidate.cid_font) {
       RetainPtr<CPDF_Dictionary> cid_font_copy =
           ToDictionary(candidate.cid_font->Clone());
       cid_font_copy->SetNewFor<CPDF_Name>("BaseFont",
                                           candidate.subset_font_name);
       if (is_opentype_cff) {
         cid_font_copy->SetNewFor<CPDF_Name>("Subtype", "CIDFontType0");
       }
       overrides[candidate.cid_font->GetObjNum()] = cid_font_copy;

       // Replace the widths array with one built from the recorded widths.
       // NOTE(review): keyed by the /W array's own object number, which
       // presumably requires /W to be an indirect object - confirm.
       RetainPtr<const CPDF_Array> widths = candidate.cid_font->GetArrayFor("W");
       if (widths) {
         overrides[widths->GetObjNum()] =
             CreateWidthsArray(candidate.char_code_to_width);
       }
     }

     // Replacement font descriptor: a clone carrying the subset name.
     RetainPtr<CPDF_Dictionary> descriptor_copy =
         ToDictionary(candidate.descriptor->Clone());
     descriptor_copy->SetNewFor<CPDF_Name>("FontName",
                                           candidate.subset_font_name);
     if (is_opentype_cff) {
       // Set the symbolic flag and clear the nonsymbolic flag. A subsetted
       // font's character set may not be a strict subset of the "standard
       // Latin character set", and the mapping (GIDs in a simple font, CIDs in
       // a composite font) is unique to the subset. Marking the font symbolic
       // keeps PDF readers from applying font substitution strategies that
       // would produce incorrect glyphs. See ISO 32000-1:2008, section 9.8.2
       // "Font Descriptor Flags".
       constexpr int kSymbolicFlag = 0x04;
       constexpr int kNonsymbolicFlag = 0x20;
       const int flags =
           (descriptor_copy->GetIntegerFor("Flags") | kSymbolicFlag) &
           ~kNonsymbolicFlag;
       descriptor_copy->SetNewFor<CPDF_Number>("Flags", flags);

       // The font program moves from /FontFile2 to /FontFile3.
       descriptor_copy->RemoveFor("FontFile2");
       descriptor_copy->SetNewFor<CPDF_Reference>("FontFile3", doc_,
                                                  font_file_obj_num);
     }
     overrides[candidate.descriptor->GetObjNum()] = descriptor_copy;

     // Replacement /ToUnicode stream built from the recorded mappings.
     RetainPtr<const CPDF_Stream> to_unicode =
         candidate.root_font->GetStreamFor("ToUnicode");
     if (to_unicode) {
       overrides[to_unicode->GetObjNum()] =
           LoadUnicode(candidate.char_code_to_unicode);
     }
   }
   return overrides;
 }

 // Defaulted constructor and destructor for SubsetCandidate, defined out of
 // line in this file.
 CPDF_FontSubsetter::SubsetCandidate::SubsetCandidate() = default;

 CPDF_FontSubsetter::SubsetCandidate::~SubsetCandidate() = default;

 // Walks every page of `doc_`, parses its content, and gathers subset
 // candidates from it. Pages without a page dictionary are skipped.
 // `new_obj_nums` is passed through unchanged to the per-page collection.
 void CPDF_FontSubsetter::CollectSubsetCandidates(
     pdfium::span<const uint32_t> new_obj_nums) {
   for (int page_index = 0; page_index < doc_->GetPageCount(); ++page_index) {
     RetainPtr<CPDF_Dictionary> page_dict =
         doc_->GetMutablePageDictionary(page_index);
     if (page_dict) {
       // The page's objects are only available after parsing its content.
       auto page = pdfium::MakeRetain<CPDF_Page>(doc_, std::move(page_dict));
       page->ParseContent();
       CollectSubsetCandidatesFromPage(page, new_obj_nums);
     }
   }
 }

 // Scans the text objects on `page` and records, in `candidates_`, every font
 // that can be subsetted together with the characters each text object uses.
 //
 // A text object contributes only if its font dictionary's object number is
 // found in `new_obj_nums` (binary search, so the span is expected to be
 // sorted), the font has a font descriptor, and that descriptor has a
 // "FontFile2" stream. Candidates are keyed by the font file stream's object
 // number; the first text object seen for a stream fills in the candidate's
 // name and dictionaries.
 void CPDF_FontSubsetter::CollectSubsetCandidatesFromPage(
     CPDF_Page* page,
     pdfium::span<const uint32_t> new_obj_nums) {
   for (const auto& page_obj : *page) {
     const CPDF_TextObject* text = page_obj->AsText();
     if (!text) {
       continue;
     }

     RetainPtr<CPDF_Font> font = text->GetFont();
     RetainPtr<const CPDF_Dictionary> root_font = font->GetFontDict();
     const bool is_listed_font =
         std::ranges::binary_search(new_obj_nums, root_font->GetObjNum());
     if (!is_listed_font) {
       continue;
     }

     // For a CID font the descriptor lives on the first descendant font;
     // otherwise it lives on the root font dictionary itself.
     RetainPtr<const CPDF_Dictionary> cid_font;
     if (font->IsCIDFont()) {
       RetainPtr<const CPDF_Array> descendants =
           root_font->GetArrayFor("DescendantFonts");
       CHECK(descendants);
       cid_font = descendants->GetDictAt(0);
       CHECK(cid_font);
     }
     const auto& descriptor_owner = cid_font ? cid_font : root_font;
     RetainPtr<const CPDF_Dictionary> descriptor =
         descriptor_owner->GetDictFor("FontDescriptor");
     if (!descriptor) {
       continue;
     }

     // Internally, all embedded font file streams are set as "FontFile" or
     // "FontFile2". HarfBuzz does not support subsetting Type 1 fonts, so
     // fonts with only "FontFile" are ignored.
     RetainPtr<const CPDF_Stream> font_stream =
         descriptor->GetStreamFor("FontFile2");
     if (!font_stream) {
       continue;
     }

     // operator[] creates an empty candidate on first sight of this stream; a
     // null `font_stream` member marks it as not yet initialized.
     auto& candidate = candidates_[font_stream->GetObjNum()];
     if (!candidate.font_stream) {
       candidate.subset_font_name =
           GenerateFontSubsetName(font->GetBaseFontName());
       candidate.font_stream = font_stream;
       candidate.root_font = root_font;
       candidate.cid_font = cid_font;
       candidate.descriptor = descriptor;
     }

     // Widths are only tracked for CID fonts that carry a /W array.
     const bool subset_widths = cid_font && cid_font->GetArrayFor("W");
     AddUsedText(text, candidate, subset_widths);
   }
 }

 void CPDF_FontSubsetter::AddUsedText(const CPDF_TextObject* text,
                                      SubsetCandidate& candidate,
                                      bool subset_widths) {
   CPDF_Font* font = text->GetFont();
   const std::vector<uint32_t>& char_codes = text->GetCharCodes();
   std::set<uint32_t>& used_gids = candidate.used_gids;
   for (uint32_t char_code : char_codes) {
     int gid = font->GlyphFromCharCode(char_code, /*pVertGlyph=*/nullptr);
     if (gid != -1) {
       used_gids.insert(static_cast<uint32_t>(gid));
     }

     if (subset_widths) {
       int width = font->GetCharWidth(char_code);
       if (width >= 0) {
         candidate.char_code_to_width[char_code] = static_cast<uint32_t>(width);
       }
     }

     WideString unicode = font->UnicodeFromCharCode(char_code);
     if (!unicode.IsEmpty()) {
       candidate.char_code_to_unicode.emplace(char_code,
                                              static_cast<uint32_t>(unicode[0]));
     }
   }
 }
	// Copyright 2026 The PDFium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "core/fpdfapi/edit/cpdf_fontsubsetter.h"

	#include <hb-subset.h>
	#include <hb.h>
	#include <stdint.h>

	#include <algorithm>
	#include <array>
	#include <map>
	#include <memory>
	#include <set>
	#include <utility>
	#include <vector>

	#include "core/fpdfapi/edit/cpdf_font_util.h"
	#include "core/fpdfapi/font/cpdf_font.h"
	#include "core/fpdfapi/page/cpdf_page.h"
	#include "core/fpdfapi/page/cpdf_textobject.h"
	#include "core/fpdfapi/parser/cpdf_array.h"
	#include "core/fpdfapi/parser/cpdf_dictionary.h"
	#include "core/fpdfapi/parser/cpdf_document.h"
	#include "core/fpdfapi/parser/cpdf_name.h"
	#include "core/fpdfapi/parser/cpdf_number.h"
	#include "core/fpdfapi/parser/cpdf_reference.h"
	#include "core/fpdfapi/parser/cpdf_stream.h"
	#include "core/fpdfapi/parser/cpdf_stream_acc.h"
	#include "core/fxcrt/byteorder.h"
	#include "core/fxcrt/bytestring.h"
	#include "core/fxcrt/check.h"
	#include "core/fxcrt/compiler_specific.h"
	#include "core/fxcrt/data_vector.h"
	#include "core/fxcrt/fx_extension.h"
	#include "core/fxcrt/fx_random.h"
	#include "core/fxcrt/retain_ptr.h"
	#include "core/fxcrt/span.h"
	#include "core/fxcrt/widestring.h"
	#include "core/fxge/cfx_fontmapper.h"
	#include "core/fxge/fx_font.h"

	namespace {

	// Stateless deleter for std::unique_ptr: hands the held pointer to
	// `DestroyFunction`, whatever pointee type it is instantiated with.
	template <auto DestroyFunction>
	struct HBDeleter {
	  template <typename HBObject>
	  void operator()(HBObject* object) const {
	    DestroyFunction(object);
	  }
	};

	// Owning smart-pointer aliases for HarfBuzz handles. When one of these is
	// destroyed or reset while holding a pointer, the matching hb_*_destroy()
	// function named in the alias is called on that pointer.
	using ScopedHBBlob = std::unique_ptr<hb_blob_t, HBDeleter<hb_blob_destroy>>;
	using ScopedHBFace = std::unique_ptr<hb_face_t, HBDeleter<hb_face_destroy>>;
	using ScopedHBSubsetInput =
	std::unique_ptr<hb_subset_input_t, HBDeleter<hb_subset_input_destroy>>;

	// Uses HarfBuzz to build a subset of the font program in `font_data` that
	// covers the glyph IDs in `gids`, and returns the bytes of the subsetted font.
	//
	// The subset is requested with HB_SUBSET_FLAGS_RETAIN_GIDS and
	// HB_SUBSET_FLAGS_NOTDEF_OUTLINE. `doc` is not used by this function.
	//
	// Returns an empty vector when `font_data` is too large for HarfBuzz's 32-bit
	// blob length, when any HarfBuzz step fails, or when the subset has no data.
	DataVector<uint8_t> GenerateFontSubset(CPDF_Document* doc,
	                                       pdfium::span<const uint8_t> font_data,
	                                       const std::set<uint32_t>& gids) {
	  // hb_blob_create_or_fail() takes a 32-bit length. Reject data that does not
	  // fit rather than letting the narrowing cast silently truncate it.
	  const uint32_t font_data_size = static_cast<uint32_t>(font_data.size());
	  if (font_data_size != font_data.size()) {
	    return {};
	  }

	  // Wrap the data. No destroy callback is registered, so `font_data` stays
	  // owned by the caller.
	  ScopedHBBlob blob(hb_blob_create_or_fail(
	      reinterpret_cast<const char*>(font_data.data()), font_data_size,
	      HB_MEMORY_MODE_READONLY, nullptr, nullptr));
	  if (!blob) {
	    return {};
	  }

	  ScopedHBSubsetInput input(hb_subset_input_create_or_fail());
	  if (!input) {
	    return {};
	  }

	  hb_subset_input_set_flags(input.get(), HB_SUBSET_FLAGS_RETAIN_GIDS |
	                                             HB_SUBSET_FLAGS_NOTDEF_OUTLINE);

	  // Request exactly the glyphs the caller collected.
	  hb_set_t* glyphs = hb_subset_input_glyph_set(input.get());
	  for (uint32_t gid : gids) {
	    hb_set_add(glyphs, gid);
	  }

	  ScopedHBFace face(hb_face_create(blob.get(), 0));
	  ScopedHBFace subset_face(hb_subset_or_fail(face.get(), input.get()));
	  if (!subset_face) {
	    return {};
	  }

	  ScopedHBBlob subset_blob(hb_face_reference_blob(subset_face.get()));
	  unsigned int out_len = 0;
	  const char* out_data = hb_blob_get_data(subset_blob.get(), &out_len);
	  if (!out_data || out_len == 0) {
	    return {};
	  }

	  // SAFETY: hb_blob_get_data() reports the length of the buffer it returns in
	  // `out_len`, so [out_data, out_data + out_len) is the blob's data.
	  return DataVector<uint8_t>(out_data, UNSAFE_BUFFERS(out_data + out_len));
	}

	// Builds the name to use for a subsetted copy of `base_font_name`.
	//
	// Per ISO 32000-1:2008, section 9.6.4 "Font Subsets", a subset font name must
	// begin with a tag of six uppercase letters followed by a plus sign (+). The
	// result is kSubsettedFontPrefixLength random uppercase letters, then "+",
	// then the base name. MaybeRemoveSubsettedFontPrefix() is applied to the base
	// name first so that an existing subset tag is replaced rather than stacked.
	ByteString GenerateFontSubsetName(ByteString base_font_name) {
	  MaybeRemoveSubsettedFontPrefix(base_font_name);

	  // One random number per tag letter.
	  std::array<uint32_t, kSubsettedFontPrefixLength> tag_seeds;
	  FX_Random::Fill(tag_seeds);

	  ByteString tagged_name;
	  tagged_name.Reserve(kSubsettedFontPrefixLength +
	                      base_font_name.GetLength() + 1);
	  for (uint32_t seed : tag_seeds) {
	    // Map each random number onto 'A'..'Z'.
	    tagged_name += static_cast<char>('A' + (seed % 26));
	  }
	  tagged_name += "+";
	  tagged_name += base_font_name;
	  return tagged_name;
	}

	} // namespace

	// Stores `doc` in `doc_`; the constructor does no other work.
	CPDF_FontSubsetter::CPDF_FontSubsetter(CPDF_Document* doc) : doc_(doc) {}

	// Defaulted destructor, defined out of line in this file.
	CPDF_FontSubsetter::~CPDF_FontSubsetter() = default;

	// Builds replacement objects that swap each eligible embedded font for a
	// subset containing only the glyphs used by the document's page text.
	//
	// `new_obj_nums` lists the object numbers of the font dictionaries that may be
	// subsetted. An empty span yields an empty map and leaves `candidates_`
	// untouched. The span is searched with a binary search while candidates are
	// collected, so it is expected to be sorted.
	//
	// The returned map is keyed by the object number being overridden. For each
	// candidate whose font program subsets successfully it holds replacements for:
	//   - the font file stream (the subsetted bytes),
	//   - the root font dictionary (new /BaseFont),
	//   - the CID font dictionary and its /W array, when present,
	//   - the font descriptor (new /FontName), and
	//   - the /ToUnicode stream, when present.
	// Candidates for which subsetting produces no data are left out entirely.
	std::map<uint32_t, RetainPtr<const CPDF_Object>>
	CPDF_FontSubsetter::GenerateObjectOverrides(
	    pdfium::span<const uint32_t> new_obj_nums) {
	  std::map<uint32_t, RetainPtr<const CPDF_Object>> overrides;
	  if (new_obj_nums.empty()) {
	    return overrides;
	  }

	  candidates_.clear();
	  CollectSubsetCandidates(new_obj_nums);

	  for (auto& [font_file_obj_num, candidate] : candidates_) {
	    // Decode the embedded font program and try to subset it.
	    auto font_file_acc =
	        pdfium::MakeRetain<CPDF_StreamAcc>(candidate.font_stream);
	    font_file_acc->LoadAllDataFiltered();
	    auto font_file_bytes = font_file_acc->GetSpan();

	    DataVector<uint8_t> subset_bytes =
	        GenerateFontSubset(doc_, font_file_bytes, candidate.used_gids);
	    if (subset_bytes.empty()) {
	      continue;
	    }

	    // OpenType fonts containing CFF data start with an "OTTO" tag. OpenType
	    // data is big-endian, hence the MSB-first read.
	    bool is_opentype_cff = false;
	    if (font_file_bytes.size() > 4) {
	      is_opentype_cff =
	          fxcrt::GetUInt32MSBFirst(font_file_bytes.first<4>()) ==
	          CFX_FontMapper::MakeTag('O', 'T', 'T', 'O');
	    }

	    // Replacement font file stream. See ISO 32000-1:2008 section 9.9
	    // "Embedded Font Programs" for the OpenType CFF entries.
	    auto font_file_dict = pdfium::MakeRetain<CPDF_Dictionary>();
	    if (is_opentype_cff) {
	      font_file_dict->SetNewFor<CPDF_Name>("Subtype", "OpenType");
	    } else {
	      // Only Type 1 and TrueType fonts require a Length1 entry.
	      font_file_dict->SetNewFor<CPDF_Number>(
	          "Length1", static_cast<int>(subset_bytes.size()));
	    }
	    overrides[font_file_obj_num] = pdfium::MakeRetain<CPDF_Stream>(
	        std::move(subset_bytes), std::move(font_file_dict));

	    // Replacement root font dictionary: a clone carrying the subset name.
	    RetainPtr<CPDF_Dictionary> root_font_copy =
	        ToDictionary(candidate.root_font->Clone());
	    root_font_copy->SetNewFor<CPDF_Name>("BaseFont",
	                                         candidate.subset_font_name);
	    overrides[candidate.root_font->GetObjNum()] = root_font_copy;

	    // Replacement CID font dictionary, for composite fonts only.
	    if (candidate.cid_font) {
	      RetainPtr<CPDF_Dictionary> cid_font_copy =
	          ToDictionary(candidate.cid_font->Clone());
	      cid_font_copy->SetNewFor<CPDF_Name>("BaseFont",
	                                          candidate.subset_font_name);
	      if (is_opentype_cff) {
	        cid_font_copy->SetNewFor<CPDF_Name>("Subtype", "CIDFontType0");
	      }
	      overrides[candidate.cid_font->GetObjNum()] = cid_font_copy;

	      // Replace the widths array with one built from the recorded widths.
	      // NOTE(review): keyed by the /W array's own object number, which
	      // presumably requires /W to be an indirect object - confirm.
	      RetainPtr<const CPDF_Array> widths =
	          candidate.cid_font->GetArrayFor("W");
	      if (widths) {
	        overrides[widths->GetObjNum()] =
	            CreateWidthsArray(candidate.char_code_to_width);
	      }
	    }

	    // Replacement font descriptor: a clone carrying the subset name.
	    RetainPtr<CPDF_Dictionary> descriptor_copy =
	        ToDictionary(candidate.descriptor->Clone());
	    descriptor_copy->SetNewFor<CPDF_Name>("FontName",
	                                          candidate.subset_font_name);
	    if (is_opentype_cff) {
	      // Set the symbolic flag and clear the nonsymbolic flag. A subsetted
	      // font's character set may not be a strict subset of the "standard
	      // Latin character set", and the mapping (GIDs in a simple font, CIDs in
	      // a composite font) is unique to the subset. Marking the font symbolic
	      // keeps PDF readers from applying font substitution strategies that
	      // would produce incorrect glyphs. See ISO 32000-1:2008, section 9.8.2
	      // "Font Descriptor Flags".
	      constexpr int kSymbolicFlag = 0x04;
	      constexpr int kNonsymbolicFlag = 0x20;
	      const int flags =
	          (descriptor_copy->GetIntegerFor("Flags") | kSymbolicFlag) &
	          ~kNonsymbolicFlag;
	      descriptor_copy->SetNewFor<CPDF_Number>("Flags", flags);

	      // The font program moves from /FontFile2 to /FontFile3.
	      descriptor_copy->RemoveFor("FontFile2");
	      descriptor_copy->SetNewFor<CPDF_Reference>("FontFile3", doc_,
	                                                 font_file_obj_num);
	    }
	    overrides[candidate.descriptor->GetObjNum()] = descriptor_copy;

	    // Replacement /ToUnicode stream built from the recorded mappings.
	    RetainPtr<const CPDF_Stream> to_unicode =
	        candidate.root_font->GetStreamFor("ToUnicode");
	    if (to_unicode) {
	      overrides[to_unicode->GetObjNum()] =
	          LoadUnicode(candidate.char_code_to_unicode);
	    }
	  }
	  return overrides;
	}

	// Defaulted constructor and destructor for SubsetCandidate, defined out of
	// line in this file.
	CPDF_FontSubsetter::SubsetCandidate::SubsetCandidate() = default;

	CPDF_FontSubsetter::SubsetCandidate::~SubsetCandidate() = default;

	// Walks every page of `doc_`, parses its content, and gathers subset
	// candidates from it. Pages without a page dictionary are skipped.
	// `new_obj_nums` is passed through unchanged to the per-page collection.
	void CPDF_FontSubsetter::CollectSubsetCandidates(
	    pdfium::span<const uint32_t> new_obj_nums) {
	  for (int page_index = 0; page_index < doc_->GetPageCount(); ++page_index) {
	    RetainPtr<CPDF_Dictionary> page_dict =
	        doc_->GetMutablePageDictionary(page_index);
	    if (page_dict) {
	      // The page's objects are only available after parsing its content.
	      auto page = pdfium::MakeRetain<CPDF_Page>(doc_, std::move(page_dict));
	      page->ParseContent();
	      CollectSubsetCandidatesFromPage(page, new_obj_nums);
	    }
	  }
	}

	// Scans the text objects on `page` and records, in `candidates_`, every font
	// that can be subsetted together with the characters each text object uses.
	//
	// A text object contributes only if its font dictionary's object number is
	// found in `new_obj_nums` (binary search, so the span is expected to be
	// sorted), the font has a font descriptor, and that descriptor has a
	// "FontFile2" stream. Candidates are keyed by the font file stream's object
	// number; the first text object seen for a stream fills in the candidate's
	// name and dictionaries.
	void CPDF_FontSubsetter::CollectSubsetCandidatesFromPage(
	    CPDF_Page* page,
	    pdfium::span<const uint32_t> new_obj_nums) {
	  for (const auto& page_obj : *page) {
	    const CPDF_TextObject* text = page_obj->AsText();
	    if (!text) {
	      continue;
	    }

	    RetainPtr<CPDF_Font> font = text->GetFont();
	    RetainPtr<const CPDF_Dictionary> root_font = font->GetFontDict();
	    const bool is_listed_font =
	        std::ranges::binary_search(new_obj_nums, root_font->GetObjNum());
	    if (!is_listed_font) {
	      continue;
	    }

	    // For a CID font the descriptor lives on the first descendant font;
	    // otherwise it lives on the root font dictionary itself.
	    RetainPtr<const CPDF_Dictionary> cid_font;
	    if (font->IsCIDFont()) {
	      RetainPtr<const CPDF_Array> descendants =
	          root_font->GetArrayFor("DescendantFonts");
	      CHECK(descendants);
	      cid_font = descendants->GetDictAt(0);
	      CHECK(cid_font);
	    }
	    const auto& descriptor_owner = cid_font ? cid_font : root_font;
	    RetainPtr<const CPDF_Dictionary> descriptor =
	        descriptor_owner->GetDictFor("FontDescriptor");
	    if (!descriptor) {
	      continue;
	    }

	    // Internally, all embedded font file streams are set as "FontFile" or
	    // "FontFile2". HarfBuzz does not support subsetting Type 1 fonts, so
	    // fonts with only "FontFile" are ignored.
	    RetainPtr<const CPDF_Stream> font_stream =
	        descriptor->GetStreamFor("FontFile2");
	    if (!font_stream) {
	      continue;
	    }

	    // operator[] creates an empty candidate on first sight of this stream; a
	    // null `font_stream` member marks it as not yet initialized.
	    auto& candidate = candidates_[font_stream->GetObjNum()];
	    if (!candidate.font_stream) {
	      candidate.subset_font_name =
	          GenerateFontSubsetName(font->GetBaseFontName());
	      candidate.font_stream = font_stream;
	      candidate.root_font = root_font;
	      candidate.cid_font = cid_font;
	      candidate.descriptor = descriptor;
	    }

	    // Widths are only tracked for CID fonts that carry a /W array.
	    const bool subset_widths = cid_font && cid_font->GetArrayFor("W");
	    AddUsedText(text, candidate, subset_widths);
	  }
	}

	void CPDF_FontSubsetter::AddUsedText(const CPDF_TextObject* text,
	SubsetCandidate& candidate,
	bool subset_widths) {
	CPDF_Font* font = text->GetFont();
	const std::vector<uint32_t>& char_codes = text->GetCharCodes();
	std::set<uint32_t>& used_gids = candidate.used_gids;
	for (uint32_t char_code : char_codes) {
	int gid = font->GlyphFromCharCode(char_code, /pVertGlyph=/nullptr);
	if (gid != -1) {
	used_gids.insert(static_cast<uint32_t>(gid));
	}

	if (subset_widths) {
	int width = font->GetCharWidth(char_code);
	if (width >= 0) {
	candidate.char_code_to_width[char_code] = static_cast<uint32_t>(width);
	}
	}

	WideString unicode = font->UnicodeFromCharCode(char_code);
	if (!unicode.IsEmpty()) {
	candidate.char_code_to_unicode.emplace(char_code,
	static_cast<uint32_t>(unicode[0]));
	}
	}
	}