Improve looping behavior in CPDF_Parser::LoadCrossRefV5().

1) Stop parsing earlier in CPDF_Parser::LoadCrossRefV5() on error.

The object numbers in the /XRef object are in sequential order. So if a
bad object number has been detected, all subsequent object numbers are
also bad. Thus it is better to stop parsing and just call it quits.

2) Continue parsing when an invalid archive object number has been
   detected.

There is no reason to exit the method entirely when this happens. Be
consistent with most other error handlers and continue.

Also rename `objnum` to `obj_num` to be consistent with other variables
in the method.

Change-Id: Ic2f3342c692ecc3d39999bd8175ed68898e9c037
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/86150
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 3e456e8..fffcac4 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -753,6 +753,10 @@
       continue;
 
     for (uint32_t i = 0; i < index.obj_count; i++) {
+      const uint32_t obj_num = index.start_obj_num + i;
+      if (obj_num >= CPDF_Parser::kMaxObjectNumber)
+        break;
+
       ObjectType type = ObjectType::kNotCompressed;
       pdfium::span<const uint8_t> entry_span =
           seg_span.subspan(i * total_width, total_width);
@@ -764,16 +768,12 @@
           continue;
       }
 
-      const uint32_t objnum = index.start_obj_num + i;
-      if (objnum >= CPDF_Parser::kMaxObjectNumber)
-        continue;
-
-      const ObjectType existing_type = GetObjectType(objnum);
+      const ObjectType existing_type = GetObjectType(obj_num);
       if (existing_type == ObjectType::kNull) {
         uint32_t offset =
             GetVarInt(entry_span.subspan(field_widths[0], field_widths[1]));
         if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
-          m_CrossRefTable->AddNormal(objnum, 0, offset);
+          m_CrossRefTable->AddNormal(obj_num, 0, offset);
         continue;
       }
 
@@ -781,7 +781,7 @@
         continue;
 
       if (type == ObjectType::kFree) {
-        m_CrossRefTable->SetFree(objnum);
+        m_CrossRefTable->SetFree(obj_num);
         continue;
       }
 
@@ -790,16 +790,16 @@
       if (type == ObjectType::kNotCompressed) {
         const uint32_t offset = entry_value;
         if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
-          m_CrossRefTable->AddNormal(objnum, 0, offset);
+          m_CrossRefTable->AddNormal(obj_num, 0, offset);
         continue;
       }
 
       DCHECK_EQ(type, ObjectType::kCompressed);
       const uint32_t archive_obj_num = entry_value;
       if (!IsValidObjectNumber(archive_obj_num))
-        return false;
+        continue;
 
-      m_CrossRefTable->AddCompressed(objnum, archive_obj_num);
+      m_CrossRefTable->AddCompressed(obj_num, archive_obj_num);
     }
     segindex += index.obj_count;
   }
diff --git a/core/fpdfapi/parser/cpdf_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
index 65f00f9..8f21229 100644
--- a/core/fpdfapi/parser/cpdf_parser_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_parser_unittest.cpp
@@ -437,19 +437,14 @@
   const auto& objects_info = parser.GetCrossRefTable()->objects_info();
   EXPECT_EQ(2u, objects_info.size());
 
-  // Found by rebuilding the xref table. There are no other objects besides this
-  // and the placeholder, because CPDF_Parser currently stops parsing altogether
-  // with a failure when it encounters an invalid archive object number. Thus
-  // triggering the rebuild.
-  auto rebuilt_object_it = objects_info.find(7);
-  ASSERT_NE(rebuilt_object_it, objects_info.end());
-  EXPECT_EQ(CPDF_Parser::ObjectType::kNormal, rebuilt_object_it->second.type);
-  EXPECT_EQ(14, rebuilt_object_it->second.pos);
+  // Skip over the first object, and continue parsing the remaining objects.
+  auto second_object_it = objects_info.find(1);
+  ASSERT_NE(second_object_it, objects_info.end());
+  EXPECT_EQ(CPDF_Parser::ObjectType::kNormal, second_object_it->second.type);
+  EXPECT_EQ(15, second_object_it->second.pos);
 
-  // TODO(thestig): Should the xref table contain object 2?
-  // Consider reworking CPDF_Parser's object representation to avoid having to
-  // store this placeholder object.
-  auto placeholder_object_it = objects_info.find(2);
-  ASSERT_NE(placeholder_object_it, objects_info.end());
-  EXPECT_EQ(CPDF_Parser::ObjectType::kFree, placeholder_object_it->second.type);
+  auto third_object_it = objects_info.find(2);
+  ASSERT_NE(third_object_it, objects_info.end());
+  EXPECT_EQ(CPDF_Parser::ObjectType::kNormal, third_object_it->second.type);
+  EXPECT_EQ(18, third_object_it->second.pos);
 }