API to return text range for weblinks

PDFium parses page text to find links using regex matching. These links
are not present as annotations in the PDF, but they are still treated
as links (categorized as auto-detected links).
While parsing, PDFium records the start char index and char count of
each detected link.

This CL exposes a public method to get the start char index and char
count of such auto-detected links. Accessibility needs this to build
the relative mapping between links and text.

This CL adds a unit test that covers asking for the text range of a
valid link, an invalid link index, a null page link, and a negative
link index.

Bug: pdfium:1334
Change-Id: I6d98b35eb9c6e7fcf4d800776b9129588fce0e09
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/56810
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index 37e8dfa..12b09a7 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -582,6 +582,54 @@
   UnloadPage(page);
 }
 
+// Checks that FPDFLink_GetTextRange() reports the char range of a detected
+// web link and leaves its out-params untouched when the call fails.
+TEST_F(FPDFTextEmbedderTest, WebLinksCharRanges) {
+  ASSERT_TRUE(OpenDocument("weblinks.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
+  ASSERT_TRUE(text_page);
+  FPDF_PAGELINK page_link = FPDFLink_LoadWebLinks(text_page);
+  ASSERT_TRUE(page_link);
+
+  // Valid link index: the out-params receive the link's text range.
+  int start_char_index = 0;
+  int char_count = 0;
+  ASSERT_TRUE(
+      FPDFLink_GetTextRange(page_link, 0, &start_char_index, &char_count));
+  EXPECT_EQ(35, start_char_index);
+  EXPECT_EQ(24, char_count);
+
+  // Link index 6 must be rejected, leaving the sentinel values unchanged.
+  start_char_index = -10;
+  char_count = -8;
+  ASSERT_FALSE(
+      FPDFLink_GetTextRange(page_link, 6, &start_char_index, &char_count));
+  EXPECT_EQ(-10, start_char_index);
+  EXPECT_EQ(-8, char_count);
+
+  // A null page link must be rejected, leaving the sentinels unchanged.
+  start_char_index = -10;
+  char_count = -8;
+  ASSERT_FALSE(
+      FPDFLink_GetTextRange(nullptr, 0, &start_char_index, &char_count));
+  EXPECT_EQ(-10, start_char_index);
+  EXPECT_EQ(-8, char_count);
+
+  // A negative link index must be rejected, leaving the sentinels unchanged.
+  start_char_index = -10;
+  char_count = -8;
+  ASSERT_FALSE(
+      FPDFLink_GetTextRange(page_link, -4, &start_char_index, &char_count));
+  EXPECT_EQ(-10, start_char_index);
+  EXPECT_EQ(-8, char_count);
+
+  FPDFLink_CloseWebLinks(page_link);
+  FPDFText_ClosePage(text_page);
+  UnloadPage(page);
+}
+
 TEST_F(FPDFTextEmbedderTest, GetFontSize) {
   ASSERT_TRUE(OpenDocument("hello_world.pdf"));
   FPDF_PAGE page = LoadPage(0);