Improve FPDFLink_LoadWebLinks() performance.

Internally, FPDFLink_LoadWebLinks() calls CPDF_LinkExtract::ParseLink(),
which in turn calls CPDF_TextPage::GetPageText() repeatedly to get
sub-strings from CPDF_TextPage. GetPageText() has to do many repeated
calculations each time it is called, so this makes ParseLink() really
slow for pages with lots of text.

Avoid this problem by calling CPDF_TextPage::GetAllPageText() once to
extract the text. Then use WideString::Mid() to get the desired
sub-strings.

Bug: b/146845409
Change-Id: I5792436d32cd158401f01bd67fca34baca7dfbeb
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/65310
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index 7f66832..91c7e4b 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -128,6 +128,7 @@
   int nTotalChar = m_pTextPage->CountChars();
   bool bAfterHyphen = false;
   bool bLineBreak = false;
+  const WideString page_text = m_pTextPage->GetAllPageText();
   while (pos < nTotalChar) {
     FPDF_CHAR_INFO pageChar;
     m_pTextPage->GetCharInfo(pos, &pageChar);
@@ -143,8 +144,7 @@
         pos++;
         continue;
       }
-      WideString strBeCheck;
-      strBeCheck = m_pTextPage->GetPageText(start, nCount);
+      WideString strBeCheck = page_text.Mid(start, nCount);
       if (bLineBreak) {
         strBeCheck.Remove(TEXT_LINEFEED_CHAR);
         strBeCheck.Remove(TEXT_RETURN_CHAR);