Rearrange CPDF_LinkExtract::ExtractLinks().

Use early breaks/continues to reduce nesting.

Change-Id: I824d4b93fbbb572dad14f2824a33e8fd203c6167
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/65351
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>

commit: c3e55aa23f888aaf9334a3fd35c1f49a3179869c [log] [tgz]
author: Lei Zhang <thestig@chromium.org> Tue Jan 21 21:40:01 2020 +0000
committer: Chromium commit bot <commit-bot@chromium.org> Tue Jan 21 21:40:01 2020 +0000
tree: 2d8283a29f9573989e5e31cb5c06dc7a0d427d90
parent: edfd85442cd6bd5aeae064863a00a9cacdd63b9f [diff]
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index 53006b7..fcd1d95 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp

@@ -117,64 +117,66 @@
 
   int start = 0;
   int pos = 0;
-  int nTotalChar = m_pTextPage->CountChars();
   bool bAfterHyphen = false;
   bool bLineBreak = false;
+  const int nTotalChar = m_pTextPage->CountChars();
   const WideString page_text = m_pTextPage->GetAllPageText();
   while (pos < nTotalChar) {
     FPDF_CHAR_INFO pageChar;
     m_pTextPage->GetCharInfo(pos, &pageChar);
-    if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED ||
-        pageChar.m_Unicode == TEXT_SPACE_CHAR || pos == nTotalChar - 1) {
-      int nCount = pos - start;
-      if (pos == nTotalChar - 1) {
-        nCount++;
-      } else if (bAfterHyphen && (pageChar.m_Unicode == TEXT_LINEFEED_CHAR ||
-                                  pageChar.m_Unicode == TEXT_RETURN_CHAR)) {
-        // Handle text breaks with a hyphen to the next line.
-        bLineBreak = true;
-        pos++;
-        continue;
-      }
-      WideString strBeCheck = page_text.Mid(start, nCount);
-      if (bLineBreak) {
-        strBeCheck.Remove(TEXT_LINEFEED_CHAR);
-        strBeCheck.Remove(TEXT_RETURN_CHAR);
-        bLineBreak = false;
-      }
-      // Replace the generated code with the hyphen char.
-      strBeCheck.Replace(L"\xfffe", TEXT_HYPHEN);
-
-      if (strBeCheck.GetLength() > 5) {
-        while (strBeCheck.GetLength() > 0) {
-          wchar_t ch = strBeCheck[strBeCheck.GetLength() - 1];
-          if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') {
-            strBeCheck = strBeCheck.Left(strBeCheck.GetLength() - 1);
-            nCount--;
-          } else {
-            break;
-          }
-        }
-        // Check for potential web URLs and email addresses.
-        // Ftp address, file system links, data, blob etc. are not checked.
-        if (nCount > 5) {
-          int32_t nStartOffset;
-          int32_t nCountOverload;
-          if (CheckWebLink(&strBeCheck, &nStartOffset, &nCountOverload)) {
-            m_LinkArray.push_back(
-                {start + nStartOffset, nCountOverload, strBeCheck});
-          } else if (CheckMailLink(&strBeCheck)) {
-            m_LinkArray.push_back({start, nCount, strBeCheck});
-          }
-        }
-      }
-      start = ++pos;
-    } else {
+    if (pageChar.m_Flag != FPDFTEXT_CHAR_GENERATED &&
+        pageChar.m_Unicode != TEXT_SPACE_CHAR && pos != nTotalChar - 1) {
       bAfterHyphen = (pageChar.m_Flag == FPDFTEXT_CHAR_HYPHEN ||
                       (pageChar.m_Flag == FPDFTEXT_CHAR_NORMAL &&
                        pageChar.m_Unicode == TEXT_HYPHEN_CHAR));
-      pos++;
+      ++pos;
+      continue;
     }
+
+    int nCount = pos - start;
+    if (pos == nTotalChar - 1) {
+      ++nCount;
+    } else if (bAfterHyphen && (pageChar.m_Unicode == TEXT_LINEFEED_CHAR ||
+                                pageChar.m_Unicode == TEXT_RETURN_CHAR)) {
+      // Handle text breaks with a hyphen to the next line.
+      bLineBreak = true;
+      ++pos;
+      continue;
+    }
+
+    WideString strBeCheck = page_text.Mid(start, nCount);
+    if (bLineBreak) {
+      strBeCheck.Remove(TEXT_LINEFEED_CHAR);
+      strBeCheck.Remove(TEXT_RETURN_CHAR);
+      bLineBreak = false;
+    }
+    // Replace the generated code with the hyphen char.
+    strBeCheck.Replace(L"\xfffe", TEXT_HYPHEN);
+
+    if (strBeCheck.GetLength() > 5) {
+      while (strBeCheck.GetLength() > 0) {
+        wchar_t ch = strBeCheck[strBeCheck.GetLength() - 1];
+        if (ch != L')' && ch != L',' && ch != L'>' && ch != L'.')
+          break;
+
+        strBeCheck = strBeCheck.Left(strBeCheck.GetLength() - 1);
+        nCount--;
+      }
+
+      // Check for potential web URLs and email addresses.
+      // Ftp address, file system links, data, blob etc. are not checked.
+      if (nCount > 5) {
+        int32_t nStartOffset;
+        int32_t nCountOverload;
+        if (CheckWebLink(&strBeCheck, &nStartOffset, &nCountOverload)) {
+          m_LinkArray.push_back(
+              {start + nStartOffset, nCountOverload, strBeCheck});
+        } else if (CheckMailLink(&strBeCheck)) {
+          m_LinkArray.push_back({start, nCount, strBeCheck});
+        }
+      }
+    }
+    start = ++pos;
   }
 }
commit	c3e55aa23f888aaf9334a3fd35c1f49a3179869c	[log] [tgz]
author	Lei Zhang <thestig@chromium.org>	Tue Jan 21 21:40:01 2020 +0000
committer	Chromium commit bot <commit-bot@chromium.org>	Tue Jan 21 21:40:01 2020 +0000
tree	2d8283a29f9573989e5e31cb5c06dc7a0d427d90
parent	edfd85442cd6bd5aeae064863a00a9cacdd63b9f [diff]