Add tests for CXFA_Document's parsing of "Use" expressions.

Split parsing into static methods that operate on strings so
they can be tested without creating the whole gc'd environment.

-- handle and test empty strings just because.
-- use early returns where possible.
-- move one conditional into only branch where it can run.

Change-Id: I90c14486585007d5636953660736c6b958fffa91
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/86791
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/xfa/fxfa/parser/cxfa_document.cpp b/xfa/fxfa/parser/cxfa_document.cpp
index b9e69f2..4b2c25b 100644
--- a/xfa/fxfa/parser/cxfa_document.cpp
+++ b/xfa/fxfa/parser/cxfa_document.cpp
@@ -1554,36 +1554,14 @@
     WideStringView wsURI;
     WideStringView wsID;
     WideStringView wsSOM;
-
     if (!wsUseVal.IsEmpty()) {
-      auto uSharpPos = wsUseVal.Find('#');
-      if (!uSharpPos.has_value()) {
-        wsURI = wsUseVal.AsStringView();
-      } else {
-        wsURI = WideStringView(wsUseVal.c_str(), uSharpPos.value());
-        size_t uLen = wsUseVal.GetLength();
-        if (uLen >= uSharpPos.value() + 5 &&
-            WideStringView(wsUseVal.c_str() + uSharpPos.value(), 5) ==
-                L"#som(" &&
-            wsUseVal[uLen - 1] == ')') {
-          wsSOM = WideStringView(wsUseVal.c_str() + uSharpPos.value() + 5,
-                                 uLen - 1 - uSharpPos.value() - 5);
-        } else {
-          wsID = WideStringView(wsUseVal.c_str() + uSharpPos.value() + 1,
-                                uLen - uSharpPos.value() - 1);
-        }
-      }
+      ParseUseHref(wsUseVal, wsURI, wsID, wsSOM);
+      if (!wsURI.IsEmpty() && !wsURI.EqualsASCII("."))
+        continue;
     } else {
       wsUseVal = pUseHrefNode->JSObject()->GetCData(XFA_Attribute::Use);
-      if (!wsUseVal.IsEmpty()) {
-        if (wsUseVal[0] == '#')
-          wsID = WideStringView(wsUseVal.c_str() + 1, wsUseVal.GetLength() - 1);
-        else
-          wsSOM = WideStringView(wsUseVal.c_str(), wsUseVal.GetLength());
-      }
+      ParseUse(wsUseVal, wsID, wsSOM);
     }
-    if (!wsURI.IsEmpty() && !wsURI.EqualsASCII("."))
-      continue;
 
     CXFA_Node* pProtoNode = nullptr;
     if (!wsSOM.IsEmpty()) {
@@ -1612,6 +1590,46 @@
   }
 }
 
+// static
+void CXFA_Document::ParseUseHref(const WideString& wsUseVal,
+                                 WideStringView& wsURI,
+                                 WideStringView& wsID,
+                                 WideStringView& wsSOM) {
+  if (wsUseVal.IsEmpty())
+    return;
+
+  auto uSharpPos = wsUseVal.Find('#');
+  if (!uSharpPos.has_value()) {
+    wsURI = wsUseVal.AsStringView();
+    return;
+  }
+  wsURI = WideStringView(wsUseVal.c_str(), uSharpPos.value());
+  size_t uLen = wsUseVal.GetLength();
+  if (uLen >= uSharpPos.value() + 5 &&
+      WideStringView(wsUseVal.c_str() + uSharpPos.value(), 5) == L"#som(" &&
+      wsUseVal[uLen - 1] == ')') {
+    wsSOM = WideStringView(wsUseVal.c_str() + uSharpPos.value() + 5,
+                           uLen - 1 - uSharpPos.value() - 5);
+    return;
+  }
+  wsID = WideStringView(wsUseVal.c_str() + uSharpPos.value() + 1,
+                        uLen - uSharpPos.value() - 1);
+}
+
+// static
+void CXFA_Document::ParseUse(const WideString& wsUseVal,
+                             WideStringView& wsID,
+                             WideStringView& wsSOM) {
+  if (wsUseVal.IsEmpty())
+    return;
+
+  if (wsUseVal[0] == '#') {
+    wsID = WideStringView(wsUseVal.c_str() + 1, wsUseVal.GetLength() - 1);
+    return;
+  }
+  wsSOM = WideStringView(wsUseVal.c_str(), wsUseVal.GetLength());
+}
+
 CXFA_Node* CXFA_Document::DataMerge_CopyContainer(CXFA_Node* pTemplateNode,
                                                   CXFA_Node* pFormNode,
                                                   CXFA_Node* pDataScope,
diff --git a/xfa/fxfa/parser/cxfa_document.h b/xfa/fxfa/parser/cxfa_document.h
index f7e2a537..add7b6f 100644
--- a/xfa/fxfa/parser/cxfa_document.h
+++ b/xfa/fxfa/parser/cxfa_document.h
@@ -147,8 +147,17 @@
 
  private:
   friend class CXFA_DocumentTest_ParseXFAVersion_Test;
+  friend class CXFA_DocumentTest_ParseUseHref_Test;
+  friend class CXFA_DocumentTest_ParseUse_Test;
 
   static XFA_VERSION ParseXFAVersion(const WideString& wsTemplateNS);
+  static void ParseUseHref(const WideString& wsUseVal,
+                           WideStringView& wsURI,
+                           WideStringView& wsID,
+                           WideStringView& wsSOM);
+  static void ParseUse(const WideString& wsUseVal,
+                       WideStringView& wsID,
+                       WideStringView& wsSOM);
 
   CXFA_Document(CXFA_FFNotify* notify,
                 cppgc::Heap* heap,
diff --git a/xfa/fxfa/parser/cxfa_document_unittest.cpp b/xfa/fxfa/parser/cxfa_document_unittest.cpp
index 43872d1..25d0d80 100644
--- a/xfa/fxfa/parser/cxfa_document_unittest.cpp
+++ b/xfa/fxfa/parser/cxfa_document_unittest.cpp
@@ -81,3 +81,94 @@
   EXPECT_EQ(317, CXFA_Document::ParseXFAVersion(
                      L"http://www.xfa.org/schema/xfa-template/3.17"));
 }
+
+TEST(CXFA_DocumentTest, ParseUseHref) {
+  {
+    WideString wsEmpty;  // Must outlive views.
+    WideStringView wsURI;
+    WideStringView wsID;
+    WideStringView wsSOM;
+    CXFA_Document::ParseUseHref(wsEmpty, wsURI, wsID, wsSOM);
+    EXPECT_EQ(L"", wsURI);
+    EXPECT_EQ(L"", wsID);
+    EXPECT_EQ(L"", wsSOM);
+  }
+  {
+    WideString wsNoSharp(L"url-part-only");  // Must outlive views.
+    WideStringView wsURI;
+    WideStringView wsID;
+    WideStringView wsSOM;
+    CXFA_Document::ParseUseHref(wsNoSharp, wsURI, wsID, wsSOM);
+    EXPECT_EQ(L"url-part-only", wsURI);
+    EXPECT_EQ(L"", wsID);
+    EXPECT_EQ(L"", wsSOM);
+  }
+  {
+    WideString wsNoSom(L"url-part#frag");  // Must outlive views.
+    WideStringView wsURI;
+    WideStringView wsID;
+    WideStringView wsSOM;
+    CXFA_Document::ParseUseHref(wsNoSom, wsURI, wsID, wsSOM);
+    EXPECT_EQ(L"url-part", wsURI);
+    EXPECT_EQ(L"frag", wsID);
+    EXPECT_EQ(L"", wsSOM);
+  }
+  {
+    WideString wsIncompleteSom(L"url-part#som(");  // Must outlive views.
+    WideStringView wsURI;
+    WideStringView wsID;
+    WideStringView wsSOM;
+    CXFA_Document::ParseUseHref(wsIncompleteSom, wsURI, wsID, wsSOM);
+    EXPECT_EQ(L"url-part", wsURI);
+    EXPECT_EQ(L"som(", wsID);
+    EXPECT_EQ(L"", wsSOM);
+  }
+  {
+    WideString wsEmptySom(L"url-part#som()");  // Must outlive views.
+    WideStringView wsURI;
+    WideStringView wsID;
+    WideStringView wsSOM;
+    CXFA_Document::ParseUseHref(wsEmptySom, wsURI, wsID, wsSOM);
+    EXPECT_EQ(L"url-part", wsURI);
+    EXPECT_EQ(L"", wsID);
+    EXPECT_EQ(L"", wsSOM);
+  }
+  {
+    WideString wsHasSom(
+        L"url-part#som(nested(foo.bar))");  // Must outlive views.
+    WideStringView wsURI;
+    WideStringView wsID;
+    WideStringView wsSOM;
+    CXFA_Document::ParseUseHref(wsHasSom, wsURI, wsID, wsSOM);
+    EXPECT_EQ(L"url-part", wsURI);
+    EXPECT_EQ(L"", wsID);
+    EXPECT_EQ(L"nested(foo.bar)", wsSOM);
+  }
+}
+
+TEST(CXFA_DocumentTest, ParseUse) {
+  {
+    WideString wsUseVal(L"");  // Must outlive views.
+    WideStringView wsID;
+    WideStringView wsSOM;
+    CXFA_Document::ParseUse(wsUseVal, wsID, wsSOM);
+    EXPECT_EQ(L"", wsID);
+    EXPECT_EQ(L"", wsSOM);
+  }
+  {
+    WideString wsUseVal(L"clams");  // Must outlive views.
+    WideStringView wsID;
+    WideStringView wsSOM;
+    CXFA_Document::ParseUse(wsUseVal, wsID, wsSOM);
+    EXPECT_EQ(L"", wsID);
+    EXPECT_EQ(L"clams", wsSOM);
+  }
+  {
+    WideString wsUseVal(L"#clams");  // Must outlive views.
+    WideStringView wsID;
+    WideStringView wsSOM;
+    CXFA_Document::ParseUse(wsUseVal, wsID, wsSOM);
+    EXPECT_EQ(L"clams", wsID);
+    EXPECT_EQ(L"", wsSOM);
+  }
+}