Clean up call expression handling in fm2js

Remove handling of arbitrary function calls, and only allow supported
function calls. Issues with the lexer being overly permissive led to
large blobs of JavaScript being dropped into the output. Specifically,
driver code was assuming that anything marked as a function would just
be an identifier that could be inserted into the JavaScript, but the
lexer marks things like ()()() as a function, which would lead to the
JS that follows being inserted as an identifier.

This change is intended as a patch for the specific issue seen in the
fuzzer test case; further work will be needed to make the lexer more
strict.

BUG=724913
TEST=Ran fuzzer test case. Ran unittests,embeddertests,corpustests.

Change-Id: Ib7d9239bf6fece853bea0f4915ee4ad72d3cd290
Reviewed-on: https://pdfium-review.googlesource.com/7032
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
diff --git a/testing/xfa_js_embedder_test.cpp b/testing/xfa_js_embedder_test.cpp
index 4a29872..e5a30f6 100644
--- a/testing/xfa_js_embedder_test.cpp
+++ b/testing/xfa_js_embedder_test.cpp
@@ -53,18 +53,27 @@
 }
 
 bool XFAJSEmbedderTest::Execute(const CFX_ByteStringC& input) {
-  value_ = pdfium::MakeUnique<CFXJSE_Value>(GetIsolate());
-  if (script_context_->RunScript(XFA_SCRIPTLANGTYPE_Formcalc,
-                                 CFX_WideString::FromUTF8(input).AsStringC(),
-                                 value_.get(), GetXFADocument()->GetRoot())) {
+  if (ExecuteHelper(input)) {
     return true;
   }
 
   CFXJSE_Value msg(GetIsolate());
   value_->GetObjectPropertyByIdx(1, &msg);
-  EXPECT_TRUE(msg.IsString());
-
   fprintf(stderr, "JS: %.*s\n", input.GetLength(), input.c_str());
-  fprintf(stderr, "JS ERROR: %ls\n", msg.ToWideString().c_str());
+  // If the parsing of the input fails, then v8 will not run, so there will be
+  // no value here to print.
+  if (msg.IsString() && !msg.ToWideString().IsEmpty())
+    fprintf(stderr, "JS ERROR: %ls\n", msg.ToWideString().c_str());
   return false;
 }
+
+bool XFAJSEmbedderTest::ExecuteSilenceFailure(const CFX_ByteStringC& input) {
+  return ExecuteHelper(input);
+}
+
+bool XFAJSEmbedderTest::ExecuteHelper(const CFX_ByteStringC& input) {
+  value_ = pdfium::MakeUnique<CFXJSE_Value>(GetIsolate());
+  return script_context_->RunScript(XFA_SCRIPTLANGTYPE_Formcalc,
+                                    CFX_WideString::FromUTF8(input).AsStringC(),
+                                    value_.get(), GetXFADocument()->GetRoot());
+}
diff --git a/testing/xfa_js_embedder_test.h b/testing/xfa_js_embedder_test.h
index afbdb26..1dc06f0 100644
--- a/testing/xfa_js_embedder_test.h
+++ b/testing/xfa_js_embedder_test.h
@@ -33,6 +33,8 @@
   CXFA_Document* GetXFADocument();
 
   bool Execute(const CFX_ByteStringC& input);
+  bool ExecuteSilenceFailure(const CFX_ByteStringC& input);
+
   CFXJSE_Value* GetValue() const { return value_.get(); }
 
  private:
@@ -40,6 +42,8 @@
   std::unique_ptr<CFXJSE_Value> value_;
   v8::Isolate* isolate_;
   CXFA_ScriptContext* script_context_;
+
+  bool ExecuteHelper(const CFX_ByteStringC& input);
 };
 
 #endif  // TESTING_XFA_JS_EMBEDDER_TEST_H_
diff --git a/xfa/fxfa/fm2js/cxfa_fm2jscontext_embeddertest.cpp b/xfa/fxfa/fm2js/cxfa_fm2jscontext_embeddertest.cpp
index 7902bb7..ff2e200 100644
--- a/xfa/fxfa/fm2js/cxfa_fm2jscontext_embeddertest.cpp
+++ b/xfa/fxfa/fm2js/cxfa_fm2jscontext_embeddertest.cpp
@@ -1432,3 +1432,15 @@
 TEST_F(FM2JSContextEmbedderTest, Put) {
   // TODO(dsinclair): Is this supported?
 }
+
+TEST_F(FM2JSContextEmbedderTest, InvalidFunctions) {
+  ASSERT_TRUE(OpenDocument("simple_xfa.pdf"));
+
+  const char* const tests[] = {
+      "F()", "()", "()()()", "Round(2.0)()",
+  };
+
+  for (size_t i = 0; i < FX_ArraySize(tests); ++i) {
+    EXPECT_FALSE(ExecuteSilenceFailure(tests[i]));
+  }
+}
diff --git a/xfa/fxfa/fm2js/cxfa_fmsimpleexpression.cpp b/xfa/fxfa/fm2js/cxfa_fmsimpleexpression.cpp
index 1305126..7ab2e13 100644
--- a/xfa/fxfa/fm2js/cxfa_fmsimpleexpression.cpp
+++ b/xfa/fxfa/fm2js/cxfa_fmsimpleexpression.cpp
@@ -6,6 +6,8 @@
 
 #include "xfa/fxfa/fm2js/cxfa_fmsimpleexpression.h"
 
+#include <algorithm>
+#include <iostream>
 #include <utility>
 
 #include "core/fxcrt/fx_extension.h"
@@ -25,46 +27,27 @@
     L"pfm_rt.var_filter",
 };
 
-struct XFA_FMBuildInFunc {
-  uint32_t m_uHash;
-  const wchar_t* m_buildinfunc;
+const wchar_t* const g_BuiltInFuncs[] = {
+    L"Abs",          L"Apr",       L"At",       L"Avg",
+    L"Ceil",         L"Choose",    L"Concat",   L"Count",
+    L"Cterm",        L"Date",      L"Date2Num", L"DateFmt",
+    L"Decode",       L"Encode",    L"Eval",     L"Exists",
+    L"Floor",        L"Format",    L"FV",       L"Get",
+    L"HasValue",     L"If",        L"Ipmt",     L"IsoDate2Num",
+    L"IsoTime2Num",  L"Left",      L"Len",      L"LocalDateFmt",
+    L"LocalTimeFmt", L"Lower",     L"Ltrim",    L"Max",
+    L"Min",          L"Mod",       L"NPV",      L"Num2Date",
+    L"Num2GMTime",   L"Num2Time",  L"Oneof",    L"Parse",
+    L"Pmt",          L"Post",      L"PPmt",     L"Put",
+    L"PV",           L"Rate",      L"Ref",      L"Replace",
+    L"Right",        L"Round",     L"Rtrim",    L"Space",
+    L"Str",          L"Stuff",     L"Substr",   L"Sum",
+    L"Term",         L"Time",      L"Time2Num", L"TimeFmt",
+    L"UnitType",     L"UnitValue", L"Upper",    L"Uuid",
+    L"Within",       L"WordNum",
 };
 
-const XFA_FMBuildInFunc g_BuildInFuncs[] = {
-    {0x0001f1f5, L"At"},           {0x00020b9c, L"FV"},
-    {0x00021aef, L"If"},           {0x00023ee6, L"PV"},
-    {0x04b5c9ee, L"Encode"},       {0x08e96685, L"DateFmt"},
-    {0x09f99db6, L"Abs"},          {0x09f9e583, L"Apr"},
-    {0x09fa043e, L"Avg"},          {0x0a9782a0, L"Get"},
-    {0x0b1b09df, L"Len"},          {0x0b3543a6, L"Max"},
-    {0x0b356ca4, L"Min"},          {0x0b358b60, L"Mod"},
-    {0x0b4fded4, L"NPV"},          {0x0b846bf1, L"Pmt"},
-    {0x0b8494f9, L"Put"},          {0x0bb8df5d, L"Ref"},
-    {0x0bd37a99, L"Str"},          {0x0bd37fb5, L"Sum"},
-    {0x1048469b, L"Cterm"},        {0x11e03660, L"Exists"},
-    {0x126236e6, L"Post"},         {0x127c6661, L"PPmt"},
-    {0x193ade3e, L"Right"},        {0x1ec8ab2c, L"Rate"},
-    {0x20e476dc, L"IsoTime2Num"},  {0x23eb6816, L"TimeFmt"},
-    {0x24fb17b0, L"LocalDateFmt"}, {0x28dee6e9, L"Format"},
-    {0x2d0890b8, L"Term"},         {0x2d71b00f, L"Time"},
-    {0x2f890fb1, L"Num2Time"},     {0x3767511d, L"Ceil"},
-    {0x3ffd1941, L"LocalTimeFmt"}, {0x442f68c8, L"Round"},
-    {0x46fd1128, L"Eval"},         {0x4d629440, L"Date2Num"},
-    {0x4dcf25f8, L"Concat"},       {0x4e00255d, L"UnitValue"},
-    {0x55a5cc29, L"Lower"},        {0x5e43e04c, L"WordNum"},
-    {0x620ce6ba, L"Ipmt"},         {0x6f544d49, L"Count"},
-    {0x7e241013, L"Within"},       {0x9b9a6e2b, L"IsoDate2Num"},
-    {0xb2c941c2, L"UnitType"},     {0xb598a1f7, L"Uuid"},
-    {0xbde9abde, L"Date"},         {0xc0010b80, L"Num2Date"},
-    {0xc1f6144c, L"Upper"},        {0xc44028f7, L"Oneof"},
-    {0xc62c1b2c, L"Space"},        {0xd0ff50f9, L"HasValue"},
-    {0xd1537042, L"Floor"},        {0xd2ac9cf1, L"Time2Num"},
-    {0xd907aee5, L"Num2GMTime"},   {0xdf24f7c4, L"Decode"},
-    {0xe2664803, L"Substr"},       {0xe3e7b528, L"Stuff"},
-    {0xe6792d4e, L"Rtrim"},        {0xe8c23f5b, L"Parse"},
-    {0xea18d121, L"Choose"},       {0xebfef69c, L"Replace"},
-    {0xf5ad782b, L"Left"},         {0xf7bb2248, L"Ltrim"},
-};
+const FX_STRSIZE g_BuiltInFuncsMaxLen = 12;
 
 struct XFA_FMSOMMethod {
   uint32_t m_uHash;
@@ -533,17 +516,20 @@
 
 CXFA_FMCallExpression::~CXFA_FMCallExpression() {}
 
-bool CXFA_FMCallExpression::IsBuildInFunc(CFX_WideTextBuf* funcName) {
-  uint32_t uHash = FX_HashCode_GetW(funcName->AsStringC(), true);
-  const XFA_FMBuildInFunc* pEnd = g_BuildInFuncs + FX_ArraySize(g_BuildInFuncs);
-  const XFA_FMBuildInFunc* pFunc =
-      std::lower_bound(g_BuildInFuncs, pEnd, uHash,
-                       [](const XFA_FMBuildInFunc& func, uint32_t hash) {
-                         return func.m_uHash < hash;
-                       });
-  if (pFunc < pEnd && uHash == pFunc->m_uHash) {
+bool CXFA_FMCallExpression::IsBuiltInFunc(CFX_WideTextBuf* funcName) {
+  if (funcName->GetLength() > g_BuiltInFuncsMaxLen)
+    return false;
+
+  CFX_WideString str = funcName->MakeString();
+  const wchar_t* const* pEnd = g_BuiltInFuncs + FX_ArraySize(g_BuiltInFuncs);
+  const wchar_t* const* pMatchResult = std::lower_bound(
+      g_BuiltInFuncs, pEnd, str,
+      [](const wchar_t* iter, const CFX_WideString& val) -> bool {
+        return val.CompareNoCase(iter) > 0;
+      });
+  if (pMatchResult < pEnd && !str.CompareNoCase(*pMatchResult)) {
     funcName->Clear();
-    *funcName << pFunc->m_buildinfunc;
+    *funcName << *pMatchResult;
     return true;
   }
   return false;
@@ -618,7 +604,7 @@
   } else {
     bool isEvalFunc = false;
     bool isExistsFunc = false;
-    if (IsBuildInFunc(&funcName)) {
+    if (IsBuiltInFunc(&funcName)) {
       if (funcName.AsStringC() == L"Eval") {
         isEvalFunc = true;
         javascript << L"eval.call(this, ";
@@ -633,7 +619,11 @@
         javascript << funcName;
       }
     } else {
-      javascript << funcName;
+      // If a function is neither a SomMethod nor a built-in, then the input
+      // was invalid, so fail. The scanner/lexer should catch this, but
+      // currently doesn't. This failure will bubble up to the top level and
+      // cause the transpile to fail.
+      return false;
     }
     javascript << L"(";
     if (isExistsFunc) {
diff --git a/xfa/fxfa/fm2js/cxfa_fmsimpleexpression.h b/xfa/fxfa/fm2js/cxfa_fmsimpleexpression.h
index 8cdaf35..6f01429 100644
--- a/xfa/fxfa/fm2js/cxfa_fmsimpleexpression.h
+++ b/xfa/fxfa/fm2js/cxfa_fmsimpleexpression.h
@@ -238,7 +238,7 @@
       bool bIsSomMethod);
   ~CXFA_FMCallExpression() override;
 
-  bool IsBuildInFunc(CFX_WideTextBuf* funcName);
+  bool IsBuiltInFunc(CFX_WideTextBuf* funcName);
   uint32_t IsMethodWithObjParam(const CFX_WideStringC& methodName);
   bool ToJavaScript(CFX_WideTextBuf& javascript) override;