Create ParseDataType unit tests based on specs.

Test cases that are commented out are failing with our current
implementation.

Change-Id: I9f80003af5a5d182f53cc655454aec44397d278b
Reviewed-on: https://pdfium-review.googlesource.com/7890
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/BUILD.gn b/BUILD.gn
index 66095d4..c13d290 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -1981,7 +1981,10 @@
     deps += [ "//skia" ]
   }
   if (pdf_enable_v8) {
-    sources += [ "fpdfsdk/javascript/public_methods_unittest.cpp" ]
+    sources += [
+      "fpdfsdk/javascript/public_methods_unittest.cpp",
+      "fpdfsdk/javascript/util_unittest.cpp",
+    ]
     include_dirs += [
       "//v8",
       "//v8/include",
diff --git a/fpdfsdk/javascript/util.cpp b/fpdfsdk/javascript/util.cpp
index 3338a3a..7f0fe1e 100644
--- a/fpdfsdk/javascript/util.cpp
+++ b/fpdfsdk/javascript/util.cpp
@@ -70,40 +70,6 @@
 #endif
 };
 
-int ParseDataType(std::wstring* sFormat) {
-  bool bPercent = false;
-  for (size_t i = 0; i < sFormat->length(); ++i) {
-    wchar_t c = (*sFormat)[i];
-    if (c == L'%') {
-      bPercent = true;
-      continue;
-    }
-
-    if (bPercent) {
-      if (c == L'c' || c == L'C' || c == L'd' || c == L'i' || c == L'o' ||
-          c == L'u' || c == L'x' || c == L'X') {
-        return UTIL_INT;
-      }
-      if (c == L'e' || c == L'E' || c == L'f' || c == L'g' || c == L'G') {
-        return UTIL_DOUBLE;
-      }
-      if (c == L's' || c == L'S') {
-        // Map s to S since we always deal internally
-        // with wchar_t strings.
-        (*sFormat)[i] = L'S';
-        return UTIL_STRING;
-      }
-      if (c == L'.' || c == L'+' || c == L'-' || c == L'#' || c == L' ' ||
-          std::iswdigit(c)) {
-        continue;
-      }
-      break;
-    }
-  }
-
-  return -1;
-}
-
 }  // namespace
 
 util::util(CJS_Object* pJSObject) : CJS_EmbedObj(pJSObject) {}
@@ -480,3 +446,37 @@
   vRet = CJS_Value(pRuntime, wStr.c_str());
   return true;
 }
+
+int util::ParseDataType(std::wstring* sFormat) {  // Returns UTIL_* or -1.
+  bool bPercent = false;  // Set once a '%' has been seen.
+  for (size_t i = 0; i < sFormat->length(); ++i) {
+    wchar_t c = (*sFormat)[i];
+    if (c == L'%') {
+      bPercent = true;
+      continue;
+    }
+
+    if (bPercent) {  // Characters before the first '%' are skipped.
+      if (c == L'c' || c == L'C' || c == L'd' || c == L'i' || c == L'o' ||
+          c == L'u' || c == L'x' || c == L'X') {
+        return UTIL_INT;
+      }
+      if (c == L'e' || c == L'E' || c == L'f' || c == L'g' || c == L'G') {
+        return UTIL_DOUBLE;
+      }
+      if (c == L's' || c == L'S') {
+        // Rewrite the specifier in place as 'S' since we always deal
+        // internally with wchar_t strings.
+        (*sFormat)[i] = L'S';
+        return UTIL_STRING;
+      }
+      if (c == L'.' || c == L'+' || c == L'-' || c == L'#' || c == L' ' ||
+          std::iswdigit(c)) {
+        continue;  // Flag, width, or precision character; keep scanning.
+      }
+      break;  // Any other character aborts the scan; falls through to -1.
+    }
+  }
+
+  return -1;  // No conversion specifier was recognized.
+}
diff --git a/fpdfsdk/javascript/util.h b/fpdfsdk/javascript/util.h
index 98761b6..125d193 100644
--- a/fpdfsdk/javascript/util.h
+++ b/fpdfsdk/javascript/util.h
@@ -40,6 +40,11 @@
 
   static CFX_WideString printx(const CFX_WideString& cFormat,
                                const CFX_WideString& cSource);
+
+ private:
+  friend class CJS_Util_ParseDataType_Test;
+
+  static int ParseDataType(std::wstring* sFormat);
 };
 
 class CJS_Util : public CJS_Object {
diff --git a/fpdfsdk/javascript/util_unittest.cpp b/fpdfsdk/javascript/util_unittest.cpp
new file mode 100644
index 0000000..eaebc9c
--- /dev/null
+++ b/fpdfsdk/javascript/util_unittest.cpp
@@ -0,0 +1,118 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "fpdfsdk/javascript/util.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/test_support.h"
+
+// Should mirror the defines in util.cpp.
+#define UTIL_INT 0
+#define UTIL_DOUBLE 1
+#define UTIL_STRING 2
+
+TEST(CJS_Util, ParseDataType) {
+  struct ParseDataTypeCase {
+    const std::wstring input_string;  // Owns its length, so NULs survive.
+    const int expected;
+  };
+
+  // Commented out tests follow the spec but are not passing.
+  const ParseDataTypeCase cases[] = {
+      // Not conversions
+      {L"", -1},
+      {L"d", -1},
+
+      // Simple cases
+      {L"%d", UTIL_INT},
+      {L"%x", UTIL_INT},
+      {L"%f", UTIL_DOUBLE},
+      {L"%s", UTIL_STRING},
+
+      // nDecSep is not implemented.
+      // {L"%,0d", UTIL_INT},
+      // {L"%,1d", UTIL_INT},
+      // {L"%,2d", UTIL_INT},
+      // {L"%,3d", UTIL_INT},
+      // {L"%,4d", -1},
+      // {L"%,d", -1},
+
+      // cFlags ("+", " ", "0", "#") are only valid for numeric conversions.
+      {L"%+d", UTIL_INT},
+      {L"%+x", UTIL_INT},
+      {L"%+f", UTIL_DOUBLE},
+      // {L"%+s", -1},
+      {L"% d", UTIL_INT},
+      {L"% x", UTIL_INT},
+      {L"% f", UTIL_DOUBLE},
+      // {L"% s", -1},
+      {L"%0d", UTIL_INT},
+      {L"%0x", UTIL_INT},
+      {L"%0f", UTIL_DOUBLE},
+      // {L"%0s", -1},
+      {L"%#d", UTIL_INT},
+      {L"%#x", UTIL_INT},
+      {L"%#f", UTIL_DOUBLE},
+      // {L"%#s", -1},
+
+      // nWidth should work for all conversions; it can be combined with
+      // cFlags=0 for numbers.
+      {L"%5d", UTIL_INT},
+      {L"%05d", UTIL_INT},
+      {L"%5x", UTIL_INT},
+      {L"%05x", UTIL_INT},
+      {L"%5f", UTIL_DOUBLE},
+      {L"%05f", UTIL_DOUBLE},
+      {L"%5s", UTIL_STRING},
+      // {L"%05s", -1},
+
+      // nPrecision should only work for float.
+      // {L"%.5d", -1},
+      // {L"%.5x", -1},
+      {L"%.5f", UTIL_DOUBLE},
+      // {L"%.5s", -1},
+      // {L"%.14d", -1},
+      // {L"%.14x", -1},
+      {L"%.14f", UTIL_DOUBLE},
+      // {L"%.14s", -1},
+      // {L"%.f", -1},
+
+      // nPrecision too large (> 260) causes crashes on Windows.
+      // TODO(tsepez): Reenable when fix is out.
+      // {L"%.261d", -1},
+      // {L"%.261x", -1},
+      // {L"%.261f", -1},
+      // {L"%.261s", -1},
+
+      // Unexpected characters
+      {L"%ad", -1},
+      {L"%bx", -1},
+      // {L"%cf", -1},
+      // {L"%es", -1},
+      // {L"%gd", -1},
+      {L"%hx", -1},
+      // {L"%if", -1},
+      {L"%js", -1},
+      {L"%@d", -1},
+      {L"%~x", -1},
+      {L"%[f", -1},
+      {std::wstring(L"%\0s", 3), -1},  // Explicit length keeps the NUL.
+      {L"%\nd", -1},
+      {L"%\rx", -1},
+      // {L"%%f", -1},
+      // {L"%  s", -1},
+
+      // Combine multiple valid components
+      {L"%+6d", UTIL_INT},
+      {L"% 7x", UTIL_INT},
+      {L"%#9.3f", UTIL_DOUBLE},
+      {L"%10s", UTIL_STRING},
+  };
+
+  for (size_t i = 0; i < FX_ArraySize(cases); i++) {
+    std::wstring input(cases[i].input_string);  // Copy: the parser mutates it.
+    EXPECT_EQ(cases[i].expected, util::ParseDataType(&input))
+        << cases[i].input_string;
+  }
+}