More CFGAS_FormatString tests

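This CL adds more tests for various CFGAS_FormatString methods. A few
bugs are fixed up along the way: the argument passed to GetEraName() for
the 'G' date symbol was inverted, and canonical times with a 'Z' directly
after the minutes (e.g. "10:30Z") failed to parse.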

Change-Id: Ida1f2792d1bd72f4f52b3c7cc1ff0022b6f45fda
Reviewed-on: https://pdfium-review.googlesource.com/6179
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/xfa/fgas/crt/cfgas_formatstring.cpp b/xfa/fgas/crt/cfgas_formatstring.cpp
index d0cb479..58e4c95 100644
--- a/xfa/fgas/crt/cfgas_formatstring.cpp
+++ b/xfa/fgas/crt/cfgas_formatstring.cpp
@@ -732,7 +732,7 @@
       wsWeekDay.Format(L"%d", wWeekDay ? wWeekDay : 7);
       wsResult += wsWeekDay;
     } else if (dwSymbol == FXBSTR_ID(0, 0, 'G', '1')) {
-      wsResult += pLocale->GetEraName(year < 0);
+      wsResult += pLocale->GetEraName(year > 0);
     } else if (dwSymbol == FXBSTR_ID(0, 0, 'Y', '2')) {
       CFX_WideString wsYear;
       wsYear.Format(L"%02d", year % 100);
@@ -760,8 +760,6 @@
                 IFX_Locale* pLocale,
                 const CFX_DateTime& datetime,
                 CFX_WideString& wsResult) {
-  bool bGMT = false;
-  bool bRet = true;
   uint8_t hour = datetime.GetHour();
   uint8_t minute = datetime.GetMinute();
   uint8_t second = datetime.GetSecond();
@@ -864,7 +862,7 @@
     } else if (dwSymbol == FXBSTR_ID(0, 0, 'Z', '1')) {
       wsResult += L"GMT";
       FX_TIMEZONE tz = pLocale->GetTimeZone();
-      if (!bGMT && (tz.tzHour != 0 || tz.tzMinute != 0)) {
+      if (tz.tzHour != 0 || tz.tzMinute != 0) {
         wsResult += tz.tzHour < 0 ? L"-" : L"+";
 
         CFX_WideString wsTimezone;
@@ -873,7 +871,7 @@
       }
     } else if (dwSymbol == FXBSTR_ID(0, 0, 'z', '1')) {
       FX_TIMEZONE tz = pLocale->GetTimeZone();
-      if (!bGMT && tz.tzHour != 0 && tz.tzMinute != 0) {
+      if (tz.tzHour != 0 && tz.tzMinute != 0) {
         wsResult += tz.tzHour < 0 ? L"-" : L"+";
 
         CFX_WideString wsTimezone;
@@ -882,7 +880,7 @@
       }
     }
   }
-  return bRet;
+  return true;
 }
 
 bool FormatDateTimeInternal(const CFX_DateTime& dt,
@@ -892,9 +890,11 @@
                             IFX_Locale* pLocale,
                             CFX_WideString& wsOutput) {
   bool bRet = true;
-  CFX_WideString wsDateOut, wsTimeOut;
+  CFX_WideString wsDateOut;
   if (!wsDatePattern.IsEmpty())
     bRet &= DateFormat(wsDatePattern, pLocale, dt, wsDateOut);
+
+  CFX_WideString wsTimeOut;
   if (!wsTimePattern.IsEmpty())
     bRet &= TimeFormat(wsTimePattern, pLocale, dt, wsTimeOut);
 
@@ -926,6 +926,7 @@
   year = wYear;
   if (cc < 4 || wYear < 1900)
     return false;
+
   if (cc < len) {
     if (str[cc] == '-')
       cc++;
@@ -1010,41 +1011,45 @@
     }
     if (cc == cc_start + 1 || minute >= 60)
       return false;
+
     if (cc < len) {
       if (str[cc] == ':')
         cc++;
 
-      cc_start = cc;
-      while (cc < len && cc < cc_start + 2) {
-        if (!FXSYS_isDecimalDigit(str[cc]))
-          return false;
-
-        second = second * 10 + str[cc++] - '0';
-      }
-      if (cc == cc_start + 1 || second >= 60)
-        return false;
-      if (cc < len) {
-        if (str[cc] == '.') {
-          cc++;
-          cc_start = cc;
-          while (cc < len && cc < cc_start + 3) {
-            if (!FXSYS_isDecimalDigit(str[cc]))
-              return false;
-
-            millisecond = millisecond * 10 + str[cc++] - '0';
-          }
-          if (cc < cc_start + 3)
+      if (str[cc] != 'Z') {
+        cc_start = cc;
+        while (cc < len && cc < cc_start + 2) {
+          if (!FXSYS_isDecimalDigit(str[cc]))
             return false;
-        }
-        if (cc < len) {
-          FX_TIMEZONE tzDiff;
-          tzDiff.tzHour = 0;
-          tzDiff.tzMinute = 0;
-          if (str[cc] != 'Z')
-            cc += ParseTimeZone(str + cc, len - cc, &tzDiff);
 
-          ResolveZone(hour, minute, tzDiff, pLocale);
+          second = second * 10 + str[cc++] - '0';
         }
+        if (cc == cc_start + 1 || second >= 60)
+          return false;
+        if (cc < len) {
+          if (str[cc] == '.') {
+            cc++;
+            cc_start = cc;
+            while (cc < len && cc < cc_start + 3) {
+              if (!FXSYS_isDecimalDigit(str[cc]))
+                return false;
+
+              millisecond = millisecond * 10 + str[cc++] - '0';
+            }
+            if (cc < cc_start + 3)
+              return false;
+          }
+        }
+      }
+
+      if (cc < len) {
+        FX_TIMEZONE tzDiff;
+        tzDiff.tzHour = 0;
+        tzDiff.tzMinute = 0;
+        if (str[cc] != 'Z')
+          cc += ParseTimeZone(str + cc, len - cc, &tzDiff);
+
+        ResolveZone(hour, minute, tzDiff, pLocale);
       }
     }
   }
@@ -2628,7 +2633,6 @@
                               wsSrcDateTime.GetLength() - iT - 1);
     if (wsSrcDate.IsEmpty() || wsSrcTime.IsEmpty())
       return false;
-
     if (FX_DateFromCanonical(wsSrcDate, &dt) &&
         FX_TimeFromCanonical(wsSrcTime, &dt, pLocale)) {
       return FormatDateTimeInternal(dt, wsDatePattern, wsTimePattern,
diff --git a/xfa/fgas/crt/cfgas_formatstring_unittest.cpp b/xfa/fgas/crt/cfgas_formatstring_unittest.cpp
index 0663919..e26c523 100644
--- a/xfa/fgas/crt/cfgas_formatstring_unittest.cpp
+++ b/xfa/fgas/crt/cfgas_formatstring_unittest.cpp
@@ -6,6 +6,8 @@
 
 #include "xfa/fgas/crt/cfgas_formatstring.h"
 
+#include <time.h>
+
 #include <memory>
 
 #include "core/fpdfapi/cpdf_modulemgr.h"
@@ -17,7 +19,16 @@
 
 class CFGAS_FormatStringTest : public testing::Test {
  public:
-  CFGAS_FormatStringTest() { CPDF_ModuleMgr::Get()->Init(); }
+  CFGAS_FormatStringTest() {  // Pins TZ to UTC, then inits the module manager.
+#if _FXM_PLATFORM_ == _FXM_PLATFORM_WINDOWS_
+    _putenv_s("TZ", "UTC");  // NOTE(review): presumably keeps 'Z' output host-independent - confirm.
+    _tzset();                // Re-read TZ after changing it.
+#else
+    setenv("TZ", "UTC", 1);  // Last argument 1: overwrite any existing TZ value.
+    tzset();                 // Re-read TZ after changing it.
+#endif
+    CPDF_ModuleMgr::Get()->Init();  // Paired with Destroy() in the destructor.
+  }
 
   ~CFGAS_FormatStringTest() override { CPDF_ModuleMgr::Get()->Destroy(); }
 
@@ -61,12 +72,44 @@
        L"Saturday, the 1 of January, 2000"},
       {L"en", L"19991202", L"MM/D/YY", L"12/2/99"},
       {L"en", L"19990110", L"MMM D, YYYY", L"Jan 10, 1999"},
+      {L"en", L"19990102", L"J", L"2"},
+      {L"en", L"19990102", L"JJJ", L"002"},
+      {L"en", L"19990102", L"M", L"1"},
+      {L"en", L"19990102", L"MMM", L"Jan"},
+      {L"en", L"19990102", L"YYYY G", L"1999 AD"},
+      // Week 01 of the year is the week containing Jan 04.
+      // {L"en", L"19990102", L"WW", L"00"},  -- Returns 01 incorrectly
+      // {L"en", L"19990104", L"WW", L"01"},  -- Returns 02 incorrectly
+      // The ?*+ should format as whitespace.
+      // {L"en", L"19990104", L"YYYY?*+MM", L"1999   01"},
+      // {L"en", L"1999-07-16", L"date{DD/MM/YY} '('date{MMM DD, YYYY}')'",
+      //  L"16/07/99 (Jul 16, 1999)"},
       {L"de_CH", L"20041030", L"D. MMMM YYYY", L"30. Oktober 2004"},
       {L"fr_CA", L"20041030", L"D MMMM YYYY", L"30 octobre 2004"},
+      {L"en", L"2002-10-25", L"date(fr){DD MMMM, YYYY}", L"25 octobre, 2002"},
+      {L"en", L"2002-10-25", L"date(es){EEEE, D 'de' MMMM 'de' YYYY}",
+       L"viernes, 25 de octubre de 2002"},
+      // {L"en", L"2002-10-25", L"date.long(fr)()", L"25 octobre, 2002"},
       // {L"ja", L"2003-11-03", L"gY/M/D", L"H15/11/3"},
       // {L"ja", L"1989-01-08", L"ggY-M-D", L"\u5e731-1-8"},
       // {L"ja", L"1989-11-03", L"gggYY/MM/DD", L"\u5e73\u621089/11/03"},
-      // {L"ja", L"1989-01-08", L"\u0067\u0067YY/MM/DD", L"\u337b89/01/08"}
+
+      // Full width D == U+FF24
+      // I don't actually know what the ideographic numeral for 25 is.
+      // {L"ja", L"2002-20-25", L"\uff24\uff24\uff24", L" .... "},
+      // {L"ja", L"2002-20-25", L"\uff24\uff24\uff24\uff24", L" ... "},
+      // Full width M == U+FF2D
+      // {L"ja", L"2002-20-25", L"\uff2d\uff2d\uff2d", L" ... "},
+      // {L"ja", L"2002-20-25", L"\uff2d\uff2d\uff2d\uff2d", L" ... "},
+      // Full width E == U+FF25
+      // {L"ja", L"2002-20-25", L"\uff25", L" ... "},
+      // Full width e == U+FF45
+      // {L"ja", L"2002-20-25", L"\uff45", L" ... "},
+      // Full width g == U+FF47
+      // {L"ja", L"1989-01-08", L"\uff47\uff47YY/MM/DD", L"\u337b89/01/08"}
+      // Full width Y == U+FF39
+      // {L"ja", L"2002-20-25", L"\uff39\uff39\uff39", L" ... "},
+      // {L"ja", L"2002-20-25", L"\uff39\uff39\uff39\uff39\uff39", L" ... "}
   };
 
   for (size_t i = 0; i < FX_ArraySize(tests); ++i) {
@@ -78,6 +121,52 @@
   }
 }
 
+TEST_F(CFGAS_FormatStringTest, TimeFormat) {  // Time-only FormatDateTime().
+  struct {
+    const wchar_t* locale;   // Locale name handed to fmt().
+    const wchar_t* input;    // Source time string.
+    const wchar_t* pattern;  // Time pattern to format with.
+    const wchar_t* output;   // Exact text expected in |result|.
+  } tests[] = {{L"en", L"11:11:11", L"h:MM A", L"11:11 AM"},
+               {L"en", L"11:11:11", L"HH:MM:SS 'o''clock' A Z",
+                L"11:11:11 o'clock AM GMT"},  // '' in a quoted literal -> one '.
+               {L"en", L"14:30:59", L"h:MM A", L"2:30 PM"},  // h: 12-hour.
+               {L"en", L"14:30:59", L"HH:MM:SS A Z", L"14:30:59 PM GMT"}};
+
+  for (size_t i = 0; i < FX_ArraySize(tests); ++i) {
+    CFX_WideString result;
+    EXPECT_TRUE(fmt(tests[i].locale)
+                    ->FormatDateTime(tests[i].input, tests[i].pattern, result,
+                                     FX_DATETIMETYPE_Time));
+    EXPECT_STREQ(tests[i].output, result.c_str()) << " TEST: " << i;  // Row index.
+  }
+}
+
+TEST_F(CFGAS_FormatStringTest, DateTimeFormat) {  // Combined time{}/date{}.
+  struct {
+    const wchar_t* locale;   // Locale name handed to fmt().
+    const wchar_t* input;    // Source string: date and time joined by 'T'.
+    const wchar_t* pattern;  // Pattern holding both a time{} and a date{}.
+    const wchar_t* output;   // Exact text expected in |result|.
+  } tests[] = {{L"en", L"1999-07-16T10:30Z",
+                L"'At' time{HH:MM Z} 'on' date{MMM DD, YYYY}",  // Literals outside.
+                L"At 10:30 GMT on Jul 16, 1999"},
+               {L"en", L"1999-07-16T10:30Z",
+                L"time{'At' HH:MM Z} date{'on' MMM DD, YYYY}",  // Literals inside.
+                L"At 10:30 GMT on Jul 16, 1999"},
+               {L"en", L"1999-07-16T10:30Z",
+                L"time{'At 'HH:MM Z}date{' on 'MMM DD, YYYY}",  // Spaces quoted.
+                L"At 10:30 GMT on Jul 16, 1999"}};  // All rows: same output.
+
+  for (size_t i = 0; i < FX_ArraySize(tests); ++i) {
+    CFX_WideString result;
+    EXPECT_TRUE(fmt(tests[i].locale)
+                    ->FormatDateTime(tests[i].input, tests[i].pattern, result,
+                                     FX_DATETIMETYPE_TimeDate));
+    EXPECT_STREQ(tests[i].output, result.c_str()) << " TEST: " << i;  // Row index.
+  }
+}
+
 TEST_F(CFGAS_FormatStringTest, DateParse) {
   struct {
     const wchar_t* locale;
@@ -102,6 +191,28 @@
       //  CFX_DateTime(1989, 11, 3, 0, 0, 0, 0)},
       // {L"ja", L"u337b99/01/08", L"\u0067\u0067YY/MM/DD",
       //  CFX_DateTime(1999, 1, 8, 0, 0, 0, 0)}
+      // Full width D == U+FF24
+      // I don't actually know what the ideographic numeral for 25 is.
+      // {L"ja", L"...", L"\uff24\uff24\uff24",
+      //  CFX_DateTime(2002, 20, 25, 0, 0, 0, 0)},
+      // {L"ja", L"...", L"\uff24\uff24\uff24\uff24",
+      //  CFX_DateTime(2002, 20, 25, 0, 0, 0, 0)},
+      // Full width M == U+FF2D
+      // {L"ja", L"...", L"\uff2d\uff2d\uff2d",
+      //  CFX_DateTime(2002, 20, 25, 0, 0, 0, 0)},
+      // {L"ja", L"...", L"\uff2d\uff2d\uff2d\uff2d",
+      //  CFX_DateTime(2002, 20, 25, 0, 0, 0, 0)},
+      // Full width E == U+FF25
+      // {L"ja", L"...", L"\uff25", CFX_DateTime(2002, 20, 25, 0, 0, 0, 0)},
+      // Full width e == U+FF45
+      // {L"ja", L"...", L"\uff45", CFX_DateTime(2002, 20, 25, 0, 0, 0, 0)},
+      // Full width g == U+FF47
+      // {L"ja", L"1989-01-08", L"\uff47\uff47YY/MM/DD", L"\u337b89/01/08"}
+      // Full width Y == U+FF39
+      // {L"ja", L"...", L"\uff39\uff39\uff39",
+      //  CFX_DateTime(2002, 20, 25, 0, 0, 0, 0)},
+      // {L"ja", L"...", L"\uff39\uff39\uff39\uff39\uff39",
+      //  CFX_DateTime(2002, 20, 25, 0, 0, 0, 0)}
   };
 
   for (size_t i = 0; i < FX_ArraySize(tests); ++i) {
@@ -112,3 +223,75 @@
     EXPECT_EQ(tests[i].output, result) << " TEST: " << i;
   }
 }
+
+TEST_F(CFGAS_FormatStringTest, SplitFormatString) {  // Splits a pattern on '|'.
+  std::vector<CFX_WideString> results;
+  fmt(L"en")->SplitFormatString(
+      L"null{'No data'} | null{} | text{999*9999} | text{999*999*9999}",
+      results);
+  ASSERT_EQ(4UL, results.size());  // Fatal: the loop below indexes patterns[].
+
+  const wchar_t* patterns[] = {L"null{'No data'} ", L" null{} ",  // Padding kept.
+                               L" text{999*9999} ", L" text{999*999*9999}"};
+
+  for (size_t i = 0; i < results.size(); ++i) {
+    EXPECT_STREQ(patterns[i], results[i].c_str()) << " TEST: " << i;
+  }
+}
+
+// TODO(dsinclair): Numeric parsing fails when encountering a '.'.
+// TEST_F(CFGAS_FormatStringTest, NumParse) {
+//   struct {
+//     const wchar_t* locale;
+//     const wchar_t* input;
+//     const wchar_t* pattern;
+//     const wchar_t* output;
+//   } tests[] = {
+//       // {L"en", L"€100.00", L"num(en_GB){$z,zz9.99}", L"100"},
+//   };
+
+//   for (size_t i = 0; i < FX_ArraySize(tests); ++i) {
+//     CFX_WideString result;
+//     EXPECT_TRUE(fmt(tests[i].locale)
+//                     ->ParseNum(tests[i].input, tests[i].pattern, result));
+//     EXPECT_STREQ(tests[i].output, result.c_str()) << " TEST: " << i;
+//   }
+// }
+
+// TODO(dsinclair): Text parsing is missing support for the global modifiers:
+//  ? - wildcard
+//  * - zero or more whitespace
+//  + - one or more whitespace
+// TEST_F(CFGAS_FormatStringTest, TextParse) {
+//   struct {
+//     const wchar_t* locale;
+//     const wchar_t* input;
+//     const wchar_t* pattern;
+//     const wchar_t* output;
+//   } tests[] = {
+//       // {L"en", L"555-1212", L"text(th_TH){999*9999}", L"5551212"},
+//   };
+
+//   for (size_t i = 0; i < FX_ArraySize(tests); ++i) {
+//     CFX_WideString result;
+//     EXPECT_TRUE(fmt(tests[i].locale)
+//                     ->ParseText(tests[i].input, tests[i].pattern, result));
+//     EXPECT_STREQ(tests[i].output, result.c_str()) << " TEST: " << i;
+//   }
+// }
+
+TEST_F(CFGAS_FormatStringTest, NullParse) {  // ParseNull() on matching input.
+  struct {
+    const wchar_t* locale;   // Locale name handed to fmt().
+    const wchar_t* input;    // Text checked against the null pattern.
+    const wchar_t* pattern;  // null{...} pattern, optionally with quoted text.
+  } tests[] = {
+      {L"en", L"", L"null{}"}, {L"en", L"No data", L"null{'No data'}"},
+  };  // Positive cases only: every row is expected to return true.
+
+  for (size_t i = 0; i < FX_ArraySize(tests); ++i) {
+    EXPECT_TRUE(
+        fmt(tests[i].locale)->ParseNull(tests[i].input, tests[i].pattern))
+        << " TEST: " << i;  // Identify the failing row.
+  }
+}
diff --git a/xfa/fxfa/parser/cxfa_localemgr.cpp b/xfa/fxfa/parser/cxfa_localemgr.cpp
index fbb12e2..e9b47e7 100644
--- a/xfa/fxfa/parser/cxfa_localemgr.cpp
+++ b/xfa/fxfa/parser/cxfa_localemgr.cpp
@@ -36,6 +36,13 @@
 #define FX_LANG_es_LA 0x080a
 #define FX_LANG_es_ES 0x0c0a
 
+// These arrays are the hex encoded XML strings which define the locale.
+// <locale name="en_US" desc="English(America)">
+//   <calendarSymbols name="gregorian">
+//     <monthNames>
+//       <month>January</month>
+//       <month>February</month>
+//   ...
 const uint8_t g_enUS_Locale[] = {
     0x78, 0x9C, 0x95, 0x56, 0xD1, 0x6E, 0x9B, 0x30, 0x14, 0x7D, 0x9F, 0xB4,
     0x7F, 0x40, 0xD6, 0x2A, 0xB5, 0x52, 0x56, 0x6F, 0x8F, 0xA9, 0x88, 0xA5,