Add string pools to save storage.

Adds string hashes so CFX strings will interoperate with
STL unordered containers.

These will be employed per-document in a subsequent cl.

BUG=pdfium:597

Review-Url: https://codereview.chromium.org/2341683005
diff --git a/BUILD.gn b/BUILD.gn
index ccb1e0c..df8cdba 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -709,6 +709,7 @@
     "core/fxcrt/include/cfx_count_ref.h",
     "core/fxcrt/include/cfx_observable.h",
     "core/fxcrt/include/cfx_retain_ptr.h",
+    "core/fxcrt/include/cfx_string_pool_template.h",
     "core/fxcrt/include/fx_basic.h",
     "core/fxcrt/include/fx_coordinates.h",
     "core/fxcrt/include/fx_ext.h",
@@ -1654,6 +1655,7 @@
     "core/fxcrt/cfx_count_ref_unittest.cpp",
     "core/fxcrt/cfx_observable_unittest.cpp",
     "core/fxcrt/cfx_retain_ptr_unittest.cpp",
+    "core/fxcrt/cfx_string_pool_template_unittest.cpp",
     "core/fxcrt/fx_basic_bstring_unittest.cpp",
     "core/fxcrt/fx_basic_gcc_unittest.cpp",
     "core/fxcrt/fx_basic_memmgr_unittest.cpp",
diff --git a/core/fxcrt/cfx_string_pool_template_unittest.cpp b/core/fxcrt/cfx_string_pool_template_unittest.cpp
new file mode 100644
index 0000000..95a9007
--- /dev/null
+++ b/core/fxcrt/cfx_string_pool_template_unittest.cpp
@@ -0,0 +1,94 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fxcrt/include/cfx_string_pool_template.h"
+#include "core/fxcrt/include/fx_string.h"
+#include "testing/fx_string_testhelpers.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(fxcrt, ByteStringPool) {
+  CFX_ByteStringPool pool;
+  // Two null strings plus two equal-valued strings built independently.
+  CFX_ByteString null1;
+  CFX_ByteString null2;
+  CFX_ByteString goats1("goats");
+  CFX_ByteString goats2("goats");
+
+  // Underlying storage, if non-null, is not shared.
+  EXPECT_EQ(nullptr, null1.m_pData.Get());
+  EXPECT_EQ(nullptr, null2.m_pData.Get());
+  EXPECT_NE(goats1.m_pData, goats2.m_pData);
+
+  CFX_ByteString interned_null1 = pool.Intern(null1);
+  CFX_ByteString interned_null2 = pool.Intern(null2);
+  CFX_ByteString interned_goats1 = pool.Intern(goats1);
+  CFX_ByteString interned_goats2 = pool.Intern(goats2);
+
+  // Strings are logically equal after being interned.
+  EXPECT_EQ(null1, interned_null1);
+  EXPECT_EQ(null2, interned_null2);
+  EXPECT_EQ(goats1, interned_goats1);
+  EXPECT_EQ(goats2, interned_goats2);
+
+  // Interned underlying storage, if non-null, belongs to first seen.
+  EXPECT_EQ(nullptr, interned_null1.m_pData.Get());
+  EXPECT_EQ(nullptr, interned_null2.m_pData.Get());
+  EXPECT_EQ(goats1.m_pData, interned_goats1.m_pData);
+  EXPECT_EQ(goats1.m_pData, interned_goats2.m_pData);
+  // Empty the pool, then intern in reverse order: second strings go first.
+  pool.Clear();
+  CFX_ByteString reinterned_null2 = pool.Intern(null2);
+  CFX_ByteString reinterned_null1 = pool.Intern(null1);
+  CFX_ByteString reinterned_goats2 = pool.Intern(goats2);
+  CFX_ByteString reinterned_goats1 = pool.Intern(goats1);
+
+  // After clearing pool, storage was re-interned using second strings.
+  EXPECT_EQ(nullptr, reinterned_null1.m_pData.Get());
+  EXPECT_EQ(nullptr, reinterned_null2.m_pData.Get());
+  EXPECT_EQ(goats2.m_pData, reinterned_goats1.m_pData);
+  EXPECT_EQ(goats2.m_pData, reinterned_goats2.m_pData);
+}
+
+TEST(fxcrt, WideStringPool) {
+  CFX_WideStringPool pool;
+  // Two null strings plus two equal-valued strings built independently.
+  CFX_WideString null1;
+  CFX_WideString null2;
+  CFX_WideString goats1(L"goats");
+  CFX_WideString goats2(L"goats");
+
+  // Underlying storage, if non-null, is not shared.
+  EXPECT_EQ(nullptr, null1.m_pData.Get());
+  EXPECT_EQ(nullptr, null2.m_pData.Get());
+  EXPECT_NE(goats1.m_pData, goats2.m_pData);
+
+  CFX_WideString interned_null1 = pool.Intern(null1);
+  CFX_WideString interned_null2 = pool.Intern(null2);
+  CFX_WideString interned_goats1 = pool.Intern(goats1);
+  CFX_WideString interned_goats2 = pool.Intern(goats2);
+
+  // Strings are logically equal after being interned.
+  EXPECT_EQ(null1, interned_null1);
+  EXPECT_EQ(null2, interned_null2);
+  EXPECT_EQ(goats1, interned_goats1);
+  EXPECT_EQ(goats2, interned_goats2);
+
+  // Interned underlying storage, if non-null, belongs to first seen.
+  EXPECT_EQ(nullptr, interned_null1.m_pData.Get());
+  EXPECT_EQ(nullptr, interned_null2.m_pData.Get());
+  EXPECT_EQ(goats1.m_pData, interned_goats1.m_pData);
+  EXPECT_EQ(goats1.m_pData, interned_goats2.m_pData);
+  // Empty the pool, then intern in reverse order: second strings go first.
+  pool.Clear();
+  CFX_WideString reinterned_null2 = pool.Intern(null2);
+  CFX_WideString reinterned_null1 = pool.Intern(null1);
+  CFX_WideString reinterned_goats2 = pool.Intern(goats2);
+  CFX_WideString reinterned_goats1 = pool.Intern(goats1);
+
+  // After clearing pool, storage was re-interned using second strings.
+  EXPECT_EQ(nullptr, reinterned_null1.m_pData.Get());
+  EXPECT_EQ(nullptr, reinterned_null2.m_pData.Get());
+  EXPECT_EQ(goats2.m_pData, reinterned_goats1.m_pData);
+  EXPECT_EQ(goats2.m_pData, reinterned_goats2.m_pData);
+}
diff --git a/core/fxcrt/fx_basic_bstring.cpp b/core/fxcrt/fx_basic_bstring.cpp
index c5979a7..63db86e 100644
--- a/core/fxcrt/fx_basic_bstring.cpp
+++ b/core/fxcrt/fx_basic_bstring.cpp
@@ -9,11 +9,14 @@
 #include <algorithm>
 #include <cctype>
 
+#include "core/fxcrt/include/cfx_string_pool_template.h"
 #include "core/fxcrt/include/fx_basic.h"
 #include "third_party/base/numerics/safe_math.h"
 
 template class CFX_StringDataTemplate<FX_CHAR>;
 template class CFX_StringCTemplate<FX_CHAR>;
+template class CFX_StringPoolTemplate<CFX_ByteString>;
+template struct std::hash<CFX_ByteString>;
 
 namespace {
 
diff --git a/core/fxcrt/fx_basic_wstring.cpp b/core/fxcrt/fx_basic_wstring.cpp
index 29e915f..377f09c 100644
--- a/core/fxcrt/fx_basic_wstring.cpp
+++ b/core/fxcrt/fx_basic_wstring.cpp
@@ -9,12 +9,15 @@
 #include <algorithm>
 #include <cctype>
 
+#include "core/fxcrt/include/cfx_string_pool_template.h"
 #include "core/fxcrt/include/fx_basic.h"
 #include "core/fxcrt/include/fx_ext.h"
 #include "third_party/base/numerics/safe_math.h"
 
 template class CFX_StringDataTemplate<FX_WCHAR>;
 template class CFX_StringCTemplate<FX_WCHAR>;
+template class CFX_StringPoolTemplate<CFX_WideString>;
+template struct std::hash<CFX_WideString>;
 
 namespace {
 
diff --git a/core/fxcrt/include/cfx_string_pool_template.h b/core/fxcrt/include/cfx_string_pool_template.h
new file mode 100644
index 0000000..a59d13a
--- /dev/null
+++ b/core/fxcrt/include/cfx_string_pool_template.h
@@ -0,0 +1,30 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FXCRT_INCLUDE_CFX_STRING_POOL_TEMPLATE_H_
+#define CORE_FXCRT_INCLUDE_CFX_STRING_POOL_TEMPLATE_H_
+
+#include <unordered_set>
+
+#include "core/fxcrt/include/fx_string.h"
+
+template <typename StringType>  // Needs std::hash<StringType> and operator==.
+class CFX_StringPoolTemplate {  // Intern() yields the stored equal of |str|.
+ public:
+  StringType Intern(const StringType& str) { return *m_Pool.insert(str).first; }
+  void Clear() { m_Pool.clear(); }  // Removes every stored string.
+
+ private:
+  std::unordered_set<StringType> m_Pool;  // One element per distinct value.
+};
+
+using CFX_ByteStringPool = CFX_StringPoolTemplate<CFX_ByteString>;
+using CFX_WideStringPool = CFX_StringPoolTemplate<CFX_WideString>;
+// Explicitly instantiated in fx_basic_bstring.cpp and fx_basic_wstring.cpp.
+extern template class CFX_StringPoolTemplate<CFX_ByteString>;
+extern template class CFX_StringPoolTemplate<CFX_WideString>;
+
+#endif  // CORE_FXCRT_INCLUDE_CFX_STRING_POOL_TEMPLATE_H_
diff --git a/core/fxcrt/include/fx_ext.h b/core/fxcrt/include/fx_ext.h
index f7aca68..e33d57b 100644
--- a/core/fxcrt/include/fx_ext.h
+++ b/core/fxcrt/include/fx_ext.h
@@ -83,19 +83,11 @@
 FX_FLOAT FXSYS_FractionalScale(size_t scale_factor, int value);
 int FXSYS_FractionalScaleCount();
 
-uint32_t FX_HashCode_GetA(const CFX_ByteStringC& str, bool bIgnoreCase);
-uint32_t FX_HashCode_GetW(const CFX_WideStringC& Str, bool bIgnoreCase);
-
 void* FX_Random_MT_Start(uint32_t dwSeed);
-
-uint32_t FX_Random_MT_Generate(void* pContext);
-
 void FX_Random_MT_Close(void* pContext);
-
+uint32_t FX_Random_MT_Generate(void* pContext);
 void FX_Random_GenerateBase(uint32_t* pBuffer, int32_t iCount);
-
 void FX_Random_GenerateMT(uint32_t* pBuffer, int32_t iCount);
-
 void FX_Random_GenerateCrypto(uint32_t* pBuffer, int32_t iCount);
 
 #ifdef PDF_ENABLE_XFA
diff --git a/core/fxcrt/include/fx_string.h b/core/fxcrt/include/fx_string.h
index 4837858..6e9af22 100644
--- a/core/fxcrt/include/fx_string.h
+++ b/core/fxcrt/include/fx_string.h
@@ -8,7 +8,9 @@
 #define CORE_FXCRT_INCLUDE_FX_STRING_H_
 
 #include <stdint.h>  // For intptr_t.
+
 #include <algorithm>
+#include <functional>
 
 #include "core/fxcrt/cfx_string_c_template.h"
 #include "core/fxcrt/cfx_string_data_template.h"
@@ -166,7 +168,9 @@
   void Concat(const FX_CHAR* lpszSrcData, FX_STRSIZE nSrcLen);
 
   CFX_RetainPtr<StringData> m_pData;
+
   friend class fxcrt_ByteStringConcat_Test;
+  friend class fxcrt_ByteStringPool_Test;
 };
 
 inline bool operator==(const char* lhs, const CFX_ByteString& rhs) {
@@ -357,7 +361,9 @@
   void Concat(const FX_WCHAR* lpszSrcData, FX_STRSIZE nSrcLen);
 
   CFX_RetainPtr<StringData> m_pData;
+
   friend class fxcrt_WideStringConcatInPlace_Test;
+  friend class fxcrt_WideStringPool_Test;
 };
 
 inline CFX_WideString operator+(const CFX_WideStringC& str1,
@@ -432,4 +438,28 @@
 bool FX_atonum(const CFX_ByteStringC& str, void* pData);
 FX_STRSIZE FX_ftoa(FX_FLOAT f, FX_CHAR* buf);
 
+uint32_t FX_HashCode_GetA(const CFX_ByteStringC& str, bool bIgnoreCase);
+uint32_t FX_HashCode_GetW(const CFX_WideStringC& str, bool bIgnoreCase);
+
+namespace std {
+// Hashes a CFX_ByteString via FX_HashCode_GetA() with bIgnoreCase == false.
+template <>
+struct hash<CFX_ByteString> {
+  std::size_t operator()(const CFX_ByteString& str) const {
+    return FX_HashCode_GetA(str.AsStringC(), false);
+  }
+};
+// Hashes a CFX_WideString via FX_HashCode_GetW() with bIgnoreCase == false.
+template <>
+struct hash<CFX_WideString> {
+  std::size_t operator()(const CFX_WideString& str) const {
+    return FX_HashCode_GetW(str.AsStringC(), false);
+  }
+};
+
+}  // namespace std
+
+extern template struct std::hash<CFX_ByteString>;
+extern template struct std::hash<CFX_WideString>;
+
 #endif  // CORE_FXCRT_INCLUDE_FX_STRING_H_