Split kTextLayoutCodeProperties[] into xfa and non-xfa portions.

Saves 128KB of data in non-XFA builds (esp. Android): the 65536-entry
table shrinks from uint32_t to uint16_t entries.

Change-Id: I3c0ba57324357d97ebb5f1d4fe710f82eadd5870
Reviewed-on: https://pdfium-review.googlesource.com/c/48111
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fxcrt/fx_unicode.cpp b/core/fxcrt/fx_unicode.cpp
index 4bb4c80..10d15c0 100644
--- a/core/fxcrt/fx_unicode.cpp
+++ b/core/fxcrt/fx_unicode.cpp
@@ -10,41 +10,22 @@
 
 namespace {
 
-// Format of uint32_t values in kTextLayoutCodeProperties[].
-constexpr uint32_t kBreakTypeBitPos = 0;
-constexpr uint32_t kBreakTypeBitCount = 6;
-constexpr uint32_t kBreakTypeBitMask =
-    (((1u << kBreakTypeBitCount) - 1) << kBreakTypeBitPos);
-
-constexpr uint32_t kBidiClassBitPos = 6;
-constexpr uint32_t kBidiClassBitCount = 5;
-constexpr uint32_t kBidiClassBitMask =
+// Format of uint16_t values in kTextLayoutCodeProperties[].
+constexpr uint16_t kBidiClassBitPos = 0;
+constexpr uint16_t kBidiClassBitCount = 5;
+constexpr uint16_t kBidiClassBitMask =
     (((1u << kBidiClassBitCount) - 1) << kBidiClassBitPos);
 
-constexpr uint32_t kCharTypeBitPos = 11;
-constexpr uint32_t kCharTypeBitCount = 4;
-constexpr uint32_t kCharTypeBitMask =
-    (((1u << kCharTypeBitCount) - 1) << kCharTypeBitPos);
-
-// TODO(tsepez): Unknown, possibly unused field.
-constexpr uint32_t kField2BitPos = 15;
-constexpr uint32_t kField2BitCount = 8;
-constexpr uint32_t kField2BitMask =
-    (((1 << kField2BitCount) - 1) << kField2BitPos);
-
-constexpr uint32_t kMirrorBitPos = 23;
-constexpr uint32_t kMirrorBitCount = 9;
-constexpr uint32_t kMirrorBitMask =
+constexpr uint16_t kMirrorBitPos = 5;
+constexpr uint16_t kMirrorBitCount = 9;
+constexpr uint16_t kMirrorBitMask =
     (((1 << kMirrorBitCount) - 1) << kMirrorBitPos);
 
 #undef CHARPROP____
-#define CHARPROP____(mirror, f2, ct, bd, bt)                       \
-  ((mirror << kMirrorBitPos) | (f2 << kField2BitPos) |             \
-   (static_cast<uint32_t>(FX_CHARTYPE::ct) << kCharTypeBitPos) |   \
-   (static_cast<uint32_t>(FX_BIDICLASS::bd) << kBidiClassBitPos) | \
-   (static_cast<uint32_t>(FX_BREAKPROPERTY::bt) << kBreakTypeBitPos))
-
-const uint32_t kTextLayoutCodeProperties[] = {
+#define CHARPROP____(mirror, f2, ct, bd, bt) \
+  ((mirror << kMirrorBitPos) |               \
+   (static_cast<uint16_t>(FX_BIDICLASS::bd) << kBidiClassBitPos))
+const uint16_t kTextLayoutCodeProperties[] = {
 #include "core/fxcrt/fx_ucddata.inc"
 };
 #undef CHARPROP____
@@ -54,20 +35,48 @@
 
 static_assert(kTextLayoutCodePropertiesSize == 65536, "missing characters");
 
-static_assert((kBreakTypeBitMask | kBidiClassBitMask | kCharTypeBitMask |
-               kField2BitMask | kMirrorBitMask) == 0xffffffff,
-              "missing bits in mask");
+uint16_t GetUnicodeProperties(wchar_t wch) {
+  size_t idx = static_cast<size_t>(wch);
+  if (idx < kTextLayoutCodePropertiesSize)
+    return kTextLayoutCodeProperties[idx];
+  return 0;
+}
 
-static_assert((kBreakTypeBitMask & kBidiClassBitMask) == 0, "overlapping bits");
-static_assert((kBreakTypeBitMask & kCharTypeBitMask) == 0, "overlapping bits");
-static_assert((kBreakTypeBitMask & kField2BitMask) == 0, "overlapping bits");
-static_assert((kBreakTypeBitMask & kMirrorBitMask) == 0, "overlapping bits");
-static_assert((kBidiClassBitMask & kCharTypeBitMask) == 0, "overlapping bits");
-static_assert((kBidiClassBitMask & kField2BitMask) == 0, "overlapping bits");
-static_assert((kBidiClassBitMask & kMirrorBitMask) == 0, "overlapping bits");
-static_assert((kCharTypeBitMask & kField2BitMask) == 0, "overlapping bits");
-static_assert((kCharTypeBitMask & kMirrorBitMask) == 0, "overlapping bits");
-static_assert((kField2BitMask & kMirrorBitMask) == 0, "overlapping bits");
+#ifdef PDF_ENABLE_XFA
+// Format of uint16_t values in kExtendedTextLayoutCodeProperties[].
+constexpr uint16_t kBreakTypeBitPos = 0;
+constexpr uint16_t kBreakTypeBitCount = 6;
+constexpr uint16_t kBreakTypeBitMask =
+    (((1u << kBreakTypeBitCount) - 1) << kBreakTypeBitPos);
+
+constexpr uint16_t kCharTypeBitPos = 6;
+constexpr uint16_t kCharTypeBitCount = 4;
+constexpr uint16_t kCharTypeBitMask =
+    (((1u << kCharTypeBitCount) - 1) << kCharTypeBitPos);
+
+#undef CHARPROP____
+#define CHARPROP____(mirror, f2, ct, bd, bt)                     \
+  ((static_cast<uint16_t>(FX_CHARTYPE::ct) << kCharTypeBitPos) | \
+   (static_cast<uint16_t>(FX_BREAKPROPERTY::bt) << kBreakTypeBitPos))
+const uint16_t kExtendedTextLayoutCodeProperties[] = {
+#include "core/fxcrt/fx_ucddata.inc"
+};
+#undef CHARPROP____
+
+const size_t kExtendedTextLayoutCodePropertiesSize =
+    FX_ArraySize(kExtendedTextLayoutCodeProperties);
+
+static_assert(kExtendedTextLayoutCodePropertiesSize == 65536,
+              "missing characters");
+
+uint16_t GetExtendedUnicodeProperties(wchar_t wch) {
+  size_t idx = static_cast<size_t>(wch);
+  if (idx < kExtendedTextLayoutCodePropertiesSize)
+    return kExtendedTextLayoutCodeProperties[idx];
+  return 0;
+}
+
+#endif  // PDF_ENABLE_XFA
 
 const uint16_t kFXTextLayoutBidiMirror[] = {
     0x0029, 0x0028, 0x003E, 0x003C, 0x005D, 0x005B, 0x007D, 0x007B, 0x00BB,
@@ -132,43 +141,36 @@
 const size_t kFXTextLayoutBidiMirrorSize =
     FX_ArraySize(kFXTextLayoutBidiMirror);
 
-uint32_t GetUnicodeProperties(wchar_t wch) {
-  size_t idx = static_cast<size_t>(wch);
-  if (idx < kTextLayoutCodePropertiesSize)
-    return kTextLayoutCodeProperties[idx];
-  return 0;
-}
-
 }  // namespace
 
 wchar_t FX_GetMirrorChar(wchar_t wch) {
-  uint32_t dwProps = GetUnicodeProperties(wch);
-  uint32_t dwTemp = (dwProps & kMirrorBitMask);
-  if (dwTemp == kMirrorBitMask)
+  uint16_t prop = GetUnicodeProperties(wch);
+  uint16_t temp = (prop & kMirrorBitMask);
+  if (temp == kMirrorBitMask)
     return wch;
-  size_t idx = dwTemp >> kMirrorBitPos;
+  size_t idx = temp >> kMirrorBitPos;
   return idx < kFXTextLayoutBidiMirrorSize ? kFXTextLayoutBidiMirror[idx] : wch;
 }
 
 FX_BIDICLASS FX_GetBidiClass(wchar_t wch) {
-  uint32_t prop = GetUnicodeProperties(wch);
-  uint32_t result = (prop & kBidiClassBitMask) >> kBidiClassBitPos;
-  ASSERT(result <= static_cast<uint32_t>(FX_BIDICLASS::kPDF));
+  uint16_t prop = GetUnicodeProperties(wch);
+  uint16_t result = (prop & kBidiClassBitMask) >> kBidiClassBitPos;
+  ASSERT(result <= static_cast<uint16_t>(FX_BIDICLASS::kPDF));
   return static_cast<FX_BIDICLASS>(result);
 }
 
 #ifdef PDF_ENABLE_XFA
 FX_CHARTYPE FX_GetCharType(wchar_t wch) {
-  uint32_t prop = GetUnicodeProperties(wch);
-  uint32_t result = (prop & kCharTypeBitMask) >> kCharTypeBitPos;
-  ASSERT(result <= static_cast<uint32_t>(FX_CHARTYPE::kArabic));
+  uint16_t prop = GetExtendedUnicodeProperties(wch);
+  uint16_t result = (prop & kCharTypeBitMask) >> kCharTypeBitPos;
+  ASSERT(result <= static_cast<uint16_t>(FX_CHARTYPE::kArabic));
   return static_cast<FX_CHARTYPE>(result);
 }
 
 FX_BREAKPROPERTY FX_GetBreakProperty(wchar_t wch) {
-  uint32_t prop = GetUnicodeProperties(wch);
-  uint32_t result = (prop & kBreakTypeBitMask) >> kBreakTypeBitPos;
-  ASSERT(result <= static_cast<uint32_t>(FX_BREAKPROPERTY::kTB));
+  uint16_t prop = GetExtendedUnicodeProperties(wch);
+  uint16_t result = (prop & kBreakTypeBitMask) >> kBreakTypeBitPos;
+  ASSERT(result <= static_cast<uint16_t>(FX_BREAKPROPERTY::kTB));
   return static_cast<FX_BREAKPROPERTY>(result);
 }
 #endif  // PDF_ENABLE_XFA
diff --git a/core/fxcrt/fx_unicode.h b/core/fxcrt/fx_unicode.h
index e482406..f1a1966 100644
--- a/core/fxcrt/fx_unicode.h
+++ b/core/fxcrt/fx_unicode.h
@@ -9,6 +9,33 @@
 
 #include "core/fxcrt/fx_system.h"
 
+enum class FX_BIDICLASS : uint8_t {
+  kON = 0,    // Other Neutral
+  kL = 1,     // Left Letter
+  kR = 2,     // Right Letter
+  kAN = 3,    // Arabic Number
+  kEN = 4,    // European Number
+  kAL = 5,    // Arabic Letter
+  kNSM = 6,   // Non-spacing Mark
+  kCS = 7,    // Common Number Separator
+  kES = 8,    // European Separator
+  kET = 9,    // European Number Terminator
+  kBN = 10,   // Boundary Neutral
+  kS = 11,    // Segment Separator
+  kWS = 12,   // Whitespace
+  kB = 13,    // Paragraph Separator
+  kRLO = 14,  // Right-to-Left Override
+  kRLE = 15,  // Right-to-Left Embedding
+  kLRO = 16,  // Left-to-Right Override
+  kLRE = 17,  // Left-to-Right Embedding
+  kPDF = 18,  // Pop Directional Format
+  kN = kON,
+};
+
+wchar_t FX_GetMirrorChar(wchar_t wch);
+FX_BIDICLASS FX_GetBidiClass(wchar_t wch);
+
+#ifdef PDF_ENABLE_XFA
 // As defined in http://www.unicode.org/reports/tr14
 enum class FX_BREAKPROPERTY : uint8_t {
   kOP = 0,
@@ -51,29 +78,6 @@
   kTB = 37,
 };
 
-enum class FX_BIDICLASS : uint8_t {
-  kON = 0,    // Other Neutral
-  kL = 1,     // Left Letter
-  kR = 2,     // Right Letter
-  kAN = 3,    // Arabic Number
-  kEN = 4,    // European Number
-  kAL = 5,    // Arabic Letter
-  kNSM = 6,   // Non-spacing Mark
-  kCS = 7,    // Common Number Separator
-  kES = 8,    // European Separator
-  kET = 9,    // European Number Terminator
-  kBN = 10,   // Boundary Neutral
-  kS = 11,    // Segment Separator
-  kWS = 12,   // Whitespace
-  kB = 13,    // Paragraph Separator
-  kRLO = 14,  // Right-to-Left Override
-  kRLE = 15,  // Right-to-Left Embedding
-  kLRO = 16,  // Left-to-Right Override
-  kLRE = 17,  // Left-to-Right Embedding
-  kPDF = 18,  // Pop Directional Format
-  kN = kON,
-};
-
 enum class FX_CHARTYPE : uint8_t {
   kUnknown = 0,
   kTab,
@@ -90,10 +94,6 @@
   kArabic,
 };
 
-wchar_t FX_GetMirrorChar(wchar_t wch);
-FX_BIDICLASS FX_GetBidiClass(wchar_t wch);
-
-#ifdef PDF_ENABLE_XFA
 FX_CHARTYPE FX_GetCharType(wchar_t wch);
 
 // Analagous to ULineBreak in icu's uchar.h, but permuted order, and a