Tidy cfde_wordbreak_data.cpp
- Rename WordBreakValue to WordBreakMask.
- Calculate values directly rather than asserting equivalence.
- Comment about layout of table and constraints on enum values.
- Replace strange size calculation with well-known constant.
Change-Id: If1722f60935a22d0ec9c11fa92adc535ae153b79
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/65416
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/xfa/fde/cfde_wordbreak_data.cpp b/xfa/fde/cfde_wordbreak_data.cpp
index 9d85efd..9331253 100644
--- a/xfa/fde/cfde_wordbreak_data.cpp
+++ b/xfa/fde/cfde_wordbreak_data.cpp
@@ -11,111 +11,76 @@
namespace {
-enum WordBreakValue : uint16_t {
- kWordBreakValueNone = 1 << 0,
- kWordBreakValueCR = 1 << 1,
- kWordBreakValueLF = 1 << 2,
- kWordBreakValueNewLine = 1 << 3,
- kWordBreakValueExtend = 1 << 4,
- kWordBreakValueFormat = 1 << 5,
- kWordBreakValueKataKana = 1 << 6,
- kWordBreakValueALetter = 1 << 7,
- kWordBreakValueMidLetter = 1 << 8,
- kWordBreakValueMidNum = 1 << 9,
- kWordBreakValueMidNumLet = 1 << 10,
- kWordBreakValueNumeric = 1 << 11,
- kWordBreakValueExtendNumLet = 1 << 12,
+enum WordBreakMask : uint16_t {
+ kWordBreakMaskNone = 1 << static_cast<int>(WordBreakProperty::kNone),
+ kWordBreakMaskCR = 1 << static_cast<int>(WordBreakProperty::kCR),
+ kWordBreakMaskLF = 1 << static_cast<int>(WordBreakProperty::kLF),
+ kWordBreakMaskNewLine = 1 << static_cast<int>(WordBreakProperty::kNewLine),
+ kWordBreakMaskExtend = 1 << static_cast<int>(WordBreakProperty::kExtend),
+ kWordBreakMaskFormat = 1 << static_cast<int>(WordBreakProperty::kFormat),
+ kWordBreakMaskKataKana = 1 << static_cast<int>(WordBreakProperty::kKataKana),
+ kWordBreakMaskALetter = 1 << static_cast<int>(WordBreakProperty::kALetter),
+ kWordBreakMaskMidLetter = 1
+ << static_cast<int>(WordBreakProperty::kMidLetter),
+ kWordBreakMaskMidNum = 1 << static_cast<int>(WordBreakProperty::kMidNum),
+ kWordBreakMaskMidNumLet = 1
+ << static_cast<int>(WordBreakProperty::kMidNumLet),
+ kWordBreakMaskNumeric = 1 << static_cast<int>(WordBreakProperty::kNumeric),
+ kWordBreakMaskExtendNumLet =
+ 1 << static_cast<int>(WordBreakProperty::kExtendNumLet),
};
-static_assert(kWordBreakValueNone ==
- (1 << static_cast<int>(WordBreakProperty::kNone)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueCR ==
- (1 << static_cast<int>(WordBreakProperty::kCR)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueLF ==
- (1 << static_cast<int>(WordBreakProperty::kLF)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueNewLine ==
- (1 << static_cast<int>(WordBreakProperty::kNewLine)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueExtend ==
- (1 << static_cast<int>(WordBreakProperty::kExtend)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueFormat ==
- (1 << static_cast<int>(WordBreakProperty::kFormat)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueKataKana ==
- (1 << static_cast<int>(WordBreakProperty::kKataKana)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueALetter ==
- (1 << static_cast<int>(WordBreakProperty::kALetter)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueMidLetter ==
- (1 << static_cast<int>(WordBreakProperty::kMidLetter)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueMidNum ==
- (1 << static_cast<int>(WordBreakProperty::kMidNum)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueMidNumLet ==
- (1 << static_cast<int>(WordBreakProperty::kMidNumLet)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueNumeric ==
- (1 << static_cast<int>(WordBreakProperty::kNumeric)),
- "WordBreakValue must match");
-static_assert(kWordBreakValueExtendNumLet ==
- (1 << static_cast<int>(WordBreakProperty::kExtendNumLet)),
- "WordBreakValue must match");
-
const uint16_t kWordBreakTable[] = {
// WordBreakProperty::kNone
0xFFFF,
// WordBreakProperty::kCR
- static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueCR)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF | kWordBreakMaskCR)),
// WordBreakProperty::kLF
- static_cast<uint16_t>(~(kWordBreakValueLF)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF)),
// WordBreakProperty::kNewLine
- static_cast<uint16_t>(~(kWordBreakValueLF)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF)),
// WordBreakProperty::kExtend
- static_cast<uint16_t>(~(kWordBreakValueLF)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF)),
// WordBreakProperty::kFormat
- static_cast<uint16_t>(~(kWordBreakValueLF)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF)),
// WordBreakProperty::kKataKana
- static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueKataKana |
- kWordBreakValueExtendNumLet)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF | kWordBreakMaskKataKana |
+ kWordBreakMaskExtendNumLet)),
// WordBreakProperty::kALetter
- static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueALetter |
- kWordBreakValueNumeric |
- kWordBreakValueExtendNumLet)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF | kWordBreakMaskALetter |
+ kWordBreakMaskNumeric |
+ kWordBreakMaskExtendNumLet)),
// WordBreakProperty::kMidLetter
- static_cast<uint16_t>(~(kWordBreakValueLF)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF)),
// WordBreakProperty::kMidNum
- static_cast<uint16_t>(~(kWordBreakValueLF)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF)),
// WordBreakProperty::kMidNumLet
- static_cast<uint16_t>(~(kWordBreakValueLF)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF)),
// WordBreakProperty::kNumeric
- static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueALetter |
- kWordBreakValueNumeric |
- kWordBreakValueExtendNumLet)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF | kWordBreakMaskALetter |
+ kWordBreakMaskNumeric |
+ kWordBreakMaskExtendNumLet)),
// WordBreakProperty::kExtendNumLet
- static_cast<uint16_t>(~(kWordBreakValueLF | kWordBreakValueKataKana |
- kWordBreakValueALetter | kWordBreakValueNumeric |
- kWordBreakValueExtendNumLet)),
+ static_cast<uint16_t>(~(kWordBreakMaskLF | kWordBreakMaskKataKana |
+ kWordBreakMaskALetter | kWordBreakMaskNumeric |
+ kWordBreakMaskExtendNumLet)),
};
-const uint8_t kCodePointProperties[(0xFFFF - 1) / 2 + 1] = {
+// Table of |WordBreakProperty| for each of the possible uint16_t values,
+// packed as nibbles, with the low nibble first.
+const uint8_t kCodePointProperties[32768] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x90, 0xA0,
0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0x89, 0x00, 0x00, 0x07, 0x77, 0x77, 0x77,
diff --git a/xfa/fde/cfde_wordbreak_data.h b/xfa/fde/cfde_wordbreak_data.h
index ca07b20..1465f5c 100644
--- a/xfa/fde/cfde_wordbreak_data.h
+++ b/xfa/fde/cfde_wordbreak_data.h
@@ -10,6 +10,8 @@
#include <stdint.h>
enum class WordBreakProperty : uint8_t {
+ // Internal tables depend on constants computed from these values, so do
+ // not re-order.
kNone = 0,
kCR,
kLF,