Improve RTL support for numbers with separators
Numbers with separators (e.g. 123-456, 1/1/2000, 1,111.11)
were reversed due to Issue 1994 (also partially mentioned in Issue
1456).
This fixes the bug by creating a new LeftWeak Bidi class
(see https://unicode.org/reports/tr9/). The new class makes sure that
numbers are in the same segment together with their separators.
This prevents the multiple segments inside the number from being
reversed in CFX_BidiString::SetOverallDirectionRight().
Bug: pdfium:1994, pdfium:1456
Change-Id: Ib195accc80d24a6a00ab9ed78d3c0716c70a5d2b
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/104130
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
diff --git a/AUTHORS b/AUTHORS
index a4c3f99..f8e3ed9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -16,6 +16,7 @@
Antonio Gomes <tonikitoo@igalia.com>
Chery Cherian <cherycherian@gmail.com>
Claudio DeSouza <claudiomdsjr@gmail.com>
+Dan Ilan <danilan@gmail.com>
Felix Kauselmann <licorn@gmail.com>
GiWan Go <gogil@stealien.com>
Huy Ngo <huyna89@gmail.com>
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index 825be03..68c2211 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -734,7 +734,9 @@
for (int m = segment.start + segment.count; m > segment.start; --m)
AddCharInfoByRLDirection(str[m - 1], m_TempCharList[m - 1]);
} else {
- eCurrentDirection = CFX_BidiChar::Direction::kLeft;
+ if (segment.direction != CFX_BidiChar::Direction::kLeftWeak) {
+ eCurrentDirection = CFX_BidiChar::Direction::kLeft;
+ }
for (int m = segment.start; m < segment.start + segment.count; ++m)
AddCharInfoByLRDirection(str[m], m_TempCharList[m]);
}
diff --git a/core/fxcrt/fx_bidi.cpp b/core/fxcrt/fx_bidi.cpp
index 0a13ff7..756aa12 100644
--- a/core/fxcrt/fx_bidi.cpp
+++ b/core/fxcrt/fx_bidi.cpp
@@ -9,7 +9,7 @@
#include <algorithm>
#include "core/fxcrt/fx_unicode.h"
-#include "third_party/base/check.h"
+#include "third_party/base/check_op.h"
CFX_BidiChar::CFX_BidiChar()
: m_CurrentSegment({0, 0, Direction::kNeutral}),
@@ -19,9 +19,16 @@
Direction direction;
switch (pdfium::unicode::GetBidiClass(wch)) {
case FX_BIDICLASS::kL:
+ direction = Direction::kLeft;
+ break;
case FX_BIDICLASS::kAN:
case FX_BIDICLASS::kEN:
- direction = Direction::kLeft;
+ case FX_BIDICLASS::kNSM:
+ case FX_BIDICLASS::kCS:
+ case FX_BIDICLASS::kES:
+ case FX_BIDICLASS::kET:
+ case FX_BIDICLASS::kBN:
+ direction = Direction::kLeftWeak;
break;
case FX_BIDICLASS::kR:
case FX_BIDICLASS::kAL:
@@ -78,7 +85,8 @@
CFX_BidiString::~CFX_BidiString() = default;
CFX_BidiChar::Direction CFX_BidiString::OverallDirection() const {
- DCHECK(m_eOverallDirection != CFX_BidiChar::Direction::kNeutral);
+ DCHECK_NE(m_eOverallDirection, CFX_BidiChar::Direction::kNeutral);
+ DCHECK_NE(m_eOverallDirection, CFX_BidiChar::Direction::kLeftWeak);
return m_eOverallDirection;
}
diff --git a/core/fxcrt/fx_bidi.h b/core/fxcrt/fx_bidi.h
index 86e7f37..19301c4 100644
--- a/core/fxcrt/fx_bidi.h
+++ b/core/fxcrt/fx_bidi.h
@@ -16,7 +16,7 @@
// Processes characters and group them into segments based on text direction.
class CFX_BidiChar {
public:
- enum class Direction { kNeutral, kLeft, kRight };
+ enum class Direction { kNeutral, kLeft, kRight, kLeftWeak };
struct Segment {
int32_t start; // Start position.
int32_t count; // Character count.
@@ -25,7 +25,7 @@
CFX_BidiChar();
- // Append a character and classify it as left, right, or neutral.
+ // Append a character and classify it as left, left-weak, right, or neutral.
// Returns true if the character has a different direction than the
// existing direction to indicate there is a segment to process.
bool AppendChar(wchar_t wch);
diff --git a/core/fxcrt/fx_bidi_unittest.cpp b/core/fxcrt/fx_bidi_unittest.cpp
index b029819..ce2ae66 100644
--- a/core/fxcrt/fx_bidi_unittest.cpp
+++ b/core/fxcrt/fx_bidi_unittest.cpp
@@ -7,9 +7,10 @@
namespace {
-const wchar_t kNeutralChar = 32;
-const wchar_t kLeftChar = 65;
-const wchar_t kRightChar = 1424;
+const wchar_t kNeutralChar = 32; // ' '
+const wchar_t kLeftChar = 65; // 'A'
+const wchar_t kRightChar = 1488; // 'א'
+const wchar_t kLeftWeakChar = 46; // '.'
} // namespace
@@ -84,6 +85,40 @@
EXPECT_FALSE(bidi.EndChar());
}
+TEST(fxcrt, BidiCharLeftLeftWeakRight) {
+ CFX_BidiChar bidi;
+ CFX_BidiChar::Segment info;
+
+ EXPECT_TRUE(bidi.AppendChar(kLeftChar));
+ info = bidi.GetSegmentInfo();
+ EXPECT_EQ(0, info.start);
+ EXPECT_EQ(0, info.count);
+
+ EXPECT_FALSE(bidi.AppendChar(kLeftChar));
+ EXPECT_FALSE(bidi.AppendChar(kLeftChar));
+ EXPECT_TRUE(bidi.AppendChar(kLeftWeakChar));
+ info = bidi.GetSegmentInfo();
+ EXPECT_EQ(0, info.start);
+ EXPECT_EQ(3, info.count);
+
+ EXPECT_FALSE(bidi.AppendChar(kLeftWeakChar));
+ EXPECT_FALSE(bidi.AppendChar(kLeftWeakChar));
+ EXPECT_FALSE(bidi.AppendChar(kLeftWeakChar));
+ EXPECT_TRUE(bidi.AppendChar(kRightChar));
+ info = bidi.GetSegmentInfo();
+ EXPECT_EQ(CFX_BidiChar::Direction::kLeftWeak, info.direction);
+ EXPECT_EQ(3, info.start);
+ EXPECT_EQ(4, info.count);
+
+ EXPECT_TRUE(bidi.EndChar());
+ info = bidi.GetSegmentInfo();
+ EXPECT_EQ(CFX_BidiChar::Direction::kRight, info.direction);
+ EXPECT_EQ(7, info.start);
+ EXPECT_EQ(1, info.count);
+
+ EXPECT_FALSE(bidi.EndChar());
+}
+
TEST(fxcrt, BidiCharLeftRightLeft) {
CFX_BidiChar bidi;
CFX_BidiChar::Segment info;
@@ -198,6 +233,49 @@
}
}
+TEST(fxcrt, BidiStringAllLeftWeak) {
+ {
+ const wchar_t str[] = {kLeftWeakChar, 0};
+ CFX_BidiString bidi(str);
+ EXPECT_EQ(CFX_BidiChar::Direction::kLeft, bidi.OverallDirection());
+
+ auto it = bidi.begin();
+ ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(0, it->start);
+ EXPECT_EQ(0, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kNeutral, it->direction);
+
+ ++it;
+ ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(0, it->start);
+ EXPECT_EQ(1, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kLeftWeak, it->direction);
+
+ ++it;
+ EXPECT_TRUE(it == bidi.end());
+ }
+ {
+ const wchar_t str[] = {kLeftWeakChar, kLeftWeakChar, kLeftWeakChar, 0};
+ CFX_BidiString bidi(str);
+ EXPECT_EQ(CFX_BidiChar::Direction::kLeft, bidi.OverallDirection());
+
+ auto it = bidi.begin();
+ ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(0, it->start);
+ EXPECT_EQ(0, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kNeutral, it->direction);
+
+ ++it;
+ ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(0, it->start);
+ EXPECT_EQ(3, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kLeftWeak, it->direction);
+
+ ++it;
+ EXPECT_TRUE(it == bidi.end());
+ }
+}
+
TEST(fxcrt, BidiStringAllRight) {
{
const wchar_t str[] = {kRightChar, 0};
@@ -323,24 +401,69 @@
EXPECT_TRUE(it == bidi.end());
}
+TEST(fxcrt, BidiStringRightLeftWeakLeftRight) {
+ const wchar_t str[] = {kRightChar, kLeftWeakChar, kLeftChar, kRightChar, 0};
+ CFX_BidiString bidi(str);
+ EXPECT_EQ(CFX_BidiChar::Direction::kRight, bidi.OverallDirection());
+
+ auto it = bidi.begin();
+ ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(3, it->start);
+ EXPECT_EQ(1, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kRight, it->direction);
+
+ ++it;
+ ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(2, it->start);
+ EXPECT_EQ(1, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kLeft, it->direction);
+
+ ++it;
+ ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(1, it->start);
+ EXPECT_EQ(1, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kLeftWeak, it->direction);
+
+ ++it;
+ ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(0, it->start);
+ EXPECT_EQ(1, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kRight, it->direction);
+
+ ++it;
+ ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(0, it->start);
+ EXPECT_EQ(0, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kNeutral, it->direction);
+
+ ++it;
+ EXPECT_TRUE(it == bidi.end());
+}
+
TEST(fxcrt, BidiStringReverse) {
- const wchar_t str[] = {kLeftChar, kNeutralChar, kRightChar, kLeftChar, 0};
+ const wchar_t str[] = {kLeftChar, kNeutralChar, kRightChar,
+ kLeftWeakChar, kLeftChar, 0};
CFX_BidiString bidi(str);
EXPECT_EQ(CFX_BidiChar::Direction::kLeft, bidi.OverallDirection());
bidi.SetOverallDirectionRight();
auto it = bidi.begin();
ASSERT_FALSE(it == bidi.end());
- EXPECT_EQ(3, it->start);
+ EXPECT_EQ(4, it->start);
EXPECT_EQ(1, it->count);
EXPECT_EQ(CFX_BidiChar::Direction::kLeft, it->direction);
++it;
ASSERT_FALSE(it == bidi.end());
+ EXPECT_EQ(3, it->start);
+ EXPECT_EQ(1, it->count);
+ EXPECT_EQ(CFX_BidiChar::Direction::kLeftWeak, it->direction);
+
+ ++it;
+ ASSERT_FALSE(it == bidi.end());
EXPECT_EQ(2, it->start);
EXPECT_EQ(1, it->count);
EXPECT_EQ(CFX_BidiChar::Direction::kRight, it->direction);
- ASSERT_FALSE(it == bidi.end());
++it;
ASSERT_FALSE(it == bidi.end());