// Copyright 2015 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
#include <string.h>

#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#include "build/build_config.h"
#include "core/fxge/fx_font.h"
#include "public/cpp/fpdf_scopers.h"
#include "public/fpdf_text.h"
#include "public/fpdf_transformpage.h"
#include "public/fpdfview.h"
#include "testing/embedder_test.h"
#include "testing/fx_string_testhelpers.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/base/cxx17_backports.h"
Tom Sepez26b8a5b2015-01-27 12:42:36 -080020
Tom Sepez526f6d52015-01-28 15:49:13 -080021namespace {
22
Lei Zhanga0608aa2018-06-08 14:50:49 +000023constexpr char kHelloGoodbyeText[] = "Hello, world!\r\nGoodbye, world!";
Lei Zhang4501a502020-05-18 16:52:59 +000024constexpr int kHelloGoodbyeTextSize = pdfium::size(kHelloGoodbyeText);
Lei Zhanga0608aa2018-06-08 14:50:49 +000025
// Compares the first |length| elements of |actual| against the corresponding
// characters of the NUL-terminated string |expected|.
//
// |length| may extend over the terminating NUL of |expected| (so that callers
// can verify termination of |actual|) but not beyond it; any larger |length|
// yields false without reading either buffer.
//
// Returns true only if every compared element matches.
bool check_unsigned_shorts(const char* expected,
                           const unsigned short* actual,
                           size_t length) {
  // Refuse to read past the terminating NUL of |expected|.
  if (length > strlen(expected) + 1)
    return false;

  for (size_t i = 0; i < length; ++i) {
    if (actual[i] != static_cast<unsigned short>(expected[i]))
      return false;
  }
  return true;
}
38
39} // namespace
40
Lei Zhangab41f252018-12-23 03:10:50 +000041class FPDFTextEmbedderTest : public EmbedderTest {};
Tom Sepez26b8a5b2015-01-27 12:42:36 -080042
Lei Zhangab41f252018-12-23 03:10:50 +000043TEST_F(FPDFTextEmbedderTest, Text) {
Lei Zhang30e04982018-03-16 22:48:33 +000044 ASSERT_TRUE(OpenDocument("hello_world.pdf"));
Tom Sepezda8189e2015-01-30 14:41:50 -080045 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +000046 ASSERT_TRUE(page);
Tom Sepez526f6d52015-01-28 15:49:13 -080047
48 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +000049 ASSERT_TRUE(textpage);
Tom Sepez526f6d52015-01-28 15:49:13 -080050
Lei Zhang502e8592018-03-16 22:04:33 +000051 unsigned short buffer[128];
52 memset(buffer, 0xbd, sizeof(buffer));
Tom Sepez526f6d52015-01-28 15:49:13 -080053
Ryan Harrison3fc7fe52017-11-27 19:30:17 +000054 // Check that edge cases are handled gracefully
Ryan Harrisonc5ac0572017-08-31 16:37:48 -040055 EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 128, nullptr));
Lei Zhang502e8592018-03-16 22:04:33 +000056 EXPECT_EQ(0, FPDFText_GetText(textpage, -1, 128, buffer));
57 EXPECT_EQ(0, FPDFText_GetText(textpage, 0, -1, buffer));
58 EXPECT_EQ(1, FPDFText_GetText(textpage, 0, 0, buffer));
59 EXPECT_EQ(0, buffer[0]);
Ryan Harrisonc5ac0572017-08-31 16:37:48 -040060
Lei Zhang0839a8e2017-11-29 19:22:20 +000061 // Keep going and check the next case.
Lei Zhang502e8592018-03-16 22:04:33 +000062 memset(buffer, 0xbd, sizeof(buffer));
63 EXPECT_EQ(2, FPDFText_GetText(textpage, 0, 1, buffer));
Lei Zhanga0608aa2018-06-08 14:50:49 +000064 EXPECT_EQ(kHelloGoodbyeText[0], buffer[0]);
Lei Zhang502e8592018-03-16 22:04:33 +000065 EXPECT_EQ(0, buffer[1]);
Lei Zhang0839a8e2017-11-29 19:22:20 +000066
Tom Sepez526f6d52015-01-28 15:49:13 -080067 // Check includes the terminating NUL that is provided.
Lei Zhang502e8592018-03-16 22:04:33 +000068 int num_chars = FPDFText_GetText(textpage, 0, 128, buffer);
Lei Zhanga0608aa2018-06-08 14:50:49 +000069 ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
70 EXPECT_TRUE(
71 check_unsigned_shorts(kHelloGoodbyeText, buffer, kHelloGoodbyeTextSize));
Tom Sepez526f6d52015-01-28 15:49:13 -080072
73 // Count does not include the terminating NUL in the string literal.
Lei Zhanga0608aa2018-06-08 14:50:49 +000074 EXPECT_EQ(kHelloGoodbyeTextSize - 1, FPDFText_CountChars(textpage));
75 for (size_t i = 0; i < kHelloGoodbyeTextSize - 1; ++i) {
76 EXPECT_EQ(static_cast<unsigned int>(kHelloGoodbyeText[i]),
Lei Zhanga0f67242015-08-17 15:39:30 -070077 FPDFText_GetUnicode(textpage, i))
78 << " at " << i;
Tom Sepez526f6d52015-01-28 15:49:13 -080079 }
80
Ryan Harrison2bf05a62017-09-05 11:48:55 -040081 // Extracting using a buffer that will be completely filled. Small buffer is
82 // 12 elements long, since it will need 2 locations per displayed character in
83 // the expected string, plus 2 more for the terminating character.
Lei Zhang502e8592018-03-16 22:04:33 +000084 static const char kSmallExpected[] = "Hello";
Ryan Harrison2bf05a62017-09-05 11:48:55 -040085 unsigned short small_buffer[12];
Lei Zhang502e8592018-03-16 22:04:33 +000086 memset(buffer, 0xbd, sizeof(buffer));
Ryan Harrison3fc7fe52017-11-27 19:30:17 +000087 EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 5, small_buffer));
Lei Zhang502e8592018-03-16 22:04:33 +000088 EXPECT_TRUE(check_unsigned_shorts(kSmallExpected, small_buffer,
89 sizeof(kSmallExpected)));
Ryan Harrison2bf05a62017-09-05 11:48:55 -040090
Tom Sepez526f6d52015-01-28 15:49:13 -080091 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
92 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15));
93
Lei Zhangea545a62019-11-06 22:12:00 +000094 double left = 1.0;
95 double right = 2.0;
96 double bottom = 3.0;
97 double top = 4.0;
Lei Zhang241752f2018-01-11 14:34:06 +000098 EXPECT_FALSE(FPDFText_GetCharBox(nullptr, 4, &left, &right, &bottom, &top));
Lei Zhangea545a62019-11-06 22:12:00 +000099 EXPECT_DOUBLE_EQ(1.0, left);
100 EXPECT_DOUBLE_EQ(2.0, right);
101 EXPECT_DOUBLE_EQ(3.0, bottom);
102 EXPECT_DOUBLE_EQ(4.0, top);
Lei Zhang241752f2018-01-11 14:34:06 +0000103 EXPECT_FALSE(FPDFText_GetCharBox(textpage, -1, &left, &right, &bottom, &top));
Lei Zhangea545a62019-11-06 22:12:00 +0000104 EXPECT_DOUBLE_EQ(1.0, left);
105 EXPECT_DOUBLE_EQ(2.0, right);
106 EXPECT_DOUBLE_EQ(3.0, bottom);
107 EXPECT_DOUBLE_EQ(4.0, top);
Lei Zhang241752f2018-01-11 14:34:06 +0000108 EXPECT_FALSE(FPDFText_GetCharBox(textpage, 55, &left, &right, &bottom, &top));
Lei Zhangea545a62019-11-06 22:12:00 +0000109 EXPECT_DOUBLE_EQ(1.0, left);
110 EXPECT_DOUBLE_EQ(2.0, right);
111 EXPECT_DOUBLE_EQ(3.0, bottom);
112 EXPECT_DOUBLE_EQ(4.0, top);
113 EXPECT_FALSE(
114 FPDFText_GetCharBox(textpage, 4, nullptr, &right, &bottom, &top));
115 EXPECT_FALSE(FPDFText_GetCharBox(textpage, 4, &left, nullptr, &bottom, &top));
116 EXPECT_FALSE(FPDFText_GetCharBox(textpage, 4, &left, &right, nullptr, &top));
117 EXPECT_FALSE(
118 FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, nullptr));
119 EXPECT_FALSE(
120 FPDFText_GetCharBox(textpage, 4, nullptr, nullptr, nullptr, nullptr));
Lei Zhang241752f2018-01-11 14:34:06 +0000121
122 EXPECT_TRUE(FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top));
Tom Sepez526f6d52015-01-28 15:49:13 -0800123 EXPECT_NEAR(41.071, left, 0.001);
124 EXPECT_NEAR(46.243, right, 0.001);
125 EXPECT_NEAR(49.844, bottom, 0.001);
126 EXPECT_NEAR(55.520, top, 0.001);
127
Lei Zhang8da98232019-12-11 23:29:33 +0000128 FS_RECTF rect = {4.0f, 1.0f, 3.0f, 2.0f};
129 EXPECT_FALSE(FPDFText_GetLooseCharBox(nullptr, 4, &rect));
130 EXPECT_FLOAT_EQ(4.0f, rect.left);
131 EXPECT_FLOAT_EQ(3.0f, rect.right);
132 EXPECT_FLOAT_EQ(2.0f, rect.bottom);
133 EXPECT_FLOAT_EQ(1.0f, rect.top);
134 EXPECT_FALSE(FPDFText_GetLooseCharBox(textpage, -1, &rect));
135 EXPECT_FLOAT_EQ(4.0f, rect.left);
136 EXPECT_FLOAT_EQ(3.0f, rect.right);
137 EXPECT_FLOAT_EQ(2.0f, rect.bottom);
138 EXPECT_FLOAT_EQ(1.0f, rect.top);
139 EXPECT_FALSE(FPDFText_GetLooseCharBox(textpage, 55, &rect));
140 EXPECT_FLOAT_EQ(4.0f, rect.left);
141 EXPECT_FLOAT_EQ(3.0f, rect.right);
142 EXPECT_FLOAT_EQ(2.0f, rect.bottom);
143 EXPECT_FLOAT_EQ(1.0f, rect.top);
144 EXPECT_FALSE(FPDFText_GetLooseCharBox(textpage, 4, nullptr));
Lei Zhangea545a62019-11-06 22:12:00 +0000145
Lei Zhang8da98232019-12-11 23:29:33 +0000146 EXPECT_TRUE(FPDFText_GetLooseCharBox(textpage, 4, &rect));
147 EXPECT_FLOAT_EQ(40.664001f, rect.left);
148 EXPECT_FLOAT_EQ(46.664001f, rect.right);
149 EXPECT_FLOAT_EQ(47.667271f, rect.bottom);
150 EXPECT_FLOAT_EQ(59.667271f, rect.top);
Ian Prest0f4ac582019-09-26 20:33:18 +0000151
Andrew Weintraubd3002342017-08-11 11:36:51 -0400152 double x = 0.0;
153 double y = 0.0;
154 EXPECT_TRUE(FPDFText_GetCharOrigin(textpage, 4, &x, &y));
155 EXPECT_NEAR(40.664, x, 0.001);
156 EXPECT_NEAR(50.000, y, 0.001);
157
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700158 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0));
159 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0));
160 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0));
Tom Sepez526f6d52015-01-28 15:49:13 -0800161
162 // Test out of range indicies.
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700163 EXPECT_EQ(-1,
164 FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0));
165 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0));
Tom Sepez526f6d52015-01-28 15:49:13 -0800166
167 // Count does not include the terminating NUL in the string literal.
Lei Zhanga0608aa2018-06-08 14:50:49 +0000168 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, kHelloGoodbyeTextSize - 1));
Tom Sepez526f6d52015-01-28 15:49:13 -0800169
170 left = 0.0;
171 right = 0.0;
172 bottom = 0.0;
173 top = 0.0;
Lei Zhang76250212018-01-11 14:28:01 +0000174 EXPECT_TRUE(FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom));
Tom Sepez526f6d52015-01-28 15:49:13 -0800175 EXPECT_NEAR(20.847, left, 0.001);
176 EXPECT_NEAR(135.167, right, 0.001);
177 EXPECT_NEAR(96.655, bottom, 0.001);
Lei Zhang1d337872020-09-18 22:59:04 +0000178 EXPECT_NEAR(111.648, top, 0.001);
Tom Sepez526f6d52015-01-28 15:49:13 -0800179
180 // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0).
181 left = -1.0;
182 right = -1.0;
183 bottom = -1.0;
184 top = -1.0;
Lei Zhang76250212018-01-11 14:28:01 +0000185 EXPECT_FALSE(FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom));
Tom Sepez526f6d52015-01-28 15:49:13 -0800186 EXPECT_EQ(0.0, left);
187 EXPECT_EQ(0.0, right);
188 EXPECT_EQ(0.0, bottom);
189 EXPECT_EQ(0.0, top);
190
191 left = -2.0;
192 right = -2.0;
193 bottom = -2.0;
194 top = -2.0;
Lei Zhang76250212018-01-11 14:28:01 +0000195 EXPECT_FALSE(FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom));
Tom Sepez526f6d52015-01-28 15:49:13 -0800196 EXPECT_EQ(0.0, left);
197 EXPECT_EQ(0.0, right);
198 EXPECT_EQ(0.0, bottom);
199 EXPECT_EQ(0.0, top);
200
Lei Zhang49fa50d2018-06-08 15:31:10 +0000201 EXPECT_EQ(
202 9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, nullptr, 0));
Tom Sepez526f6d52015-01-28 15:49:13 -0800203
204 // Extract starting at character 4 as above.
Lei Zhang502e8592018-03-16 22:04:33 +0000205 memset(buffer, 0xbd, sizeof(buffer));
206 EXPECT_EQ(
207 1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, buffer, 1));
Lei Zhanga0608aa2018-06-08 14:50:49 +0000208 EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText + 4, buffer, 1));
Lei Zhang502e8592018-03-16 22:04:33 +0000209 EXPECT_EQ(0xbdbd, buffer[1]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800210
Lei Zhang502e8592018-03-16 22:04:33 +0000211 memset(buffer, 0xbd, sizeof(buffer));
212 EXPECT_EQ(
213 9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, buffer, 9));
Lei Zhanga0608aa2018-06-08 14:50:49 +0000214 EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText + 4, buffer, 9));
Lei Zhang502e8592018-03-16 22:04:33 +0000215 EXPECT_EQ(0xbdbd, buffer[9]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800216
Lei Zhang502e8592018-03-16 22:04:33 +0000217 memset(buffer, 0xbd, sizeof(buffer));
Oliver Chang35e68a52015-12-09 12:44:33 -0800218 EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
Lei Zhang502e8592018-03-16 22:04:33 +0000219 buffer, 128));
Lei Zhanga0608aa2018-06-08 14:50:49 +0000220 EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText + 4, buffer, 9));
Lei Zhang502e8592018-03-16 22:04:33 +0000221 EXPECT_EQ(0u, buffer[9]);
222 EXPECT_EQ(0xbdbd, buffer[10]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800223
224 FPDFText_ClosePage(textpage);
Lei Zhangd27acae2015-05-15 15:36:02 -0700225 UnloadPage(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800226}
227
Ian Prest0f4ac582019-09-26 20:33:18 +0000228TEST_F(FPDFTextEmbedderTest, TextVertical) {
229 ASSERT_TRUE(OpenDocument("vertical_text.pdf"));
230 FPDF_PAGE page = LoadPage(0);
231 ASSERT_TRUE(page);
232
233 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
234 ASSERT_TRUE(textpage);
235
236 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
237
238 double x = 0.0;
239 double y = 0.0;
240 EXPECT_TRUE(FPDFText_GetCharOrigin(textpage, 1, &x, &y));
241 EXPECT_NEAR(6.664, x, 0.001);
242 EXPECT_NEAR(171.508, y, 0.001);
243
244 EXPECT_TRUE(FPDFText_GetCharOrigin(textpage, 2, &x, &y));
245 EXPECT_NEAR(8.668, x, 0.001);
246 EXPECT_NEAR(160.492, y, 0.001);
247
Lei Zhang8da98232019-12-11 23:29:33 +0000248 FS_RECTF rect;
249 EXPECT_TRUE(FPDFText_GetLooseCharBox(textpage, 1, &rect));
250 EXPECT_NEAR(4, rect.left, 0.001);
251 EXPECT_NEAR(16, rect.right, 0.001);
252 EXPECT_NEAR(178.984, rect.bottom, 0.001);
253 EXPECT_NEAR(170.308, rect.top, 0.001);
Ian Prest0f4ac582019-09-26 20:33:18 +0000254
Lei Zhang8da98232019-12-11 23:29:33 +0000255 EXPECT_TRUE(FPDFText_GetLooseCharBox(textpage, 2, &rect));
256 EXPECT_NEAR(4, rect.left, 0.001);
257 EXPECT_NEAR(16, rect.right, 0.001);
258 EXPECT_NEAR(170.308, rect.bottom, 0.001);
259 EXPECT_NEAR(159.292, rect.top, 0.001);
Ian Prest0f4ac582019-09-26 20:33:18 +0000260
261 FPDFText_ClosePage(textpage);
262 UnloadPage(page);
263}
264
Lei Zhang0e550952020-03-27 21:42:33 +0000265TEST_F(FPDFTextEmbedderTest, TextHebrewMirrored) {
266 ASSERT_TRUE(OpenDocument("hebrew_mirrored.pdf"));
267 FPDF_PAGE page = LoadPage(0);
268 ASSERT_TRUE(page);
269
270 {
271 ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
272 ASSERT_TRUE(textpage);
273
274 constexpr int kCharCount = 10;
275 ASSERT_EQ(kCharCount, FPDFText_CountChars(textpage.get()));
276
277 unsigned short buffer[kCharCount + 1];
278 memset(buffer, 0x42, sizeof(buffer));
279 EXPECT_EQ(kCharCount + 1,
280 FPDFText_GetText(textpage.get(), 0, kCharCount, buffer));
281 EXPECT_EQ(0x05d1, buffer[0]);
282 EXPECT_EQ(0x05e0, buffer[1]);
283 EXPECT_EQ(0x05d9, buffer[2]);
284 EXPECT_EQ(0x05de, buffer[3]);
285 EXPECT_EQ(0x05d9, buffer[4]);
286 EXPECT_EQ(0x05df, buffer[5]);
287 EXPECT_EQ(0x000d, buffer[6]);
288 EXPECT_EQ(0x000a, buffer[7]);
289 EXPECT_EQ(0x05df, buffer[8]);
290 EXPECT_EQ(0x05d1, buffer[9]);
291 }
292
293 UnloadPage(page);
294}
295
Lei Zhangab41f252018-12-23 03:10:50 +0000296TEST_F(FPDFTextEmbedderTest, TextSearch) {
Lei Zhang30e04982018-03-16 22:48:33 +0000297 ASSERT_TRUE(OpenDocument("hello_world.pdf"));
Tom Sepezda8189e2015-01-30 14:41:50 -0800298 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +0000299 ASSERT_TRUE(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800300
301 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +0000302 ASSERT_TRUE(textpage);
Tom Sepez526f6d52015-01-28 15:49:13 -0800303
Lei Zhangf0f67682019-04-08 17:03:21 +0000304 ScopedFPDFWideString nope = GetFPDFWideString(L"nope");
305 ScopedFPDFWideString world = GetFPDFWideString(L"world");
306 ScopedFPDFWideString world_caps = GetFPDFWideString(L"WORLD");
307 ScopedFPDFWideString world_substr = GetFPDFWideString(L"orld");
Tom Sepez526f6d52015-01-28 15:49:13 -0800308
Lei Zhang6589cf62019-02-11 21:35:17 +0000309 {
Lei Zhangc3cbf482019-02-11 21:44:32 +0000310 // No occurrences of "nope" in test page.
Lei Zhang6589cf62019-02-11 21:35:17 +0000311 ScopedFPDFTextFind search(FPDFText_FindStart(textpage, nope.get(), 0, 0));
312 EXPECT_TRUE(search);
313 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
314 EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
Tom Sepez526f6d52015-01-28 15:49:13 -0800315
Lei Zhang6589cf62019-02-11 21:35:17 +0000316 // Advancing finds nothing.
317 EXPECT_FALSE(FPDFText_FindNext(search.get()));
318 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
319 EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
Tom Sepez526f6d52015-01-28 15:49:13 -0800320
Lei Zhang6589cf62019-02-11 21:35:17 +0000321 // Retreating finds nothing.
322 EXPECT_FALSE(FPDFText_FindPrev(search.get()));
323 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
324 EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
325 }
Tom Sepez526f6d52015-01-28 15:49:13 -0800326
Lei Zhang6589cf62019-02-11 21:35:17 +0000327 {
Lei Zhangc3cbf482019-02-11 21:44:32 +0000328 // Two occurrences of "world" in test page.
Lei Zhang6589cf62019-02-11 21:35:17 +0000329 ScopedFPDFTextFind search(FPDFText_FindStart(textpage, world.get(), 0, 2));
330 EXPECT_TRUE(search);
Tom Sepez526f6d52015-01-28 15:49:13 -0800331
Lei Zhang6589cf62019-02-11 21:35:17 +0000332 // Remains not found until advanced.
333 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
334 EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
Tom Sepez526f6d52015-01-28 15:49:13 -0800335
Lei Zhangc3cbf482019-02-11 21:44:32 +0000336 // First occurrence of "world" in this test page.
Lei Zhang6589cf62019-02-11 21:35:17 +0000337 EXPECT_TRUE(FPDFText_FindNext(search.get()));
338 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
339 EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
Tom Sepez526f6d52015-01-28 15:49:13 -0800340
Lei Zhangc3cbf482019-02-11 21:44:32 +0000341 // Last occurrence of "world" in this test page.
Lei Zhang6589cf62019-02-11 21:35:17 +0000342 EXPECT_TRUE(FPDFText_FindNext(search.get()));
343 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
344 EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
Tom Sepez526f6d52015-01-28 15:49:13 -0800345
Lei Zhang6589cf62019-02-11 21:35:17 +0000346 // Found position unchanged when fails to advance.
347 EXPECT_FALSE(FPDFText_FindNext(search.get()));
348 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
349 EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
Tom Sepez526f6d52015-01-28 15:49:13 -0800350
Lei Zhangc3cbf482019-02-11 21:44:32 +0000351 // Back to first occurrence.
Lei Zhang6589cf62019-02-11 21:35:17 +0000352 EXPECT_TRUE(FPDFText_FindPrev(search.get()));
353 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
354 EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
Tom Sepez526f6d52015-01-28 15:49:13 -0800355
Lei Zhang6589cf62019-02-11 21:35:17 +0000356 // Found position unchanged when fails to retreat.
357 EXPECT_FALSE(FPDFText_FindPrev(search.get()));
358 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
359 EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
360 }
Tom Sepez526f6d52015-01-28 15:49:13 -0800361
Lei Zhang6589cf62019-02-11 21:35:17 +0000362 {
363 // Exact search unaffected by case sensitiity and whole word flags.
364 ScopedFPDFTextFind search(FPDFText_FindStart(
365 textpage, world.get(), FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0));
366 EXPECT_TRUE(search);
367 EXPECT_TRUE(FPDFText_FindNext(search.get()));
368 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
369 EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
370 }
Tom Sepez526f6d52015-01-28 15:49:13 -0800371
Lei Zhang6589cf62019-02-11 21:35:17 +0000372 {
373 // Default is case-insensitive, so matching agaist caps works.
374 ScopedFPDFTextFind search(
375 FPDFText_FindStart(textpage, world_caps.get(), 0, 0));
376 EXPECT_TRUE(search);
377 EXPECT_TRUE(FPDFText_FindNext(search.get()));
378 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
379 EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
380 }
Tom Sepez526f6d52015-01-28 15:49:13 -0800381
Lei Zhang6589cf62019-02-11 21:35:17 +0000382 {
383 // But can be made case sensitive, in which case this fails.
384 ScopedFPDFTextFind search(
385 FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0));
386 EXPECT_FALSE(FPDFText_FindNext(search.get()));
387 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
388 EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
389 }
Tom Sepez526f6d52015-01-28 15:49:13 -0800390
Lei Zhang6589cf62019-02-11 21:35:17 +0000391 {
392 // Default is match anywhere within word, so matching substring works.
393 ScopedFPDFTextFind search(
394 FPDFText_FindStart(textpage, world_substr.get(), 0, 0));
395 EXPECT_TRUE(FPDFText_FindNext(search.get()));
396 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search.get()));
397 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
398 }
Tom Sepez526f6d52015-01-28 15:49:13 -0800399
Lei Zhang6589cf62019-02-11 21:35:17 +0000400 {
401 // But can be made to mach word boundaries, in which case this fails.
402 ScopedFPDFTextFind search(FPDFText_FindStart(textpage, world_substr.get(),
403 FPDF_MATCHWHOLEWORD, 0));
404 EXPECT_FALSE(FPDFText_FindNext(search.get()));
405 // TODO(tsepez): investigate strange index/count values in this state.
406 }
Tom Sepez526f6d52015-01-28 15:49:13 -0800407
408 FPDFText_ClosePage(textpage);
Lei Zhangd27acae2015-05-15 15:36:02 -0700409 UnloadPage(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800410}
411
Lei Zhangc3cbf482019-02-11 21:44:32 +0000412TEST_F(FPDFTextEmbedderTest, TextSearchConsecutive) {
413 ASSERT_TRUE(OpenDocument("find_text_consecutive.pdf"));
414 FPDF_PAGE page = LoadPage(0);
415 ASSERT_TRUE(page);
416
417 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
418 ASSERT_TRUE(textpage);
419
Lei Zhangf0f67682019-04-08 17:03:21 +0000420 ScopedFPDFWideString aaaa = GetFPDFWideString(L"aaaa");
Lei Zhangc3cbf482019-02-11 21:44:32 +0000421
422 {
423 // Search for "aaaa" yields 2 results in "aaaaaaaaaa".
424 ScopedFPDFTextFind search(FPDFText_FindStart(textpage, aaaa.get(), 0, 0));
425 EXPECT_TRUE(search);
426
427 // Remains not found until advanced.
428 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
429 EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
430
431 // First occurrence of "aaaa" in this test page.
432 EXPECT_TRUE(FPDFText_FindNext(search.get()));
433 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
434 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
435
436 // Last occurrence of "aaaa" in this test page.
437 EXPECT_TRUE(FPDFText_FindNext(search.get()));
438 EXPECT_EQ(4, FPDFText_GetSchResultIndex(search.get()));
439 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
440
441 // Found position unchanged when fails to advance.
442 EXPECT_FALSE(FPDFText_FindNext(search.get()));
443 EXPECT_EQ(4, FPDFText_GetSchResultIndex(search.get()));
444 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
445
446 // Back to first occurrence.
447 EXPECT_TRUE(FPDFText_FindPrev(search.get()));
448 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
449 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
450
451 // Found position unchanged when fails to retreat.
452 EXPECT_FALSE(FPDFText_FindPrev(search.get()));
453 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
454 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
455 }
456
457 {
458 // Search for "aaaa" yields 7 results in "aaaaaaaaaa", when searching with
459 // FPDF_CONSECUTIVE.
460 ScopedFPDFTextFind search(
461 FPDFText_FindStart(textpage, aaaa.get(), FPDF_CONSECUTIVE, 0));
462 EXPECT_TRUE(search);
463
464 // Remains not found until advanced.
465 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
466 EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
467
468 // Find consecutive occurrences of "aaaa" in this test page:
469 for (int i = 0; i < 7; ++i) {
470 EXPECT_TRUE(FPDFText_FindNext(search.get()));
471 EXPECT_EQ(i, FPDFText_GetSchResultIndex(search.get()));
472 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
473 }
474
475 // Found position unchanged when fails to advance.
476 EXPECT_FALSE(FPDFText_FindNext(search.get()));
477 EXPECT_EQ(6, FPDFText_GetSchResultIndex(search.get()));
478 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
479
480 for (int i = 5; i >= 0; --i) {
481 EXPECT_TRUE(FPDFText_FindPrev(search.get()));
482 EXPECT_EQ(i, FPDFText_GetSchResultIndex(search.get()));
483 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
484 }
485
486 // Found position unchanged when fails to retreat.
487 EXPECT_FALSE(FPDFText_FindPrev(search.get()));
488 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
489 EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
490 }
491
492 FPDFText_ClosePage(textpage);
493 UnloadPage(page);
494}
495
Lei Zhangfee18fe2019-07-26 23:32:51 +0000496// Fails on Windows. https://crbug.com/pdfium/1370
497#if defined(OS_WIN)
498#define MAYBE_TextSearchLatinExtended DISABLED_TextSearchLatinExtended
499#else
500#define MAYBE_TextSearchLatinExtended TextSearchLatinExtended
501#endif
502TEST_F(FPDFTextEmbedderTest, MAYBE_TextSearchLatinExtended) {
503 ASSERT_TRUE(OpenDocument("latin_extended.pdf"));
504 FPDF_PAGE page = LoadPage(0);
505 ASSERT_TRUE(page);
506
507 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
508 ASSERT_TRUE(textpage);
509
510 // Upper/lowercase 'a' with breve.
511 constexpr FPDF_WCHAR kNeedleUpper[] = {0x0102, 0x0000};
512 constexpr FPDF_WCHAR kNeedleLower[] = {0x0103, 0x0000};
513
514 for (const auto* needle : {kNeedleUpper, kNeedleLower}) {
515 ScopedFPDFTextFind search(FPDFText_FindStart(textpage, needle, 0, 0));
516 EXPECT_TRUE(search);
517 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
518 EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
519
520 // Should find 2 results at position 21/22, both with length 1.
521 EXPECT_TRUE(FPDFText_FindNext(search.get()));
522 EXPECT_EQ(2, FPDFText_GetSchResultIndex(search.get()));
523 EXPECT_EQ(1, FPDFText_GetSchCount(search.get()));
524 EXPECT_TRUE(FPDFText_FindNext(search.get()));
525 EXPECT_EQ(3, FPDFText_GetSchResultIndex(search.get()));
526 EXPECT_EQ(1, FPDFText_GetSchCount(search.get()));
527 // And no more than 2 results.
528 EXPECT_FALSE(FPDFText_FindNext(search.get()));
529 }
530
531 FPDFText_ClosePage(textpage);
532 UnloadPage(page);
533}
534
Tom Sepez26b8a5b2015-01-27 12:42:36 -0800535// Test that the page has characters despite a bad stream length.
Lei Zhangab41f252018-12-23 03:10:50 +0000536TEST_F(FPDFTextEmbedderTest, StreamLengthPastEndOfFile) {
Lei Zhang30e04982018-03-16 22:48:33 +0000537 ASSERT_TRUE(OpenDocument("bug_57.pdf"));
Tom Sepezda8189e2015-01-30 14:41:50 -0800538 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +0000539 ASSERT_TRUE(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800540
Tom Sepez26b8a5b2015-01-27 12:42:36 -0800541 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +0000542 ASSERT_TRUE(textpage);
Tom Sepez26b8a5b2015-01-27 12:42:36 -0800543 EXPECT_EQ(13, FPDFText_CountChars(textpage));
Tom Sepez526f6d52015-01-28 15:49:13 -0800544
545 FPDFText_ClosePage(textpage);
Lei Zhangd27acae2015-05-15 15:36:02 -0700546 UnloadPage(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800547}
548
Lei Zhangab41f252018-12-23 03:10:50 +0000549TEST_F(FPDFTextEmbedderTest, WebLinks) {
Lei Zhang30e04982018-03-16 22:48:33 +0000550 ASSERT_TRUE(OpenDocument("weblinks.pdf"));
Tom Sepezda8189e2015-01-30 14:41:50 -0800551 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +0000552 ASSERT_TRUE(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800553
554 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +0000555 ASSERT_TRUE(textpage);
Tom Sepez526f6d52015-01-28 15:49:13 -0800556
Andrew Weintraub8e0c0db2018-07-17 21:42:17 +0000557 {
558 ScopedFPDFPageLink pagelink(FPDFLink_LoadWebLinks(textpage));
559 EXPECT_TRUE(pagelink);
560
561 // Page contains two HTTP-style URLs.
562 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink.get()));
563
564 // Only a terminating NUL required for bogus links.
565 EXPECT_EQ(1, FPDFLink_GetURL(pagelink.get(), 2, nullptr, 0));
566 EXPECT_EQ(1, FPDFLink_GetURL(pagelink.get(), 1400, nullptr, 0));
567 EXPECT_EQ(1, FPDFLink_GetURL(pagelink.get(), -1, nullptr, 0));
568 }
569
Tom Sepez526f6d52015-01-28 15:49:13 -0800570 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
thestig4997b222016-06-07 10:46:22 -0700571 EXPECT_TRUE(pagelink);
Tom Sepez526f6d52015-01-28 15:49:13 -0800572
Tom Sepez526f6d52015-01-28 15:49:13 -0800573 // Query the number of characters required for each link (incl NUL).
Oliver Chang35e68a52015-12-09 12:44:33 -0800574 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0));
575 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0));
Tom Sepez526f6d52015-01-28 15:49:13 -0800576
Lei Zhangd27acae2015-05-15 15:36:02 -0700577 static const char expected_url[] = "http://example.com?q=foo";
Wei Li05d53f02016-03-29 16:42:53 -0700578 static const size_t expected_len = sizeof(expected_url);
Lei Zhang502e8592018-03-16 22:04:33 +0000579 unsigned short buffer[128];
Tom Sepez526f6d52015-01-28 15:49:13 -0800580
581 // Retrieve a link with too small a buffer. Buffer will not be
582 // NUL-terminated, but must not be modified past indicated length,
583 // so pre-fill with a pattern to check write bounds.
Lei Zhang502e8592018-03-16 22:04:33 +0000584 memset(buffer, 0xbd, sizeof(buffer));
585 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, buffer, 1));
586 EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, 1));
587 EXPECT_EQ(0xbdbd, buffer[1]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800588
589 // Check buffer that doesn't have space for a terminating NUL.
Lei Zhang502e8592018-03-16 22:04:33 +0000590 memset(buffer, 0xbd, sizeof(buffer));
Wei Li05d53f02016-03-29 16:42:53 -0700591 EXPECT_EQ(static_cast<int>(expected_len - 1),
Lei Zhang502e8592018-03-16 22:04:33 +0000592 FPDFLink_GetURL(pagelink, 0, buffer, expected_len - 1));
593 EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, expected_len - 1));
594 EXPECT_EQ(0xbdbd, buffer[expected_len - 1]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800595
596 // Retreive link with exactly-sized buffer.
Lei Zhang502e8592018-03-16 22:04:33 +0000597 memset(buffer, 0xbd, sizeof(buffer));
Wei Li05d53f02016-03-29 16:42:53 -0700598 EXPECT_EQ(static_cast<int>(expected_len),
Lei Zhang502e8592018-03-16 22:04:33 +0000599 FPDFLink_GetURL(pagelink, 0, buffer, expected_len));
600 EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, expected_len));
601 EXPECT_EQ(0u, buffer[expected_len - 1]);
602 EXPECT_EQ(0xbdbd, buffer[expected_len]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800603
604 // Retreive link with ample-sized-buffer.
Lei Zhang502e8592018-03-16 22:04:33 +0000605 memset(buffer, 0xbd, sizeof(buffer));
Wei Li05d53f02016-03-29 16:42:53 -0700606 EXPECT_EQ(static_cast<int>(expected_len),
Lei Zhang502e8592018-03-16 22:04:33 +0000607 FPDFLink_GetURL(pagelink, 0, buffer, 128));
608 EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, expected_len));
609 EXPECT_EQ(0u, buffer[expected_len - 1]);
610 EXPECT_EQ(0xbdbd, buffer[expected_len]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800611
612 // Each link rendered in a single rect in this test page.
613 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0));
614 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1));
615
616 // Each link rendered in a single rect in this test page.
617 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1));
618 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2));
619 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000));
620
621 // Check boundary of valid link index with valid rect index.
622 double left = 0.0;
623 double right = 0.0;
624 double top = 0.0;
625 double bottom = 0.0;
Lei Zhang50bd8fa2018-01-10 17:33:06 +0000626 EXPECT_TRUE(FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom));
Tom Sepez526f6d52015-01-28 15:49:13 -0800627 EXPECT_NEAR(50.791, left, 0.001);
628 EXPECT_NEAR(187.963, right, 0.001);
629 EXPECT_NEAR(97.624, bottom, 0.001);
630 EXPECT_NEAR(108.736, top, 0.001);
631
632 // Check that valid link with invalid rect index leaves parameters unchanged.
633 left = -1.0;
634 right = -1.0;
635 top = -1.0;
636 bottom = -1.0;
Lei Zhang50bd8fa2018-01-10 17:33:06 +0000637 EXPECT_FALSE(FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom));
Tom Sepez526f6d52015-01-28 15:49:13 -0800638 EXPECT_EQ(-1.0, left);
639 EXPECT_EQ(-1.0, right);
640 EXPECT_EQ(-1.0, bottom);
641 EXPECT_EQ(-1.0, top);
642
643 // Check that invalid link index leaves parameters unchanged.
644 left = -2.0;
645 right = -2.0;
646 top = -2.0;
647 bottom = -2.0;
Lei Zhang50bd8fa2018-01-10 17:33:06 +0000648 EXPECT_FALSE(FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom));
Tom Sepez526f6d52015-01-28 15:49:13 -0800649 EXPECT_EQ(-2.0, left);
650 EXPECT_EQ(-2.0, right);
651 EXPECT_EQ(-2.0, bottom);
652 EXPECT_EQ(-2.0, top);
653
654 FPDFLink_CloseWebLinks(pagelink);
655 FPDFText_ClosePage(textpage);
Lei Zhangd27acae2015-05-15 15:36:02 -0700656 UnloadPage(page);
Tom Sepez26b8a5b2015-01-27 12:42:36 -0800657}
Lei Zhang0f2ea022016-01-11 12:01:23 -0800658
Lei Zhangab41f252018-12-23 03:10:50 +0000659TEST_F(FPDFTextEmbedderTest, WebLinksAcrossLines) {
Lei Zhang30e04982018-03-16 22:48:33 +0000660 ASSERT_TRUE(OpenDocument("weblinks_across_lines.pdf"));
Wei Li76309072017-03-16 17:31:03 -0700661 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +0000662 ASSERT_TRUE(page);
Wei Li76309072017-03-16 17:31:03 -0700663
664 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +0000665 ASSERT_TRUE(textpage);
Wei Li76309072017-03-16 17:31:03 -0700666
667 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
668 EXPECT_TRUE(pagelink);
669
670 static const char* const kExpectedUrls[] = {
Wei Li6c8ed642017-05-19 22:17:38 -0700671 "http://example.com", // from "http://www.example.com?\r\nfoo"
Wei Li76309072017-03-16 17:31:03 -0700672 "http://example.com/", // from "http://www.example.com/\r\nfoo"
673 "http://example.com/test-foo", // from "http://example.com/test-\r\nfoo"
674 "http://abc.com/test-foo", // from "http://abc.com/test-\r\n\r\nfoo"
675 // Next two links from "http://www.example.com/\r\nhttp://www.abc.com/"
Manoj Biswas2016aa02019-07-16 20:19:45 +0000676 "http://example.com/",
677 "http://www.abc.com",
Wei Li76309072017-03-16 17:31:03 -0700678 };
Lei Zhang4501a502020-05-18 16:52:59 +0000679 static const int kNumLinks = static_cast<int>(pdfium::size(kExpectedUrls));
Wei Li76309072017-03-16 17:31:03 -0700680
681 EXPECT_EQ(kNumLinks, FPDFLink_CountWebLinks(pagelink));
682
Lei Zhang502e8592018-03-16 22:04:33 +0000683 unsigned short buffer[128];
Wei Li76309072017-03-16 17:31:03 -0700684 for (int i = 0; i < kNumLinks; i++) {
685 const size_t expected_len = strlen(kExpectedUrls[i]) + 1;
Lei Zhang3e873fa2019-01-23 21:15:50 +0000686 memset(buffer, 0, sizeof(buffer));
Wei Li76309072017-03-16 17:31:03 -0700687 EXPECT_EQ(static_cast<int>(expected_len),
688 FPDFLink_GetURL(pagelink, i, nullptr, 0));
Lei Zhang502e8592018-03-16 22:04:33 +0000689 EXPECT_EQ(static_cast<int>(expected_len),
Lei Zhang4501a502020-05-18 16:52:59 +0000690 FPDFLink_GetURL(pagelink, i, buffer, pdfium::size(buffer)));
Lei Zhang502e8592018-03-16 22:04:33 +0000691 EXPECT_TRUE(check_unsigned_shorts(kExpectedUrls[i], buffer, expected_len));
Wei Li76309072017-03-16 17:31:03 -0700692 }
693
694 FPDFLink_CloseWebLinks(pagelink);
695 FPDFText_ClosePage(textpage);
696 UnloadPage(page);
697}
698
Lei Zhangab41f252018-12-23 03:10:50 +0000699TEST_F(FPDFTextEmbedderTest, WebLinksAcrossLinesBug) {
Lei Zhang30e04982018-03-16 22:48:33 +0000700 ASSERT_TRUE(OpenDocument("bug_650.pdf"));
Wei Li76309072017-03-16 17:31:03 -0700701 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +0000702 ASSERT_TRUE(page);
Wei Li76309072017-03-16 17:31:03 -0700703
704 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +0000705 ASSERT_TRUE(textpage);
Wei Li76309072017-03-16 17:31:03 -0700706
707 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
708 EXPECT_TRUE(pagelink);
709
710 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink));
Lei Zhang502e8592018-03-16 22:04:33 +0000711 unsigned short buffer[128] = {0};
Wei Li76309072017-03-16 17:31:03 -0700712 static const char kExpectedUrl[] =
713 "http://tutorial45.com/learn-autocad-basics-day-166/";
714 static const int kUrlSize = static_cast<int>(sizeof(kExpectedUrl));
715
716 EXPECT_EQ(kUrlSize, FPDFLink_GetURL(pagelink, 1, nullptr, 0));
Lei Zhang502e8592018-03-16 22:04:33 +0000717 EXPECT_EQ(kUrlSize,
Lei Zhang4501a502020-05-18 16:52:59 +0000718 FPDFLink_GetURL(pagelink, 1, buffer, pdfium::size(buffer)));
Lei Zhang502e8592018-03-16 22:04:33 +0000719 EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, buffer, kUrlSize));
Wei Li76309072017-03-16 17:31:03 -0700720
721 FPDFLink_CloseWebLinks(pagelink);
722 FPDFText_ClosePage(textpage);
723 UnloadPage(page);
724}
725
Ankit Kumar 🌪️5cf6e202019-07-04 22:42:08 +0000726TEST_F(FPDFTextEmbedderTest, WebLinksCharRanges) {
727 ASSERT_TRUE(OpenDocument("weblinks.pdf"));
728 FPDF_PAGE page = LoadPage(0);
729 ASSERT_TRUE(page);
730
731 FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
732 ASSERT_TRUE(text_page);
733
734 FPDF_PAGELINK page_link = FPDFLink_LoadWebLinks(text_page);
735 EXPECT_TRUE(page_link);
736
737 // Test for char indices of a valid link
738 int start_char_index;
739 int char_count;
740 ASSERT_TRUE(
741 FPDFLink_GetTextRange(page_link, 0, &start_char_index, &char_count));
742 EXPECT_EQ(35, start_char_index);
743 EXPECT_EQ(24, char_count);
744
745 // Test for char indices of an invalid link
746 start_char_index = -10;
747 char_count = -8;
748 ASSERT_FALSE(
749 FPDFLink_GetTextRange(page_link, 6, &start_char_index, &char_count));
750 EXPECT_EQ(start_char_index, -10);
751 EXPECT_EQ(char_count, -8);
752
753 // Test for pagelink = nullptr
754 start_char_index = -10;
755 char_count = -8;
756 ASSERT_FALSE(
757 FPDFLink_GetTextRange(nullptr, 0, &start_char_index, &char_count));
758 EXPECT_EQ(start_char_index, -10);
759 EXPECT_EQ(char_count, -8);
760
761 // Test for link_index < 0
762 start_char_index = -10;
763 char_count = -8;
764 ASSERT_FALSE(
765 FPDFLink_GetTextRange(page_link, -4, &start_char_index, &char_count));
766 EXPECT_EQ(start_char_index, -10);
767 EXPECT_EQ(char_count, -8);
768
769 FPDFLink_CloseWebLinks(page_link);
770 FPDFText_ClosePage(text_page);
771 UnloadPage(page);
772}
773
Manoj Biswas2016aa02019-07-16 20:19:45 +0000774TEST_F(FPDFTextEmbedderTest, AnnotLinks) {
Lei Zhang37991e72020-04-09 18:15:51 +0000775 ASSERT_TRUE(OpenDocument("annots.pdf"));
Manoj Biswas2016aa02019-07-16 20:19:45 +0000776 FPDF_PAGE page = LoadPage(0);
777 ASSERT_TRUE(page);
778
779 // Get link count via checking annotation subtype
780 int annot_count = FPDFPage_GetAnnotCount(page);
Lei Zhang37991e72020-04-09 18:15:51 +0000781 ASSERT_EQ(9, annot_count);
Manoj Biswas2016aa02019-07-16 20:19:45 +0000782 int annot_subtype_link_count = 0;
783 for (int i = 0; i < annot_count; ++i) {
784 ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
785 if (FPDFAnnot_GetSubtype(annot.get()) == FPDF_ANNOT_LINK) {
786 ++annot_subtype_link_count;
787 }
788 }
789 EXPECT_EQ(4, annot_subtype_link_count);
790
791 // Validate that FPDFLink_Enumerate() returns same number of links
792 int start_pos = 0;
793 FPDF_LINK link_annot;
794 int link_count = 0;
795 while (FPDFLink_Enumerate(page, &start_pos, &link_annot)) {
796 ASSERT_TRUE(link_annot);
797 if (start_pos == 1 || start_pos == 2) {
798 // First two links point to first and second page within the document
799 // respectively
800 FPDF_DEST link_dest = FPDFLink_GetDest(document(), link_annot);
801 EXPECT_TRUE(link_dest);
802 EXPECT_EQ(start_pos - 1,
803 FPDFDest_GetDestPageIndex(document(), link_dest));
804 } else if (start_pos == 3) { // points to PDF Spec URL
805 FS_RECTF link_rect;
806 EXPECT_TRUE(FPDFLink_GetAnnotRect(link_annot, &link_rect));
807 EXPECT_NEAR(66.0, link_rect.left, 0.001);
808 EXPECT_NEAR(544.0, link_rect.top, 0.001);
809 EXPECT_NEAR(196.0, link_rect.right, 0.001);
810 EXPECT_NEAR(529.0, link_rect.bottom, 0.001);
811 } else if (start_pos == 4) { // this link has quad points
812 int quad_point_count = FPDFLink_CountQuadPoints(link_annot);
813 EXPECT_EQ(1, quad_point_count);
814 FS_QUADPOINTSF quad_points;
815 EXPECT_TRUE(FPDFLink_GetQuadPoints(link_annot, 0, &quad_points));
816 EXPECT_NEAR(83.0, quad_points.x1, 0.001);
817 EXPECT_NEAR(453.0, quad_points.y1, 0.001);
818 EXPECT_NEAR(178.0, quad_points.x2, 0.001);
819 EXPECT_NEAR(453.0, quad_points.y2, 0.001);
820 EXPECT_NEAR(83.0, quad_points.x3, 0.001);
821 EXPECT_NEAR(440.0, quad_points.y3, 0.001);
822 EXPECT_NEAR(178.0, quad_points.x4, 0.001);
823 EXPECT_NEAR(440.0, quad_points.y4, 0.001);
824 // AnnotRect is same as quad points for this link
825 FS_RECTF link_rect;
826 EXPECT_TRUE(FPDFLink_GetAnnotRect(link_annot, &link_rect));
827 EXPECT_NEAR(link_rect.left, quad_points.x1, 0.001);
828 EXPECT_NEAR(link_rect.top, quad_points.y1, 0.001);
829 EXPECT_NEAR(link_rect.right, quad_points.x4, 0.001);
830 EXPECT_NEAR(link_rect.bottom, quad_points.y4, 0.001);
831 }
832 ++link_count;
833 }
834 EXPECT_EQ(annot_subtype_link_count, link_count);
835
836 UnloadPage(page);
837}
838
Lei Zhangab41f252018-12-23 03:10:50 +0000839TEST_F(FPDFTextEmbedderTest, GetFontSize) {
Lei Zhang30e04982018-03-16 22:48:33 +0000840 ASSERT_TRUE(OpenDocument("hello_world.pdf"));
Lei Zhang0f2ea022016-01-11 12:01:23 -0800841 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +0000842 ASSERT_TRUE(page);
Lei Zhang0f2ea022016-01-11 12:01:23 -0800843
844 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +0000845 ASSERT_TRUE(textpage);
Lei Zhang0f2ea022016-01-11 12:01:23 -0800846
847 const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
848 12, 12, 12, 1, 1, 16, 16, 16, 16, 16,
849 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
850
851 int count = FPDFText_CountChars(textpage);
Lei Zhang4501a502020-05-18 16:52:59 +0000852 ASSERT_EQ(pdfium::size(kExpectedFontsSizes), static_cast<size_t>(count));
Lei Zhang0f2ea022016-01-11 12:01:23 -0800853 for (int i = 0; i < count; ++i)
854 EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i;
855
856 FPDFText_ClosePage(textpage);
857 UnloadPage(page);
858}
npm84be3a32016-09-15 13:27:21 -0700859
Lei Zhangab41f252018-12-23 03:10:50 +0000860TEST_F(FPDFTextEmbedderTest, GetFontInfo) {
Nicolas Pena1b2b76d2018-08-17 19:54:23 +0000861 ASSERT_TRUE(OpenDocument("hello_world.pdf"));
862 FPDF_PAGE page = LoadPage(0);
863 ASSERT_TRUE(page);
864
865 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
866 ASSERT_TRUE(textpage);
867 std::vector<char> font_name;
868 size_t num_chars1 = strlen("Hello, world!");
869 const char kExpectedFontName1[] = "Times-Roman";
870
871 for (size_t i = 0; i < num_chars1; i++) {
872 int flags = -1;
873 unsigned long length =
874 FPDFText_GetFontInfo(textpage, i, nullptr, 0, &flags);
875 static constexpr unsigned long expected_length = sizeof(kExpectedFontName1);
876 ASSERT_EQ(expected_length, length);
877 EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
878 font_name.resize(length);
879 std::fill(font_name.begin(), font_name.end(), 'a');
880 flags = -1;
881 EXPECT_EQ(expected_length,
882 FPDFText_GetFontInfo(textpage, i, font_name.data(),
883 font_name.size(), &flags));
884 EXPECT_STREQ(kExpectedFontName1, font_name.data());
885 EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
886 }
887 // If the size of the buffer is not large enough, the buffer should remain
888 // unchanged.
889 font_name.pop_back();
890 std::fill(font_name.begin(), font_name.end(), 'a');
891 EXPECT_EQ(sizeof(kExpectedFontName1),
892 FPDFText_GetFontInfo(textpage, 0, font_name.data(),
893 font_name.size(), nullptr));
894 for (char a : font_name)
895 EXPECT_EQ('a', a);
896
897 // The text is "Hello, world!\r\nGoodbye, world!", so the next two characters
898 // do not have any font information.
899 EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, num_chars1, font_name.data(),
900 font_name.size(), nullptr));
901 EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, num_chars1 + 1, font_name.data(),
902 font_name.size(), nullptr));
903
904 size_t num_chars2 = strlen("Goodbye, world!");
905 const char kExpectedFontName2[] = "Helvetica";
906 for (size_t i = num_chars1 + 2; i < num_chars1 + num_chars2 + 2; i++) {
907 int flags = -1;
908 unsigned long length =
909 FPDFText_GetFontInfo(textpage, i, nullptr, 0, &flags);
910 static constexpr unsigned long expected_length = sizeof(kExpectedFontName2);
911 ASSERT_EQ(expected_length, length);
912 EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
913 font_name.resize(length);
914 std::fill(font_name.begin(), font_name.end(), 'a');
915 flags = -1;
916 EXPECT_EQ(expected_length,
917 FPDFText_GetFontInfo(textpage, i, font_name.data(),
918 font_name.size(), &flags));
919 EXPECT_STREQ(kExpectedFontName2, font_name.data());
920 EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
921 }
922
923 // Now try some out of bounds indices and null pointers to make sure we do not
924 // crash.
925 // No textpage.
926 EXPECT_EQ(0u, FPDFText_GetFontInfo(nullptr, 0, font_name.data(),
927 font_name.size(), nullptr));
928 // No buffer.
929 EXPECT_EQ(sizeof(kExpectedFontName1),
930 FPDFText_GetFontInfo(textpage, 0, nullptr, 0, nullptr));
931 // Negative index.
932 EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, -1, font_name.data(),
933 font_name.size(), nullptr));
934 // Out of bounds index.
935 EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, 1000, font_name.data(),
936 font_name.size(), nullptr));
937
938 FPDFText_ClosePage(textpage);
939 UnloadPage(page);
940}
941
Lei Zhangab41f252018-12-23 03:10:50 +0000942TEST_F(FPDFTextEmbedderTest, ToUnicode) {
Lei Zhang30e04982018-03-16 22:48:33 +0000943 ASSERT_TRUE(OpenDocument("bug_583.pdf"));
npm84be3a32016-09-15 13:27:21 -0700944 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +0000945 ASSERT_TRUE(page);
npm84be3a32016-09-15 13:27:21 -0700946
947 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +0000948 ASSERT_TRUE(textpage);
npm84be3a32016-09-15 13:27:21 -0700949
950 ASSERT_EQ(1, FPDFText_CountChars(textpage));
Lei Zhanga0608aa2018-06-08 14:50:49 +0000951 EXPECT_EQ(0U, FPDFText_GetUnicode(textpage, 0));
npm84be3a32016-09-15 13:27:21 -0700952
953 FPDFText_ClosePage(textpage);
954 UnloadPage(page);
955}
Lei Zhang65f31622017-10-24 08:36:44 -0700956
Lei Zhangab41f252018-12-23 03:10:50 +0000957TEST_F(FPDFTextEmbedderTest, Bug_921) {
Lei Zhang30e04982018-03-16 22:48:33 +0000958 ASSERT_TRUE(OpenDocument("bug_921.pdf"));
Lei Zhang65f31622017-10-24 08:36:44 -0700959 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +0000960 ASSERT_TRUE(page);
Lei Zhang65f31622017-10-24 08:36:44 -0700961
962 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +0000963 ASSERT_TRUE(textpage);
Lei Zhang65f31622017-10-24 08:36:44 -0700964
965 static constexpr unsigned int kData[] = {
966 1095, 1077, 1083, 1086, 1074, 1077, 1095, 1077, 1089, 1082, 1086, 1077,
967 32, 1089, 1090, 1088, 1072, 1076, 1072, 1085, 1080, 1077, 46, 32};
968 static constexpr int kStartIndex = 238;
969
970 ASSERT_EQ(268, FPDFText_CountChars(textpage));
Lei Zhang4501a502020-05-18 16:52:59 +0000971 for (size_t i = 0; i < pdfium::size(kData); ++i)
Lei Zhang65f31622017-10-24 08:36:44 -0700972 EXPECT_EQ(kData[i], FPDFText_GetUnicode(textpage, kStartIndex + i));
973
Lei Zhang4501a502020-05-18 16:52:59 +0000974 unsigned short buffer[pdfium::size(kData) + 1];
Lei Zhang65f31622017-10-24 08:36:44 -0700975 memset(buffer, 0xbd, sizeof(buffer));
976 int count =
Lei Zhang4501a502020-05-18 16:52:59 +0000977 FPDFText_GetText(textpage, kStartIndex, pdfium::size(kData), buffer);
Lei Zhang65f31622017-10-24 08:36:44 -0700978 ASSERT_GT(count, 0);
Lei Zhang4501a502020-05-18 16:52:59 +0000979 ASSERT_EQ(pdfium::size(kData) + 1, static_cast<size_t>(count));
980 for (size_t i = 0; i < pdfium::size(kData); ++i)
Lei Zhang65f31622017-10-24 08:36:44 -0700981 EXPECT_EQ(kData[i], buffer[i]);
Lei Zhang4501a502020-05-18 16:52:59 +0000982 EXPECT_EQ(0, buffer[pdfium::size(kData)]);
Lei Zhang65f31622017-10-24 08:36:44 -0700983
984 FPDFText_ClosePage(textpage);
985 UnloadPage(page);
986}
Ryan Harrison9d0d7c82017-11-20 19:32:50 +0000987
Lei Zhangab41f252018-12-23 03:10:50 +0000988TEST_F(FPDFTextEmbedderTest, GetTextWithHyphen) {
Lei Zhang30e04982018-03-16 22:48:33 +0000989 ASSERT_TRUE(OpenDocument("bug_781804.pdf"));
Ryan Harrison9d0d7c82017-11-20 19:32:50 +0000990 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +0000991 ASSERT_TRUE(page);
Ryan Harrison9d0d7c82017-11-20 19:32:50 +0000992
993 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +0000994 ASSERT_TRUE(textpage);
Ryan Harrison9d0d7c82017-11-20 19:32:50 +0000995
996 // Check that soft hyphens are not included
997 // Expecting 'Veritaserum', except there is a \uFFFE where the hyphen was in
998 // the original text. This is a weird thing that Adobe does, which we
999 // replicate.
1000 constexpr unsigned short soft_expected[] = {
1001 0x0056, 0x0065, 0x0072, 0x0069, 0x0074, 0x0061, 0xfffe,
1002 0x0073, 0x0065, 0x0072, 0x0075, 0x006D, 0x0000};
1003 {
Lei Zhang4501a502020-05-18 16:52:59 +00001004 constexpr int count = pdfium::size(soft_expected) - 1;
1005 unsigned short buffer[pdfium::size(soft_expected)];
Ryan Harrison9d0d7c82017-11-20 19:32:50 +00001006 memset(buffer, 0, sizeof(buffer));
1007
Ryan Harrison3fc7fe52017-11-27 19:30:17 +00001008 EXPECT_EQ(count + 1, FPDFText_GetText(textpage, 0, count, buffer));
1009 for (int i = 0; i < count; i++)
Ryan Harrison9d0d7c82017-11-20 19:32:50 +00001010 EXPECT_EQ(soft_expected[i], buffer[i]);
1011 }
1012
1013 // Check that hard hyphens are included
1014 {
1015 // There isn't the \0 in the actual doc, but there is a \r\n, so need to
1016 // add 1 to get aligned.
Lei Zhang4501a502020-05-18 16:52:59 +00001017 constexpr size_t offset = pdfium::size(soft_expected) + 1;
Ryan Harrison9d0d7c82017-11-20 19:32:50 +00001018 // Expecting 'User-\r\ngenerated', the - is a unicode character, so cannnot
1019 // store in a char[].
1020 constexpr unsigned short hard_expected[] = {
1021 0x0055, 0x0073, 0x0065, 0x0072, 0x2010, 0x000d, 0x000a, 0x0067, 0x0065,
1022 0x006e, 0x0065, 0x0072, 0x0061, 0x0074, 0x0065, 0x0064, 0x0000};
Lei Zhang4501a502020-05-18 16:52:59 +00001023 constexpr int count = pdfium::size(hard_expected) - 1;
1024 unsigned short buffer[pdfium::size(hard_expected)];
Ryan Harrison9d0d7c82017-11-20 19:32:50 +00001025
Ryan Harrison3fc7fe52017-11-27 19:30:17 +00001026 EXPECT_EQ(count + 1, FPDFText_GetText(textpage, offset, count, buffer));
1027 for (int i = 0; i < count; i++)
Ryan Harrison9d0d7c82017-11-20 19:32:50 +00001028 EXPECT_EQ(hard_expected[i], buffer[i]);
1029 }
1030
1031 FPDFText_ClosePage(textpage);
1032 UnloadPage(page);
1033}
1034
Lei Zhangab41f252018-12-23 03:10:50 +00001035TEST_F(FPDFTextEmbedderTest, bug_782596) {
Ryan Harrison9d0d7c82017-11-20 19:32:50 +00001036 // If there is a regression in this test, it will only fail under ASAN
Lei Zhang30e04982018-03-16 22:48:33 +00001037 ASSERT_TRUE(OpenDocument("bug_782596.pdf"));
Ryan Harrison9d0d7c82017-11-20 19:32:50 +00001038 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +00001039 ASSERT_TRUE(page);
Ryan Harrison9d0d7c82017-11-20 19:32:50 +00001040 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +00001041 ASSERT_TRUE(textpage);
Ryan Harrison9d0d7c82017-11-20 19:32:50 +00001042 FPDFText_ClosePage(textpage);
1043 UnloadPage(page);
1044}
Ryan Harrison8b357e72017-11-30 21:02:41 +00001045
Lei Zhangab41f252018-12-23 03:10:50 +00001046TEST_F(FPDFTextEmbedderTest, ControlCharacters) {
Lei Zhang30e04982018-03-16 22:48:33 +00001047 ASSERT_TRUE(OpenDocument("control_characters.pdf"));
Ryan Harrison8b357e72017-11-30 21:02:41 +00001048 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +00001049 ASSERT_TRUE(page);
Ryan Harrison8b357e72017-11-30 21:02:41 +00001050
1051 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +00001052 ASSERT_TRUE(textpage);
Ryan Harrison8b357e72017-11-30 21:02:41 +00001053
1054 // Should not include the control characters in the output
Lei Zhang502e8592018-03-16 22:04:33 +00001055 unsigned short buffer[128];
1056 memset(buffer, 0xbd, sizeof(buffer));
1057 int num_chars = FPDFText_GetText(textpage, 0, 128, buffer);
Lei Zhanga0608aa2018-06-08 14:50:49 +00001058 ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
1059 EXPECT_TRUE(
1060 check_unsigned_shorts(kHelloGoodbyeText, buffer, kHelloGoodbyeTextSize));
Ryan Harrison8b357e72017-11-30 21:02:41 +00001061
1062 // Attempting to get a chunk of text after the control characters
1063 static const char expected_substring[] = "Goodbye, world!";
1064 // Offset is the length of 'Hello, world!\r\n' + 2 control characters in the
1065 // original stream
1066 static const int offset = 17;
Lei Zhang502e8592018-03-16 22:04:33 +00001067 memset(buffer, 0xbd, sizeof(buffer));
1068 num_chars = FPDFText_GetText(textpage, offset, 128, buffer);
Ryan Harrison8b357e72017-11-30 21:02:41 +00001069
1070 ASSERT_GE(num_chars, 0);
1071 EXPECT_EQ(sizeof(expected_substring), static_cast<size_t>(num_chars));
Lei Zhang502e8592018-03-16 22:04:33 +00001072 EXPECT_TRUE(check_unsigned_shorts(expected_substring, buffer,
Ryan Harrison8b357e72017-11-30 21:02:41 +00001073 sizeof(expected_substring)));
1074
1075 FPDFText_ClosePage(textpage);
1076 UnloadPage(page);
1077}
Ryan Harrison64c66432018-03-12 16:12:44 +00001078
1079// Testing that hyphen makers (0x0002) are replacing hard hyphens when
1080// the word contains non-ASCII characters.
Lei Zhangab41f252018-12-23 03:10:50 +00001081TEST_F(FPDFTextEmbedderTest, bug_1029) {
Lei Zhang30e04982018-03-16 22:48:33 +00001082 ASSERT_TRUE(OpenDocument("bug_1029.pdf"));
Ryan Harrison64c66432018-03-12 16:12:44 +00001083 FPDF_PAGE page = LoadPage(0);
Lei Zhang30e04982018-03-16 22:48:33 +00001084 ASSERT_TRUE(page);
Ryan Harrison64c66432018-03-12 16:12:44 +00001085
1086 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
Lei Zhang30e04982018-03-16 22:48:33 +00001087 ASSERT_TRUE(textpage);
Ryan Harrison64c66432018-03-12 16:12:44 +00001088
1089 constexpr int page_range_offset = 171;
1090 constexpr int page_range_length = 56;
1091
1092 // This text is:
1093 // 'METADATA table. When the split has committed, it noti' followed
1094 // by a 'soft hyphen' (0x0002) and then 'fi'.
1095 //
1096 // The original text has a fi ligature, but that is broken up into
1097 // two characters when the PDF is processed.
1098 constexpr unsigned int expected[] = {
1099 0x004d, 0x0045, 0x0054, 0x0041, 0x0044, 0x0041, 0x0054, 0x0041,
1100 0x0020, 0x0074, 0x0061, 0x0062, 0x006c, 0x0065, 0x002e, 0x0020,
1101 0x0057, 0x0068, 0x0065, 0x006e, 0x0020, 0x0074, 0x0068, 0x0065,
1102 0x0020, 0x0073, 0x0070, 0x006c, 0x0069, 0x0074, 0x0020, 0x0068,
1103 0x0061, 0x0073, 0x0020, 0x0063, 0x006f, 0x006d, 0x006d, 0x0069,
1104 0x0074, 0x0074, 0x0065, 0x0064, 0x002c, 0x0020, 0x0069, 0x0074,
1105 0x0020, 0x006e, 0x006f, 0x0074, 0x0069, 0x0002, 0x0066, 0x0069};
Lei Zhang4501a502020-05-18 16:52:59 +00001106 static_assert(page_range_length == pdfium::size(expected),
Ryan Harrison64c66432018-03-12 16:12:44 +00001107 "Expected should be the same size as the range being "
1108 "extracted from page.");
1109 EXPECT_LT(page_range_offset + page_range_length,
1110 FPDFText_CountChars(textpage));
1111
1112 for (int i = 0; i < page_range_length; ++i) {
1113 EXPECT_EQ(expected[i],
1114 FPDFText_GetUnicode(textpage, page_range_offset + i));
1115 }
1116
1117 FPDFText_ClosePage(textpage);
1118 UnloadPage(page);
1119}
Lei Zhang30e04982018-03-16 22:48:33 +00001120
Lei Zhangab41f252018-12-23 03:10:50 +00001121TEST_F(FPDFTextEmbedderTest, CountRects) {
Lei Zhang30e04982018-03-16 22:48:33 +00001122 ASSERT_TRUE(OpenDocument("hello_world.pdf"));
1123 FPDF_PAGE page = LoadPage(0);
1124 ASSERT_TRUE(page);
1125
1126 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
1127 ASSERT_TRUE(textpage);
1128
1129 // Sanity check hello_world.pdf.
1130 // |num_chars| check includes the terminating NUL that is provided.
Lei Zhang30e04982018-03-16 22:48:33 +00001131 {
1132 unsigned short buffer[128];
1133 int num_chars = FPDFText_GetText(textpage, 0, 128, buffer);
Lei Zhanga0608aa2018-06-08 14:50:49 +00001134 ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
1135 EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText, buffer,
1136 kHelloGoodbyeTextSize));
Lei Zhang30e04982018-03-16 22:48:33 +00001137 }
1138
1139 // Now test FPDFText_CountRects().
1140 static const int kHelloWorldEnd = strlen("Hello, world!");
1141 static const int kGoodbyeWorldStart = kHelloWorldEnd + 2; // "\r\n"
1142 for (int start = 0; start < kHelloWorldEnd; ++start) {
1143 // Always grab some part of "hello world" and some part of "goodbye world"
1144 // Since -1 means "all".
1145 EXPECT_EQ(2, FPDFText_CountRects(textpage, start, -1));
1146
1147 // No characters always means 0 rects.
1148 EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 0));
1149
1150 // 1 character stays within "hello world"
1151 EXPECT_EQ(1, FPDFText_CountRects(textpage, start, 1));
1152
1153 // When |start| is 0, Having |kGoodbyeWorldStart| char count does not reach
1154 // "goodbye world".
1155 int expected_value = start ? 2 : 1;
1156 EXPECT_EQ(expected_value,
1157 FPDFText_CountRects(textpage, start, kGoodbyeWorldStart));
1158
1159 // Extremely large character count will always return 2 rects because
1160 // |start| starts inside "hello world".
1161 EXPECT_EQ(2, FPDFText_CountRects(textpage, start, 500));
1162 }
1163
Lei Zhang30e04982018-03-16 22:48:33 +00001164 // Now test negative counts.
1165 for (int start = 0; start < kHelloWorldEnd; ++start) {
1166 EXPECT_EQ(2, FPDFText_CountRects(textpage, start, -100));
1167 EXPECT_EQ(2, FPDFText_CountRects(textpage, start, -2));
1168 }
Lei Zhang30e04982018-03-16 22:48:33 +00001169
1170 // Now test larger start values.
Lei Zhanga0608aa2018-06-08 14:50:49 +00001171 const int kExpectedLength = strlen(kHelloGoodbyeText);
Lei Zhang30e04982018-03-16 22:48:33 +00001172 for (int start = kGoodbyeWorldStart + 1; start < kExpectedLength; ++start) {
1173 EXPECT_EQ(1, FPDFText_CountRects(textpage, start, -1));
1174 EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 0));
1175 EXPECT_EQ(1, FPDFText_CountRects(textpage, start, 1));
1176 EXPECT_EQ(1, FPDFText_CountRects(textpage, start, 2));
1177 EXPECT_EQ(1, FPDFText_CountRects(textpage, start, 500));
1178 }
1179
Lei Zhang30e04982018-03-16 22:48:33 +00001180 // Now test start values that starts beyond the end of the text.
1181 for (int start = kExpectedLength; start < 100; ++start) {
1182 EXPECT_EQ(0, FPDFText_CountRects(textpage, start, -1));
1183 EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 0));
1184 EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 1));
1185 EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 2));
1186 EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 500));
1187 }
Lei Zhang30e04982018-03-16 22:48:33 +00001188
1189 FPDFText_ClosePage(textpage);
1190 UnloadPage(page);
1191}
Lei Zhang49fa50d2018-06-08 15:31:10 +00001192
Lei Zhangab41f252018-12-23 03:10:50 +00001193TEST_F(FPDFTextEmbedderTest, GetText) {
Miklos Vajna3bee9c62018-08-07 21:45:34 +00001194 ASSERT_TRUE(OpenDocument("hello_world.pdf"));
1195 FPDF_PAGE page = LoadPage(0);
1196 ASSERT_TRUE(page);
1197
1198 FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
1199 ASSERT_TRUE(text_page);
1200
1201 EXPECT_EQ(2, FPDFPage_CountObjects(page));
1202 FPDF_PAGEOBJECT text_object = FPDFPage_GetObject(page, 0);
1203 ASSERT_TRUE(text_object);
1204
1205 // Positive testing.
1206 constexpr char kHelloText[] = "Hello, world!";
1207 // Return value includes the terminating NUL that is provided.
Lei Zhang4501a502020-05-18 16:52:59 +00001208 constexpr unsigned long kHelloUTF16Size = pdfium::size(kHelloText) * 2;
Miklos Vajna3bee9c62018-08-07 21:45:34 +00001209 constexpr wchar_t kHelloWideText[] = L"Hello, world!";
1210 unsigned long size = FPDFTextObj_GetText(text_object, text_page, nullptr, 0);
1211 ASSERT_EQ(kHelloUTF16Size, size);
1212
1213 std::vector<unsigned short> buffer(size);
1214 ASSERT_EQ(size,
1215 FPDFTextObj_GetText(text_object, text_page, buffer.data(), size));
1216 ASSERT_EQ(kHelloWideText, GetPlatformWString(buffer.data()));
1217
1218 // Negative testing.
1219 ASSERT_EQ(0U, FPDFTextObj_GetText(nullptr, text_page, nullptr, 0));
1220 ASSERT_EQ(0U, FPDFTextObj_GetText(text_object, nullptr, nullptr, 0));
1221 ASSERT_EQ(0U, FPDFTextObj_GetText(nullptr, nullptr, nullptr, 0));
1222
1223 // Buffer is too small, ensure it's not modified.
1224 buffer.resize(2);
1225 buffer[0] = 'x';
1226 buffer[1] = '\0';
1227 size =
1228 FPDFTextObj_GetText(text_object, text_page, buffer.data(), buffer.size());
1229 ASSERT_EQ(kHelloUTF16Size, size);
1230 ASSERT_EQ('x', buffer[0]);
1231 ASSERT_EQ('\0', buffer[1]);
1232
1233 FPDFText_ClosePage(text_page);
1234 UnloadPage(page);
1235}
1236
Lei Zhangab41f252018-12-23 03:10:50 +00001237TEST_F(FPDFTextEmbedderTest, CroppedText) {
Lei Zhang49fa50d2018-06-08 15:31:10 +00001238 static constexpr int kPageCount = 4;
1239 static constexpr FS_RECTF kBoxes[kPageCount] = {
1240 {50.0f, 150.0f, 150.0f, 50.0f},
1241 {50.0f, 150.0f, 150.0f, 50.0f},
1242 {60.0f, 150.0f, 150.0f, 60.0f},
1243 {60.0f, 150.0f, 150.0f, 60.0f},
1244 };
1245 static constexpr const char* kExpectedText[kPageCount] = {
Manoj Biswas2016aa02019-07-16 20:19:45 +00001246 " world!\r\ndbye, world!",
1247 " world!\r\ndbye, world!",
1248 "bye, world!",
Lei Zhang49fa50d2018-06-08 15:31:10 +00001249 "bye, world!",
1250 };
1251
1252 ASSERT_TRUE(OpenDocument("cropped_text.pdf"));
1253 ASSERT_EQ(kPageCount, FPDF_GetPageCount(document()));
1254
1255 for (int i = 0; i < kPageCount; ++i) {
1256 FPDF_PAGE page = LoadPage(i);
1257 ASSERT_TRUE(page);
1258
1259 FS_RECTF box;
1260 EXPECT_TRUE(FPDF_GetPageBoundingBox(page, &box));
1261 EXPECT_EQ(kBoxes[i].left, box.left);
1262 EXPECT_EQ(kBoxes[i].top, box.top);
1263 EXPECT_EQ(kBoxes[i].right, box.right);
1264 EXPECT_EQ(kBoxes[i].bottom, box.bottom);
1265
1266 {
1267 ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
1268 ASSERT_TRUE(textpage);
1269
1270 unsigned short buffer[128];
1271 memset(buffer, 0xbd, sizeof(buffer));
1272 int num_chars = FPDFText_GetText(textpage.get(), 0, 128, buffer);
1273 ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
1274 EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText, buffer,
1275 kHelloGoodbyeTextSize));
1276
1277 int expected_char_count = strlen(kExpectedText[i]);
1278 ASSERT_EQ(expected_char_count,
1279 FPDFText_GetBoundedText(textpage.get(), box.left, box.top,
1280 box.right, box.bottom, nullptr, 0));
1281
1282 memset(buffer, 0xbd, sizeof(buffer));
1283 ASSERT_EQ(expected_char_count + 1,
1284 FPDFText_GetBoundedText(textpage.get(), box.left, box.top,
1285 box.right, box.bottom, buffer, 128));
1286 EXPECT_TRUE(
1287 check_unsigned_shorts(kExpectedText[i], buffer, expected_char_count));
1288 }
1289
1290 UnloadPage(page);
1291 }
1292}
Ryan Harrison4d92af52018-08-28 20:22:32 +00001293
Lei Zhangab41f252018-12-23 03:10:50 +00001294TEST_F(FPDFTextEmbedderTest, Bug_1139) {
Ryan Harrison4d92af52018-08-28 20:22:32 +00001295 ASSERT_TRUE(OpenDocument("bug_1139.pdf"));
1296 FPDF_PAGE page = LoadPage(0);
1297 ASSERT_TRUE(page);
1298
1299 FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
1300 ASSERT_TRUE(text_page);
1301
1302 // -1 for CountChars not including the \0, but +1 for the extra control
1303 // character.
1304 EXPECT_EQ(kHelloGoodbyeTextSize, FPDFText_CountChars(text_page));
1305
1306 // There is an extra control character at the beginning of the string, but it
1307 // should not appear in the output nor prevent extracting the text.
1308 unsigned short buffer[128];
1309 int num_chars = FPDFText_GetText(text_page, 0, 128, buffer);
1310 ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
1311 EXPECT_TRUE(
1312 check_unsigned_shorts(kHelloGoodbyeText, buffer, kHelloGoodbyeTextSize));
1313 FPDFText_ClosePage(text_page);
1314 UnloadPage(page);
1315}
Benjamin Beaudryd31c6672019-07-29 22:50:41 +00001316
Hui Yingst52e91462020-01-14 16:27:28 +00001317TEST_F(FPDFTextEmbedderTest, Bug_642) {
1318 ASSERT_TRUE(OpenDocument("bug_642.pdf"));
1319 FPDF_PAGE page = LoadPage(0);
1320 ASSERT_TRUE(page);
1321 {
1322 ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
1323 ASSERT_TRUE(text_page);
1324
1325 constexpr char kText[] = "ABCD";
Lei Zhang4501a502020-05-18 16:52:59 +00001326 constexpr size_t kTextSize = pdfium::size(kText);
Hui Yingst52e91462020-01-14 16:27:28 +00001327 // -1 for CountChars not including the \0
1328 EXPECT_EQ(static_cast<int>(kTextSize) - 1,
1329 FPDFText_CountChars(text_page.get()));
1330
1331 unsigned short buffer[kTextSize];
1332 int num_chars =
Lei Zhang4501a502020-05-18 16:52:59 +00001333 FPDFText_GetText(text_page.get(), 0, pdfium::size(buffer) - 1, buffer);
Hui Yingst52e91462020-01-14 16:27:28 +00001334 ASSERT_EQ(static_cast<int>(kTextSize), num_chars);
1335 EXPECT_TRUE(check_unsigned_shorts(kText, buffer, kTextSize));
1336 }
1337
1338 UnloadPage(page);
1339}
1340
Benjamin Beaudryd31c6672019-07-29 22:50:41 +00001341TEST_F(FPDFTextEmbedderTest, GetCharAngle) {
1342 ASSERT_TRUE(OpenDocument("rotated_text.pdf"));
1343 FPDF_PAGE page = LoadPage(0);
1344 ASSERT_TRUE(page);
1345
1346 FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
1347 ASSERT_TRUE(text_page);
1348
Lei Zhang4501a502020-05-18 16:52:59 +00001349 static constexpr int kSubstringsSize[] = {pdfium::size("Hello,"),
1350 pdfium::size(" world!\r\n"),
1351 pdfium::size("Goodbye,")};
Benjamin Beaudryd31c6672019-07-29 22:50:41 +00001352
1353 // -1 for CountChars not including the \0, but +1 for the extra control
1354 // character.
1355 EXPECT_EQ(kHelloGoodbyeTextSize, FPDFText_CountChars(text_page));
1356
Lei Zhang8da98232019-12-11 23:29:33 +00001357 EXPECT_FLOAT_EQ(-1.0f, FPDFText_GetCharAngle(nullptr, 0));
1358 EXPECT_FLOAT_EQ(-1.0f, FPDFText_GetCharAngle(text_page, -1));
1359 EXPECT_FLOAT_EQ(-1.0f,
1360 FPDFText_GetCharAngle(text_page, kHelloGoodbyeTextSize + 1));
Benjamin Beaudryd31c6672019-07-29 22:50:41 +00001361
1362 // Test GetCharAngle for every quadrant
1363 EXPECT_NEAR(FX_PI / 4.0, FPDFText_GetCharAngle(text_page, 0), 0.001);
1364 EXPECT_NEAR(3 * FX_PI / 4.0,
1365 FPDFText_GetCharAngle(text_page, kSubstringsSize[0]), 0.001);
1366 EXPECT_NEAR(
1367 5 * FX_PI / 4.0,
1368 FPDFText_GetCharAngle(text_page, kSubstringsSize[0] + kSubstringsSize[1]),
1369 0.001);
1370 EXPECT_NEAR(
1371 7 * FX_PI / 4.0,
1372 FPDFText_GetCharAngle(text_page, kSubstringsSize[0] + kSubstringsSize[1] +
1373 kSubstringsSize[2]),
1374 0.001);
1375
1376 FPDFText_ClosePage(text_page);
1377 UnloadPage(page);
1378}
Benjamin Beaudry633c8c32019-09-09 16:27:28 +00001379
1380TEST_F(FPDFTextEmbedderTest, GetFontWeight) {
1381 ASSERT_TRUE(OpenDocument("font_weight.pdf"));
1382 FPDF_PAGE page = LoadPage(0);
1383 ASSERT_TRUE(page);
1384
1385 FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
1386 ASSERT_TRUE(text_page);
1387
Lei Zhang2a444e12019-11-12 19:31:18 +00001388 EXPECT_EQ(2, FPDFText_CountChars(text_page));
Benjamin Beaudry633c8c32019-09-09 16:27:28 +00001389
1390 EXPECT_EQ(-1, FPDFText_GetFontWeight(nullptr, 0));
1391 EXPECT_EQ(-1, FPDFText_GetFontWeight(text_page, -1));
1392 EXPECT_EQ(-1, FPDFText_GetFontWeight(text_page, 314));
1393
1394 // The font used for this text only specifies /StemV (80); the weight value
1395 // that is returned should be calculated from that (80*5 == 400).
Lei Zhang2a444e12019-11-12 19:31:18 +00001396 EXPECT_EQ(400, FPDFText_GetFontWeight(text_page, 0));
1397
1398 // Using a /StemV value of 82, the estimate comes out to 410, even though
1399 // /FontWeight is 400.
1400 // TODO(crbug.com/pdfium/1420): Fix this the return value here.
1401 EXPECT_EQ(410, FPDFText_GetFontWeight(text_page, 1));
Benjamin Beaudry633c8c32019-09-09 16:27:28 +00001402
1403 FPDFText_ClosePage(text_page);
1404 UnloadPage(page);
1405}
Benjamin Beaudryc3952612019-09-12 18:14:35 +00001406
1407TEST_F(FPDFTextEmbedderTest, GetTextRenderMode) {
Daniel Hosseinian5af51b62020-07-18 00:53:43 +00001408 ASSERT_TRUE(OpenDocument("text_render_mode.pdf"));
Benjamin Beaudryc3952612019-09-12 18:14:35 +00001409 FPDF_PAGE page = LoadPage(0);
1410 ASSERT_TRUE(page);
1411
1412 FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
1413 ASSERT_TRUE(text_page);
1414
1415 ASSERT_EQ(12, FPDFText_CountChars(text_page));
1416
Daniel Hosseinian8cb6a652019-12-18 00:50:41 +00001417 ASSERT_EQ(FPDF_TEXTRENDERMODE_UNKNOWN,
1418 FPDFText_GetTextRenderMode(nullptr, 0));
1419 ASSERT_EQ(FPDF_TEXTRENDERMODE_UNKNOWN,
1420 FPDFText_GetTextRenderMode(text_page, -1));
1421 ASSERT_EQ(FPDF_TEXTRENDERMODE_UNKNOWN,
1422 FPDFText_GetTextRenderMode(text_page, 314));
Benjamin Beaudryc3952612019-09-12 18:14:35 +00001423
1424 ASSERT_EQ(FPDF_TEXTRENDERMODE_FILL, FPDFText_GetTextRenderMode(text_page, 0));
1425
1426 ASSERT_EQ(FPDF_TEXTRENDERMODE_STROKE,
1427 FPDFText_GetTextRenderMode(text_page, 7));
1428
1429 FPDFText_ClosePage(text_page);
1430 UnloadPage(page);
1431}
Benjamin Beaudryf5ad5c12019-09-13 17:37:58 +00001432
1433TEST_F(FPDFTextEmbedderTest, GetFillColor) {
1434 ASSERT_TRUE(OpenDocument("text_color.pdf"));
1435 FPDF_PAGE page = LoadPage(0);
1436 ASSERT_TRUE(page);
1437
1438 FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
1439 ASSERT_TRUE(text_page);
1440
1441 ASSERT_EQ(1, FPDFText_CountChars(text_page));
1442
1443 ASSERT_FALSE(
1444 FPDFText_GetFillColor(nullptr, 0, nullptr, nullptr, nullptr, nullptr));
1445 ASSERT_FALSE(
1446 FPDFText_GetFillColor(text_page, -1, nullptr, nullptr, nullptr, nullptr));
1447 ASSERT_FALSE(FPDFText_GetFillColor(text_page, 314, nullptr, nullptr, nullptr,
1448 nullptr));
1449 ASSERT_FALSE(
1450 FPDFText_GetFillColor(text_page, 0, nullptr, nullptr, nullptr, nullptr));
1451
1452 unsigned int r;
1453 unsigned int g;
1454 unsigned int b;
1455 unsigned int a;
1456 ASSERT_TRUE(FPDFText_GetFillColor(text_page, 0, &r, &g, &b, &a));
1457 ASSERT_EQ(0xffu, r);
1458 ASSERT_EQ(0u, g);
1459 ASSERT_EQ(0u, b);
1460 ASSERT_EQ(0xffu, a);
1461
1462 FPDFText_ClosePage(text_page);
1463 UnloadPage(page);
1464}
1465
1466TEST_F(FPDFTextEmbedderTest, GetStrokeColor) {
1467 ASSERT_TRUE(OpenDocument("text_color.pdf"));
1468 FPDF_PAGE page = LoadPage(0);
1469 ASSERT_TRUE(page);
1470
1471 FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
1472 ASSERT_TRUE(text_page);
1473
1474 ASSERT_EQ(1, FPDFText_CountChars(text_page));
1475
1476 ASSERT_FALSE(
1477 FPDFText_GetStrokeColor(nullptr, 0, nullptr, nullptr, nullptr, nullptr));
1478 ASSERT_FALSE(FPDFText_GetStrokeColor(text_page, -1, nullptr, nullptr, nullptr,
1479 nullptr));
1480 ASSERT_FALSE(FPDFText_GetStrokeColor(text_page, 314, nullptr, nullptr,
1481 nullptr, nullptr));
1482 ASSERT_FALSE(FPDFText_GetStrokeColor(text_page, 0, nullptr, nullptr, nullptr,
1483 nullptr));
1484
1485 unsigned int r;
1486 unsigned int g;
1487 unsigned int b;
1488 unsigned int a;
1489 ASSERT_TRUE(FPDFText_GetStrokeColor(text_page, 0, &r, &g, &b, &a));
1490 ASSERT_EQ(0u, r);
1491 ASSERT_EQ(0xffu, g);
1492 ASSERT_EQ(0u, b);
1493 ASSERT_EQ(0xffu, a);
1494
1495 FPDFText_ClosePage(text_page);
1496 UnloadPage(page);
1497}
Lei Zhang1e386c52020-01-23 22:56:38 +00001498
1499TEST_F(FPDFTextEmbedderTest, GetMatrix) {
1500 constexpr char kExpectedText[] = "A1\r\nA2\r\nA3";
Lei Zhang4501a502020-05-18 16:52:59 +00001501 constexpr size_t kExpectedTextSize = pdfium::size(kExpectedText);
Lei Zhang1e386c52020-01-23 22:56:38 +00001502 constexpr FS_MATRIX kExpectedMatrices[] = {
1503 {12.0f, 0.0f, 0.0f, 10.0f, 66.0f, 90.0f},
1504 {12.0f, 0.0f, 0.0f, 10.0f, 66.0f, 90.0f},
1505 {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
1506 {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
1507 {12.0f, 0.0f, 0.0f, 10.0f, 38.0f, 60.0f},
1508 {12.0f, 0.0f, 0.0f, 10.0f, 38.0f, 60.0f},
1509 {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
1510 {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
1511 {1.0f, 0.0f, 0.0f, 0.833333, 60.0f, 130.0f},
1512 {1.0f, 0.0f, 0.0f, 0.833333, 60.0f, 130.0f},
1513 };
Lei Zhang4501a502020-05-18 16:52:59 +00001514 constexpr size_t kExpectedCount = pdfium::size(kExpectedMatrices);
Lei Zhang1e386c52020-01-23 22:56:38 +00001515 static_assert(kExpectedCount + 1 == kExpectedTextSize,
1516 "Bad expected matrix size");
1517
Lei Zhang1e386c52020-01-23 22:56:38 +00001518 ASSERT_TRUE(OpenDocument("font_matrix.pdf"));
1519 FPDF_PAGE page = LoadPage(0);
1520 ASSERT_TRUE(page);
1521
1522 {
1523 ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
1524 ASSERT_TRUE(text_page);
1525 ASSERT_EQ(static_cast<int>(kExpectedCount),
1526 FPDFText_CountChars(text_page.get()));
1527
1528 {
1529 // Check the characters.
1530 unsigned short buffer[kExpectedTextSize];
1531 ASSERT_EQ(static_cast<int>(kExpectedTextSize),
1532 FPDFText_GetText(text_page.get(), 0, kExpectedCount, buffer));
1533 EXPECT_TRUE(
1534 check_unsigned_shorts(kExpectedText, buffer, kExpectedTextSize));
1535 }
1536
Lei Zhang1e386c52020-01-23 22:56:38 +00001537 // Check the character matrix.
1538 FS_MATRIX matrix;
1539 for (size_t i = 0; i < kExpectedCount; ++i) {
1540 ASSERT_TRUE(FPDFText_GetMatrix(text_page.get(), i, &matrix)) << i;
1541 EXPECT_FLOAT_EQ(kExpectedMatrices[i].a, matrix.a) << i;
1542 EXPECT_FLOAT_EQ(kExpectedMatrices[i].b, matrix.b) << i;
1543 EXPECT_FLOAT_EQ(kExpectedMatrices[i].c, matrix.c) << i;
1544 EXPECT_FLOAT_EQ(kExpectedMatrices[i].d, matrix.d) << i;
1545 EXPECT_FLOAT_EQ(kExpectedMatrices[i].e, matrix.e) << i;
1546 EXPECT_FLOAT_EQ(kExpectedMatrices[i].f, matrix.f) << i;
1547 }
1548
1549 // Check bad parameters.
1550 EXPECT_FALSE(FPDFText_GetMatrix(nullptr, 0, &matrix));
1551 EXPECT_FALSE(FPDFText_GetMatrix(text_page.get(), 10, &matrix));
1552 EXPECT_FALSE(FPDFText_GetMatrix(text_page.get(), -1, &matrix));
1553 EXPECT_FALSE(FPDFText_GetMatrix(text_page.get(), 0, nullptr));
1554 }
1555
1556 UnloadPage(page);
1557}
Lei Zhang48c62882020-02-11 23:43:13 +00001558
1559TEST_F(FPDFTextEmbedderTest, CharBox) {
1560 // For a size 12 letter 'A'.
1561 constexpr double kExpectedCharWidth = 8.436;
1562 constexpr double kExpectedCharHeight = 6.77;
1563 constexpr float kExpectedLooseCharWidth = 8.664f;
1564 constexpr float kExpectedLooseCharHeight = 12.0f;
1565
1566 ASSERT_TRUE(OpenDocument("font_matrix.pdf"));
1567 FPDF_PAGE page = LoadPage(0);
1568 ASSERT_TRUE(page);
1569
1570 {
1571 ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
1572 ASSERT_TRUE(text_page);
1573
1574 // Check the character box size.
1575 double left;
1576 double right;
1577 double bottom;
1578 double top;
1579 ASSERT_TRUE(
1580 FPDFText_GetCharBox(text_page.get(), 0, &left, &right, &bottom, &top));
1581 EXPECT_NEAR(kExpectedCharWidth, right - left, 0.001);
1582 EXPECT_NEAR(kExpectedCharHeight, top - bottom, 0.001);
1583 ASSERT_TRUE(
1584 FPDFText_GetCharBox(text_page.get(), 4, &left, &right, &bottom, &top));
1585 EXPECT_NEAR(kExpectedCharWidth, right - left, 0.001);
1586 EXPECT_NEAR(kExpectedCharHeight, top - bottom, 0.001);
1587 ASSERT_TRUE(
1588 FPDFText_GetCharBox(text_page.get(), 8, &left, &right, &bottom, &top));
1589 EXPECT_NEAR(kExpectedCharWidth, right - left, 0.001);
1590 EXPECT_NEAR(kExpectedCharHeight, top - bottom, 0.001);
1591
1592 // Check the loose character box size.
Lei Zhang48c62882020-02-11 23:43:13 +00001593 FS_RECTF rect;
1594 ASSERT_TRUE(FPDFText_GetLooseCharBox(text_page.get(), 0, &rect));
Lei Zhang12950132020-07-24 20:55:50 +00001595 EXPECT_FLOAT_EQ(kExpectedLooseCharWidth, rect.right - rect.left);
1596 EXPECT_FLOAT_EQ(kExpectedLooseCharHeight, rect.top - rect.bottom);
Lei Zhang48c62882020-02-11 23:43:13 +00001597 ASSERT_TRUE(FPDFText_GetLooseCharBox(text_page.get(), 4, &rect));
Lei Zhang12950132020-07-24 20:55:50 +00001598 EXPECT_FLOAT_EQ(kExpectedLooseCharWidth, rect.right - rect.left);
1599 EXPECT_FLOAT_EQ(kExpectedLooseCharHeight, rect.top - rect.bottom);
Lei Zhang48c62882020-02-11 23:43:13 +00001600 ASSERT_TRUE(FPDFText_GetLooseCharBox(text_page.get(), 8, &rect));
1601 EXPECT_FLOAT_EQ(kExpectedLooseCharWidth, rect.right - rect.left);
1602 EXPECT_FLOAT_EQ(kExpectedLooseCharHeight, rect.top - rect.bottom);
1603 }
1604
1605 UnloadPage(page);
1606}
Lei Zhang2b2e1582020-09-18 22:51:44 +00001607
1608TEST_F(FPDFTextEmbedderTest, SmallType3Glyph) {
1609 ASSERT_TRUE(OpenDocument("bug_1591.pdf"));
1610 FPDF_PAGE page = LoadPage(0);
1611 ASSERT_TRUE(page);
1612
1613 {
1614 ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
1615 ASSERT_TRUE(text_page);
1616 ASSERT_EQ(5, FPDFText_CountChars(text_page.get()));
1617
1618 EXPECT_EQ(49u, FPDFText_GetUnicode(text_page.get(), 0));
1619 EXPECT_EQ(32u, FPDFText_GetUnicode(text_page.get(), 1));
1620 EXPECT_EQ(50u, FPDFText_GetUnicode(text_page.get(), 2));
1621 EXPECT_EQ(32u, FPDFText_GetUnicode(text_page.get(), 3));
1622 EXPECT_EQ(49u, FPDFText_GetUnicode(text_page.get(), 4));
1623
1624 // Check the character box size.
1625 double left;
1626 double right;
1627 double bottom;
1628 double top;
1629 ASSERT_TRUE(
1630 FPDFText_GetCharBox(text_page.get(), 0, &left, &right, &bottom, &top));
1631 EXPECT_DOUBLE_EQ(63.439998626708984, left);
1632 EXPECT_DOUBLE_EQ(65.360000610351562, right);
1633 EXPECT_DOUBLE_EQ(50.0, bottom);
1634 EXPECT_DOUBLE_EQ(61.520000457763672, top);
1635 ASSERT_TRUE(
1636 FPDFText_GetCharBox(text_page.get(), 1, &left, &right, &bottom, &top));
1637 EXPECT_DOUBLE_EQ(62.007999420166016, left);
1638 EXPECT_DOUBLE_EQ(62.007999420166016, right);
1639 EXPECT_DOUBLE_EQ(50.0, bottom);
1640 EXPECT_DOUBLE_EQ(50.0, top);
1641 ASSERT_TRUE(
1642 FPDFText_GetCharBox(text_page.get(), 2, &left, &right, &bottom, &top));
1643 EXPECT_DOUBLE_EQ(86.0, left);
1644 EXPECT_DOUBLE_EQ(88.400001525878906, right);
1645 EXPECT_DOUBLE_EQ(50.0, bottom);
Lei Zhang1d337872020-09-18 22:59:04 +00001646 EXPECT_DOUBLE_EQ(50.240001678466797, top);
Lei Zhang2b2e1582020-09-18 22:51:44 +00001647 ASSERT_TRUE(
1648 FPDFText_GetCharBox(text_page.get(), 3, &left, &right, &bottom, &top));
1649 EXPECT_DOUBLE_EQ(86.010002136230469, left);
1650 EXPECT_DOUBLE_EQ(86.010002136230469, right);
1651 EXPECT_DOUBLE_EQ(50.0, bottom);
1652 EXPECT_DOUBLE_EQ(50.0, top);
1653 ASSERT_TRUE(
1654 FPDFText_GetCharBox(text_page.get(), 4, &left, &right, &bottom, &top));
1655 EXPECT_DOUBLE_EQ(99.44000244140625, left);
1656 EXPECT_DOUBLE_EQ(101.36000061035156, right);
1657 EXPECT_DOUBLE_EQ(50.0, bottom);
1658 EXPECT_DOUBLE_EQ(61.520000457763672, top);
1659 }
1660
1661 UnloadPage(page);
1662}