blob: f87447da329ca5135a8954602670aa0774851faf [file] [log] [blame]
K. Moon832a6942022-10-31 20:11:31 +00001// Copyright 2014 The PDFium Authors
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -07002// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
Bo Xu35228762014-07-08 15:30:46 -07004
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -07005// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
Ryan Harrison275e2602017-09-18 14:23:18 -04007#include "core/fxcrt/widestring.h"
Tom Sepezfcdb2df2017-03-31 10:32:07 -07008
Dan Sinclair3ebd1212016-03-09 09:59:23 -05009#include <stddef.h>
Lei Zhangbd353602021-06-30 04:24:07 +000010#include <string.h>
Lei Zhang375a8642016-01-11 11:59:17 -080011
12#include <algorithm>
Tom Sepez15c1e6a2021-11-16 21:02:27 +000013#include <sstream>
Tom Sepez0b36bb42015-05-01 16:12:48 -070014
Dan Sinclairc0aefd42017-05-04 10:17:51 -040015#include "core/fxcrt/fx_codepage.h"
Dan Sinclaircfb19442017-04-20 13:13:04 -040016#include "core/fxcrt/fx_extension.h"
Tom Sepez746c2872017-04-07 16:35:13 -070017#include "core/fxcrt/fx_safe_types.h"
Lei Zhangbd353602021-06-30 04:24:07 +000018#include "core/fxcrt/fx_system.h"
Dan Sinclair8e9e3d82017-09-21 16:49:32 -040019#include "core/fxcrt/string_pool_template.h"
Tom Sepez49f09322021-01-26 22:13:55 +000020#include "third_party/base/check.h"
Lei Zhang45829202021-04-16 16:42:11 +000021#include "third_party/base/check_op.h"
Lei Zhangefd44232021-07-30 17:04:57 +000022#include "third_party/base/cxx17_backports.h"
Lei Zhang8241df72015-11-06 14:38:48 -080023#include "third_party/base/numerics/safe_math.h"
Chris Palmer30f2ff12014-07-08 13:27:00 -070024
Dan Sinclair20a17552017-09-21 16:35:56 -040025template class fxcrt::StringDataTemplate<wchar_t>;
Ryan Harrison275e2602017-09-18 14:23:18 -040026template class fxcrt::StringViewTemplate<wchar_t>;
Dan Sinclair8e9e3d82017-09-21 16:49:32 -040027template class fxcrt::StringPoolTemplate<WideString>;
Ryan Harrison275e2602017-09-18 14:23:18 -040028template struct std::hash<WideString>;
tsepez46bf0332016-04-06 16:48:26 -070029
Tom Sepezbf9104d2017-03-30 13:51:31 -070030#define FORCE_ANSI 0x10000
31#define FORCE_UNICODE 0x20000
32#define FORCE_INT64 0x40000
33
Lei Zhang11d33362016-02-19 14:26:46 -080034namespace {
35
Lei Zhang7981d152018-01-11 14:26:01 +000036constexpr wchar_t kWideTrimChars[] = L"\x09\x0a\x0b\x0c\x0d\x20";
Lei Zhang135c6602017-11-21 18:07:00 +000037
// Locates the first occurrence of |needle| (|needle_len| characters) inside
// |haystack| (|haystack_len| characters). Neither buffer has to be
// NUL-terminated since both lengths are explicit.
//
// Returns a pointer to the start of the match within |haystack|, or nullptr
// when there is no match, when |needle| is empty, or when |needle| is longer
// than |haystack|.
const wchar_t* FX_wcsstr(const wchar_t* haystack,
                         size_t haystack_len,
                         const wchar_t* needle,
                         size_t needle_len) {
  if (needle_len > haystack_len || needle_len == 0)
    return nullptr;

  const wchar_t* const haystack_end = haystack + haystack_len;
  const wchar_t* const match =
      std::search(haystack, haystack_end, needle, needle + needle_len);

  // std::search() signals "not found" by returning the end of the range.
  return match != haystack_end ? match : nullptr;
}
60
Lei Zhang2c495302021-10-07 23:13:30 +000061absl::optional<size_t> GuessSizeForVSWPrintf(const wchar_t* pFormat,
62 va_list argList) {
Ryan Harrison875e98c2017-09-27 10:53:11 -040063 size_t nMaxLen = 0;
Tom Sepezbf9104d2017-03-30 13:51:31 -070064 for (const wchar_t* pStr = pFormat; *pStr != 0; pStr++) {
65 if (*pStr != '%' || *(pStr = pStr + 1) == '%') {
Tom Sepez3efc0672017-03-30 15:28:23 -070066 ++nMaxLen;
Tom Sepezbf9104d2017-03-30 13:51:31 -070067 continue;
68 }
Tom Sepezad004592021-11-05 22:19:18 +000069 int iWidth = 0;
Tom Sepezbf9104d2017-03-30 13:51:31 -070070 for (; *pStr != 0; pStr++) {
71 if (*pStr == '#') {
72 nMaxLen += 2;
73 } else if (*pStr == '*') {
Tom Sepezad004592021-11-05 22:19:18 +000074 iWidth = va_arg(argList, int);
Tom Sepezbf9104d2017-03-30 13:51:31 -070075 } else if (*pStr != '-' && *pStr != '+' && *pStr != '0' && *pStr != ' ') {
76 break;
77 }
78 }
Tom Sepezad004592021-11-05 22:19:18 +000079 if (iWidth == 0) {
80 iWidth = FXSYS_wtoi(pStr);
Lei Zhang4609c5d2018-12-07 20:10:54 +000081 while (FXSYS_IsDecimalDigit(*pStr))
Tom Sepezbf9104d2017-03-30 13:51:31 -070082 ++pStr;
83 }
Tom Sepezad004592021-11-05 22:19:18 +000084 if (iWidth < 0 || iWidth > 128 * 1024)
Lei Zhanga3a739d2021-10-07 20:46:31 +000085 return absl::nullopt;
Tom Sepezad004592021-11-05 22:19:18 +000086 uint32_t nWidth = static_cast<uint32_t>(iWidth);
87 int iPrecision = 0;
Tom Sepezbf9104d2017-03-30 13:51:31 -070088 if (*pStr == '.') {
89 pStr++;
90 if (*pStr == '*') {
Tom Sepezad004592021-11-05 22:19:18 +000091 iPrecision = va_arg(argList, int);
Tom Sepezbf9104d2017-03-30 13:51:31 -070092 pStr++;
93 } else {
Tom Sepezad004592021-11-05 22:19:18 +000094 iPrecision = FXSYS_wtoi(pStr);
Lei Zhang4609c5d2018-12-07 20:10:54 +000095 while (FXSYS_IsDecimalDigit(*pStr))
Tom Sepezbf9104d2017-03-30 13:51:31 -070096 ++pStr;
97 }
98 }
Tom Sepezad004592021-11-05 22:19:18 +000099 if (iPrecision < 0 || iPrecision > 128 * 1024)
Lei Zhanga3a739d2021-10-07 20:46:31 +0000100 return absl::nullopt;
Tom Sepezad004592021-11-05 22:19:18 +0000101 uint32_t nPrecision = static_cast<uint32_t>(iPrecision);
Tom Sepezbf9104d2017-03-30 13:51:31 -0700102 int nModifier = 0;
103 if (*pStr == L'I' && *(pStr + 1) == L'6' && *(pStr + 2) == L'4') {
104 pStr += 3;
105 nModifier = FORCE_INT64;
106 } else {
107 switch (*pStr) {
108 case 'h':
109 nModifier = FORCE_ANSI;
110 pStr++;
111 break;
112 case 'l':
113 nModifier = FORCE_UNICODE;
114 pStr++;
115 break;
116 case 'F':
117 case 'N':
118 case 'L':
119 pStr++;
120 break;
121 }
122 }
Tom Sepezad004592021-11-05 22:19:18 +0000123 size_t nItemLen = 0;
Tom Sepezbf9104d2017-03-30 13:51:31 -0700124 switch (*pStr | nModifier) {
125 case 'c':
126 case 'C':
127 nItemLen = 2;
128 va_arg(argList, int);
129 break;
130 case 'c' | FORCE_ANSI:
131 case 'C' | FORCE_ANSI:
132 nItemLen = 2;
133 va_arg(argList, int);
134 break;
135 case 'c' | FORCE_UNICODE:
136 case 'C' | FORCE_UNICODE:
137 nItemLen = 2;
138 va_arg(argList, int);
139 break;
140 case 's': {
141 const wchar_t* pstrNextArg = va_arg(argList, const wchar_t*);
142 if (pstrNextArg) {
Ryan Harrison8b1408e2017-09-27 11:07:51 -0400143 nItemLen = wcslen(pstrNextArg);
Tom Sepezbf9104d2017-03-30 13:51:31 -0700144 if (nItemLen < 1) {
145 nItemLen = 1;
146 }
147 } else {
148 nItemLen = 6;
149 }
150 } break;
151 case 'S': {
152 const char* pstrNextArg = va_arg(argList, const char*);
153 if (pstrNextArg) {
Ryan Harrison8b1408e2017-09-27 11:07:51 -0400154 nItemLen = strlen(pstrNextArg);
Tom Sepezbf9104d2017-03-30 13:51:31 -0700155 if (nItemLen < 1) {
156 nItemLen = 1;
157 }
158 } else {
159 nItemLen = 6;
160 }
161 } break;
162 case 's' | FORCE_ANSI:
163 case 'S' | FORCE_ANSI: {
164 const char* pstrNextArg = va_arg(argList, const char*);
165 if (pstrNextArg) {
Ryan Harrison8b1408e2017-09-27 11:07:51 -0400166 nItemLen = strlen(pstrNextArg);
Tom Sepezbf9104d2017-03-30 13:51:31 -0700167 if (nItemLen < 1) {
168 nItemLen = 1;
169 }
170 } else {
171 nItemLen = 6;
172 }
173 } break;
174 case 's' | FORCE_UNICODE:
175 case 'S' | FORCE_UNICODE: {
176 const wchar_t* pstrNextArg = va_arg(argList, wchar_t*);
177 if (pstrNextArg) {
Ryan Harrison8b1408e2017-09-27 11:07:51 -0400178 nItemLen = wcslen(pstrNextArg);
Tom Sepezbf9104d2017-03-30 13:51:31 -0700179 if (nItemLen < 1) {
180 nItemLen = 1;
181 }
182 } else {
183 nItemLen = 6;
184 }
185 } break;
186 }
187 if (nItemLen != 0) {
188 if (nPrecision != 0 && nItemLen > nPrecision) {
189 nItemLen = nPrecision;
190 }
191 if (nItemLen < nWidth) {
192 nItemLen = nWidth;
193 }
194 } else {
195 switch (*pStr) {
196 case 'd':
197 case 'i':
198 case 'u':
199 case 'x':
200 case 'X':
201 case 'o':
202 if (nModifier & FORCE_INT64) {
203 va_arg(argList, int64_t);
204 } else {
205 va_arg(argList, int);
206 }
207 nItemLen = 32;
208 if (nItemLen < nWidth + nPrecision) {
209 nItemLen = nWidth + nPrecision;
210 }
211 break;
212 case 'a':
213 case 'A':
214 case 'e':
215 case 'E':
216 case 'g':
217 case 'G':
218 va_arg(argList, double);
219 nItemLen = 128;
220 if (nItemLen < nWidth + nPrecision) {
221 nItemLen = nWidth + nPrecision;
222 }
223 break;
224 case 'f':
225 if (nWidth + nPrecision > 100) {
226 nItemLen = nPrecision + nWidth + 128;
227 } else {
228 double f;
229 char pszTemp[256];
230 f = va_arg(argList, double);
231 FXSYS_snprintf(pszTemp, sizeof(pszTemp), "%*.*f", nWidth,
232 nPrecision + 6, f);
Ryan Harrison8b1408e2017-09-27 11:07:51 -0400233 nItemLen = strlen(pszTemp);
Tom Sepezbf9104d2017-03-30 13:51:31 -0700234 }
235 break;
236 case 'p':
237 va_arg(argList, void*);
238 nItemLen = 32;
239 if (nItemLen < nWidth + nPrecision) {
240 nItemLen = nWidth + nPrecision;
241 }
242 break;
243 case 'n':
244 va_arg(argList, int*);
245 break;
246 }
247 }
248 nMaxLen += nItemLen;
249 }
Tom Sepez3efc0672017-03-30 15:28:23 -0700250 nMaxLen += 32; // Fudge factor.
Daniel Hosseinian338637d2019-10-19 00:37:45 +0000251 return nMaxLen;
Tom Sepezbf9104d2017-03-30 13:51:31 -0700252}
253
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000254// Returns string unless we ran out of space.
Lei Zhang2c495302021-10-07 23:13:30 +0000255absl::optional<WideString> TryVSWPrintf(size_t size,
256 const wchar_t* pFormat,
257 va_list argList) {
Tom Sepezb7973bb2018-04-17 16:41:28 +0000258 if (!size)
Lei Zhanga3a739d2021-10-07 20:46:31 +0000259 return absl::nullopt;
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000260
Tom Sepezb7973bb2018-04-17 16:41:28 +0000261 WideString str;
262 {
263 // Span's lifetime must end before ReleaseBuffer() below.
264 pdfium::span<wchar_t> buffer = str.GetBuffer(size);
265
266 // In the following two calls, there's always space in the WideString
267 // for a terminating NUL that's not included in the span.
268 // For vswprintf(), MSAN won't untaint the buffer on a truncated write's
269 // -1 return code even though the buffer is written. Probably just as well
270 // not to trust the vendor's implementation to write anything anyways.
271 // See https://crbug.com/705912.
272 memset(buffer.data(), 0, (size + 1) * sizeof(wchar_t));
273 int ret = vswprintf(buffer.data(), size + 1, pFormat, argList);
274
275 bool bSufficientBuffer = ret >= 0 || buffer[size - 1] == 0;
276 if (!bSufficientBuffer)
Lei Zhanga3a739d2021-10-07 20:46:31 +0000277 return absl::nullopt;
Tom Sepezb7973bb2018-04-17 16:41:28 +0000278 }
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000279 str.ReleaseBuffer(str.GetStringLength());
Tom Sepezb1bd2ea2021-06-05 00:22:21 +0000280 return str;
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000281}
282
Lei Zhang11d33362016-02-19 14:26:46 -0800283} // namespace
284
Ryan Harrison275e2602017-09-18 14:23:18 -0400285namespace fxcrt {
286
287static_assert(sizeof(WideString) <= sizeof(wchar_t*),
tsepez33431942016-04-21 11:17:22 -0700288 "Strings must not require more space than pointers");
289
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000290// static
Tom Sepezcb743742022-04-22 21:04:00 +0000291WideString WideString::FormatInteger(int i) {
292 wchar_t wbuf[32];
293 swprintf(wbuf, std::size(wbuf), L"%d", i);
294 return WideString(wbuf);
295}
296
297// static
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000298WideString WideString::FormatV(const wchar_t* format, va_list argList) {
299 va_list argListCopy;
300 va_copy(argListCopy, argList);
Lei Zhang2bd5d572023-04-18 20:33:37 +0000301 auto guess = GuessSizeForVSWPrintf(format, argListCopy);
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000302 va_end(argListCopy);
303
Lei Zhang2bd5d572023-04-18 20:33:37 +0000304 if (!guess.has_value()) {
305 return WideString();
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000306 }
Lei Zhang2bd5d572023-04-18 20:33:37 +0000307 int maxLen = pdfium::base::checked_cast<int>(guess.value());
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000308
309 while (maxLen < 32 * 1024) {
310 va_copy(argListCopy, argList);
Lei Zhang2c495302021-10-07 23:13:30 +0000311 absl::optional<WideString> ret =
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000312 TryVSWPrintf(static_cast<size_t>(maxLen), format, argListCopy);
313 va_end(argListCopy);
Tom Sepeze7081292021-06-04 23:44:21 +0000314 if (ret.has_value())
Tom Sepez685fad32021-06-05 01:04:27 +0000315 return ret.value();
316
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000317 maxLen *= 2;
318 }
Tom Sepez890eac22018-12-03 20:35:51 +0000319 return WideString();
Dan Sinclair3f1c8322017-11-16 21:45:18 +0000320}
321
322// static
323WideString WideString::Format(const wchar_t* pFormat, ...) {
324 va_list argList;
325 va_start(argList, pFormat);
326 WideString ret = FormatV(pFormat, argList);
327 va_end(argList);
328 return ret;
329}
330
Lei Zhang4affe8b2020-10-13 20:01:23 +0000331WideString::WideString() = default;
weilif4bb5802016-06-14 17:21:14 -0700332
Ryan Harrison275e2602017-09-18 14:23:18 -0400333WideString::WideString(const WideString& other) : m_pData(other.m_pData) {}
weilif4bb5802016-06-14 17:21:14 -0700334
Ryan Harrison275e2602017-09-18 14:23:18 -0400335WideString::WideString(WideString&& other) noexcept {
weilif4bb5802016-06-14 17:21:14 -0700336 m_pData.Swap(other.m_pData);
337}
338
Ryan Harrison875e98c2017-09-27 10:53:11 -0400339WideString::WideString(const wchar_t* pStr, size_t nLen) {
tsepez46bf0332016-04-06 16:48:26 -0700340 if (nLen)
341 m_pData.Reset(StringData::Create(pStr, nLen));
342}
Tom Sepez7f840ae2015-04-30 15:46:34 -0700343
Ryan Harrison275e2602017-09-18 14:23:18 -0400344WideString::WideString(wchar_t ch) {
tsepez46bf0332016-04-06 16:48:26 -0700345 m_pData.Reset(StringData::Create(1));
346 m_pData->m_String[0] = ch;
347}
348
Ryan Harrison275e2602017-09-18 14:23:18 -0400349WideString::WideString(const wchar_t* ptr)
Ryan Harrison8b1408e2017-09-27 11:07:51 -0400350 : WideString(ptr, ptr ? wcslen(ptr) : 0) {}
weilif4bb5802016-06-14 17:21:14 -0700351
Tom Sepez1ab27572018-12-14 20:31:31 +0000352WideString::WideString(WideStringView stringSrc) {
tsepez46bf0332016-04-06 16:48:26 -0700353 if (!stringSrc.IsEmpty()) {
Tom Sepez33b42e42017-07-19 13:19:12 -0700354 m_pData.Reset(StringData::Create(stringSrc.unterminated_c_str(),
355 stringSrc.GetLength()));
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700356 }
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700357}
tsepez46bf0332016-04-06 16:48:26 -0700358
Tom Sepez1ab27572018-12-14 20:31:31 +0000359WideString::WideString(WideStringView str1, WideStringView str2) {
Ryan Harrisonbacf75e2017-09-27 10:58:52 -0400360 FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
Tom Sepez746c2872017-04-07 16:35:13 -0700361 nSafeLen += str2.GetLength();
362
Ryan Harrison875e98c2017-09-27 10:53:11 -0400363 size_t nNewLen = nSafeLen.ValueOrDie();
tsepez46bf0332016-04-06 16:48:26 -0700364 if (nNewLen == 0)
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700365 return;
tsepez46bf0332016-04-06 16:48:26 -0700366
367 m_pData.Reset(StringData::Create(nNewLen));
Tom Sepez33b42e42017-07-19 13:19:12 -0700368 m_pData->CopyContents(str1.unterminated_c_str(), str1.GetLength());
369 m_pData->CopyContentsAt(str1.GetLength(), str2.unterminated_c_str(),
370 str2.GetLength());
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700371}
tsepez46bf0332016-04-06 16:48:26 -0700372
Ryan Harrison275e2602017-09-18 14:23:18 -0400373WideString::WideString(const std::initializer_list<WideStringView>& list) {
Ryan Harrisonbacf75e2017-09-27 10:58:52 -0400374 FX_SAFE_SIZE_T nSafeLen = 0;
Tom Sepez746c2872017-04-07 16:35:13 -0700375 for (const auto& item : list)
376 nSafeLen += item.GetLength();
377
Ryan Harrison875e98c2017-09-27 10:53:11 -0400378 size_t nNewLen = nSafeLen.ValueOrDie();
Tom Sepez746c2872017-04-07 16:35:13 -0700379 if (nNewLen == 0)
380 return;
381
382 m_pData.Reset(StringData::Create(nNewLen));
383
Ryan Harrison875e98c2017-09-27 10:53:11 -0400384 size_t nOffset = 0;
Tom Sepez746c2872017-04-07 16:35:13 -0700385 for (const auto& item : list) {
Tom Sepez33b42e42017-07-19 13:19:12 -0700386 m_pData->CopyContentsAt(nOffset, item.unterminated_c_str(),
387 item.GetLength());
Tom Sepez746c2872017-04-07 16:35:13 -0700388 nOffset += item.GetLength();
389 }
390}
391
Lei Zhang0e744a22020-06-02 00:44:28 +0000392WideString::~WideString() = default;
tsepez46bf0332016-04-06 16:48:26 -0700393
Tom Sepez2246a532021-12-17 00:35:35 +0000394void WideString::clear() {
395 if (m_pData && m_pData->CanOperateInPlace(0)) {
396 m_pData->m_nDataLength = 0;
397 return;
398 }
399 m_pData.Reset();
400}
401
Andrew Weintraub50710e72019-06-24 21:11:55 +0000402WideString& WideString::operator=(const wchar_t* str) {
403 if (!str || !str[0])
tsepez46bf0332016-04-06 16:48:26 -0700404 clear();
405 else
Andrew Weintraub50710e72019-06-24 21:11:55 +0000406 AssignCopy(str, wcslen(str));
tsepez46bf0332016-04-06 16:48:26 -0700407
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700408 return *this;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700409}
tsepez46bf0332016-04-06 16:48:26 -0700410
Andrew Weintraub50710e72019-06-24 21:11:55 +0000411WideString& WideString::operator=(WideStringView str) {
412 if (str.IsEmpty())
tsepez46bf0332016-04-06 16:48:26 -0700413 clear();
414 else
Andrew Weintraub50710e72019-06-24 21:11:55 +0000415 AssignCopy(str.unterminated_c_str(), str.GetLength());
tsepez46bf0332016-04-06 16:48:26 -0700416
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700417 return *this;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700418}
tsepez46bf0332016-04-06 16:48:26 -0700419
Andrew Weintraub50710e72019-06-24 21:11:55 +0000420WideString& WideString::operator=(const WideString& that) {
Tom Sepeza1ea4272018-06-19 14:37:12 +0000421 if (m_pData != that.m_pData)
422 m_pData = that.m_pData;
423
424 return *this;
425}
426
Tom Sepez8a47b822020-09-10 23:16:30 +0000427WideString& WideString::operator=(WideString&& that) noexcept {
Tom Sepeza1ea4272018-06-19 14:37:12 +0000428 if (m_pData != that.m_pData)
429 m_pData = std::move(that.m_pData);
tsepez46bf0332016-04-06 16:48:26 -0700430
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700431 return *this;
432}
tsepez46bf0332016-04-06 16:48:26 -0700433
Andrew Weintraub50710e72019-06-24 21:11:55 +0000434WideString& WideString::operator+=(const wchar_t* str) {
435 if (str)
436 Concat(str, wcslen(str));
tsepez46bf0332016-04-06 16:48:26 -0700437
438 return *this;
439}
440
Andrew Weintraub50710e72019-06-24 21:11:55 +0000441WideString& WideString::operator+=(wchar_t ch) {
tsepez46bf0332016-04-06 16:48:26 -0700442 Concat(&ch, 1);
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700443 return *this;
444}
tsepez46bf0332016-04-06 16:48:26 -0700445
Andrew Weintraub50710e72019-06-24 21:11:55 +0000446WideString& WideString::operator+=(const WideString& str) {
tsepez46bf0332016-04-06 16:48:26 -0700447 if (str.m_pData)
448 Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
449
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700450 return *this;
451}
tsepez46bf0332016-04-06 16:48:26 -0700452
Andrew Weintraub50710e72019-06-24 21:11:55 +0000453WideString& WideString::operator+=(WideStringView str) {
tsepez46bf0332016-04-06 16:48:26 -0700454 if (!str.IsEmpty())
Tom Sepez33b42e42017-07-19 13:19:12 -0700455 Concat(str.unterminated_c_str(), str.GetLength());
tsepez46bf0332016-04-06 16:48:26 -0700456
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700457 return *this;
458}
tsepez46bf0332016-04-06 16:48:26 -0700459
Ryan Harrison275e2602017-09-18 14:23:18 -0400460bool WideString::operator==(const wchar_t* ptr) const {
tsepez9f2970c2016-04-01 10:23:04 -0700461 if (!m_pData)
462 return !ptr || !ptr[0];
463
464 if (!ptr)
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700465 return m_pData->m_nDataLength == 0;
tsepez9f2970c2016-04-01 10:23:04 -0700466
Ryan Harrison81f9eee2017-09-05 15:33:18 -0400467 return wcslen(ptr) == m_pData->m_nDataLength &&
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700468 wmemcmp(ptr, m_pData->m_String, m_pData->m_nDataLength) == 0;
469}
tsepez46bf0332016-04-06 16:48:26 -0700470
Tom Sepez1ab27572018-12-14 20:31:31 +0000471bool WideString::operator==(WideStringView str) const {
tsepez9f2970c2016-04-01 10:23:04 -0700472 if (!m_pData)
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700473 return str.IsEmpty();
tsepez9f2970c2016-04-01 10:23:04 -0700474
tsepez46bf0332016-04-06 16:48:26 -0700475 return m_pData->m_nDataLength == str.GetLength() &&
Tom Sepez33b42e42017-07-19 13:19:12 -0700476 wmemcmp(m_pData->m_String, str.unterminated_c_str(),
477 str.GetLength()) == 0;
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700478}
tsepez46bf0332016-04-06 16:48:26 -0700479
Ryan Harrison275e2602017-09-18 14:23:18 -0400480bool WideString::operator==(const WideString& other) const {
tsepez3d704882016-09-13 18:10:22 -0700481 if (m_pData == other.m_pData)
482 return true;
483
tsepez9f2970c2016-04-01 10:23:04 -0700484 if (IsEmpty())
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700485 return other.IsEmpty();
tsepez9f2970c2016-04-01 10:23:04 -0700486
487 if (other.IsEmpty())
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700488 return false;
tsepez9f2970c2016-04-01 10:23:04 -0700489
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700490 return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
491 wmemcmp(other.m_pData->m_String, m_pData->m_String,
492 m_pData->m_nDataLength) == 0;
493}
tsepez46bf0332016-04-06 16:48:26 -0700494
Lei Zhangd7a3f982017-11-06 19:00:21 +0000495bool WideString::operator<(const wchar_t* ptr) const {
Lei Zhangf3a7edc2017-12-12 18:42:18 +0000496 return Compare(ptr) < 0;
Lei Zhangd7a3f982017-11-06 19:00:21 +0000497}
498
Tom Sepez1ab27572018-12-14 20:31:31 +0000499bool WideString::operator<(WideStringView str) const {
Lei Zhangd7a3f982017-11-06 19:00:21 +0000500 if (!m_pData && !str.unterminated_c_str())
501 return false;
502 if (c_str() == str.unterminated_c_str())
503 return false;
504
505 size_t len = GetLength();
506 size_t other_len = str.GetLength();
tsepez8f53f542016-09-15 11:55:00 -0700507 int result =
Lei Zhangd7a3f982017-11-06 19:00:21 +0000508 wmemcmp(c_str(), str.unterminated_c_str(), std::min(len, other_len));
509 return result < 0 || (result == 0 && len < other_len);
510}
511
512bool WideString::operator<(const WideString& other) const {
Lei Zhangf3a7edc2017-12-12 18:42:18 +0000513 return Compare(other) < 0;
tsepez8f53f542016-09-15 11:55:00 -0700514}
515
Ryan Harrison875e98c2017-09-27 10:53:11 -0400516void WideString::AssignCopy(const wchar_t* pSrcData, size_t nSrcLen) {
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700517 AllocBeforeWrite(nSrcLen);
tsepez46bf0332016-04-06 16:48:26 -0700518 m_pData->CopyContents(pSrcData, nSrcLen);
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700519 m_pData->m_nDataLength = nSrcLen;
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700520}
tsepez46bf0332016-04-06 16:48:26 -0700521
Ryan Harrison875e98c2017-09-27 10:53:11 -0400522void WideString::ReallocBeforeWrite(size_t nNewLength) {
tsepez46bf0332016-04-06 16:48:26 -0700523 if (m_pData && m_pData->CanOperateInPlace(nNewLength))
524 return;
525
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400526 if (nNewLength == 0) {
tsepez46bf0332016-04-06 16:48:26 -0700527 clear();
528 return;
529 }
530
Dan Sinclair0b950422017-09-21 15:49:49 -0400531 RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
tsepez46bf0332016-04-06 16:48:26 -0700532 if (m_pData) {
Ryan Harrison875e98c2017-09-27 10:53:11 -0400533 size_t nCopyLength = std::min(m_pData->m_nDataLength, nNewLength);
tsepez46bf0332016-04-06 16:48:26 -0700534 pNewData->CopyContents(m_pData->m_String, nCopyLength);
535 pNewData->m_nDataLength = nCopyLength;
536 } else {
537 pNewData->m_nDataLength = 0;
538 }
539 pNewData->m_String[pNewData->m_nDataLength] = 0;
540 m_pData.Swap(pNewData);
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700541}
tsepez46bf0332016-04-06 16:48:26 -0700542
Ryan Harrison875e98c2017-09-27 10:53:11 -0400543void WideString::AllocBeforeWrite(size_t nNewLength) {
tsepez46bf0332016-04-06 16:48:26 -0700544 if (m_pData && m_pData->CanOperateInPlace(nNewLength))
545 return;
546
Ryan Harrison81f9eee2017-09-05 15:33:18 -0400547 if (nNewLength == 0) {
tsepez46bf0332016-04-06 16:48:26 -0700548 clear();
549 return;
550 }
551
552 m_pData.Reset(StringData::Create(nNewLength));
553}
554
Ryan Harrison875e98c2017-09-27 10:53:11 -0400555void WideString::ReleaseBuffer(size_t nNewLength) {
tsepez46bf0332016-04-06 16:48:26 -0700556 if (!m_pData)
557 return;
558
tsepez518fd4c2016-04-26 12:13:16 -0700559 nNewLength = std::min(nNewLength, m_pData->m_nAllocLength);
tsepez46bf0332016-04-06 16:48:26 -0700560 if (nNewLength == 0) {
561 clear();
562 return;
563 }
564
Lei Zhang45829202021-04-16 16:42:11 +0000565 DCHECK_EQ(m_pData->m_nRefs, 1);
tsepez46bf0332016-04-06 16:48:26 -0700566 m_pData->m_nDataLength = nNewLength;
567 m_pData->m_String[nNewLength] = 0;
tsepez518fd4c2016-04-26 12:13:16 -0700568 if (m_pData->m_nAllocLength - nNewLength >= 32) {
569 // Over arbitrary threshold, so pay the price to relocate. Force copy to
570 // always occur by holding a second reference to the string.
Ryan Harrison275e2602017-09-18 14:23:18 -0400571 WideString preserve(*this);
tsepez518fd4c2016-04-26 12:13:16 -0700572 ReallocBeforeWrite(nNewLength);
573 }
tsepez46bf0332016-04-06 16:48:26 -0700574}
575
Ryan Harrison875e98c2017-09-27 10:53:11 -0400576void WideString::Reserve(size_t len) {
tsepez46bf0332016-04-06 16:48:26 -0700577 GetBuffer(len);
tsepez46bf0332016-04-06 16:48:26 -0700578}
579
Tom Sepezb7973bb2018-04-17 16:41:28 +0000580pdfium::span<wchar_t> WideString::GetBuffer(size_t nMinBufLength) {
tsepez46bf0332016-04-06 16:48:26 -0700581 if (!m_pData) {
582 if (nMinBufLength == 0)
Tom Sepezb7973bb2018-04-17 16:41:28 +0000583 return pdfium::span<wchar_t>();
tsepez46bf0332016-04-06 16:48:26 -0700584
585 m_pData.Reset(StringData::Create(nMinBufLength));
586 m_pData->m_nDataLength = 0;
587 m_pData->m_String[0] = 0;
Tom Sepezb7973bb2018-04-17 16:41:28 +0000588 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
tsepez46bf0332016-04-06 16:48:26 -0700589 }
590
591 if (m_pData->CanOperateInPlace(nMinBufLength))
Tom Sepezb7973bb2018-04-17 16:41:28 +0000592 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
tsepez46bf0332016-04-06 16:48:26 -0700593
594 nMinBufLength = std::max(nMinBufLength, m_pData->m_nDataLength);
595 if (nMinBufLength == 0)
Tom Sepezb7973bb2018-04-17 16:41:28 +0000596 return pdfium::span<wchar_t>();
tsepez46bf0332016-04-06 16:48:26 -0700597
Dan Sinclair0b950422017-09-21 15:49:49 -0400598 RetainPtr<StringData> pNewData(StringData::Create(nMinBufLength));
tsepez46bf0332016-04-06 16:48:26 -0700599 pNewData->CopyContents(*m_pData);
600 pNewData->m_nDataLength = m_pData->m_nDataLength;
601 m_pData.Swap(pNewData);
Tom Sepezb7973bb2018-04-17 16:41:28 +0000602 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
tsepez46bf0332016-04-06 16:48:26 -0700603}
604
Ryan Harrison875e98c2017-09-27 10:53:11 -0400605size_t WideString::Delete(size_t index, size_t count) {
tsepez46bf0332016-04-06 16:48:26 -0700606 if (!m_pData)
607 return 0;
608
Ryan Harrison875e98c2017-09-27 10:53:11 -0400609 size_t old_length = m_pData->m_nDataLength;
Lei Zhang10a8fea2018-12-05 23:15:53 +0000610 if (count == 0 || index != pdfium::clamp<size_t>(index, 0, old_length))
Ryan Harrison0811da82017-08-02 16:16:18 -0400611 return old_length;
tsepez46bf0332016-04-06 16:48:26 -0700612
Ryan Harrison875e98c2017-09-27 10:53:11 -0400613 size_t removal_length = index + count;
Ryan Harrison0811da82017-08-02 16:16:18 -0400614 if (removal_length > old_length)
615 return old_length;
616
617 ReallocBeforeWrite(old_length);
Ryan Harrison875e98c2017-09-27 10:53:11 -0400618 size_t chars_to_copy = old_length - removal_length + 1;
Ryan Harrison0811da82017-08-02 16:16:18 -0400619 wmemmove(m_pData->m_String + index, m_pData->m_String + removal_length,
620 chars_to_copy);
621 m_pData->m_nDataLength = old_length - count;
tsepez46bf0332016-04-06 16:48:26 -0700622 return m_pData->m_nDataLength;
623}
624
Ryan Harrison875e98c2017-09-27 10:53:11 -0400625void WideString::Concat(const wchar_t* pSrcData, size_t nSrcLen) {
Ryan Harrison81f9eee2017-09-05 15:33:18 -0400626 if (!pSrcData || nSrcLen == 0)
tsepez46bf0332016-04-06 16:48:26 -0700627 return;
628
629 if (!m_pData) {
630 m_pData.Reset(StringData::Create(pSrcData, nSrcLen));
631 return;
632 }
633
634 if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
635 m_pData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
636 m_pData->m_nDataLength += nSrcLen;
637 return;
638 }
639
Lei Zhang5d6714e2018-12-11 18:53:31 +0000640 size_t nConcatLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
Dan Sinclair0b950422017-09-21 15:49:49 -0400641 RetainPtr<StringData> pNewData(
Lei Zhang5d6714e2018-12-11 18:53:31 +0000642 StringData::Create(m_pData->m_nDataLength + nConcatLen));
tsepez46bf0332016-04-06 16:48:26 -0700643 pNewData->CopyContents(*m_pData);
644 pNewData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
Lei Zhang5d6714e2018-12-11 18:53:31 +0000645 pNewData->m_nDataLength = m_pData->m_nDataLength + nSrcLen;
tsepez46bf0332016-04-06 16:48:26 -0700646 m_pData.Swap(pNewData);
647}
648
Tom Sepeza1ea4272018-06-19 14:37:12 +0000649intptr_t WideString::ReferenceCountForTesting() const {
650 return m_pData ? m_pData->m_nRefs : 0;
651}
652
Tom Sepezadb9e702018-11-27 18:43:10 +0000653ByteString WideString::ToASCII() const {
654 ByteString result;
655 result.Reserve(GetLength());
656 for (wchar_t wc : *this)
657 result.InsertAtBack(static_cast<char>(wc & 0x7f));
658 return result;
659}
660
Lei Zhangebac96b2019-01-14 20:20:28 +0000661ByteString WideString::ToLatin1() const {
662 ByteString result;
663 result.Reserve(GetLength());
664 for (wchar_t wc : *this)
665 result.InsertAtBack(static_cast<char>(wc & 0xff));
666 return result;
667}
668
Tom Sepez34dab072018-08-08 17:49:02 +0000669ByteString WideString::ToDefANSI() const {
Tom Sepez662e7a82021-08-04 18:02:18 +0000670 size_t dest_len =
671 FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), {});
Tom Sepez34dab072018-08-08 17:49:02 +0000672 if (!dest_len)
673 return ByteString();
674
675 ByteString bstr;
676 {
677 // Span's lifetime must end before ReleaseBuffer() below.
678 pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
Tom Sepez662e7a82021-08-04 18:02:18 +0000679 FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), dest_buf);
Tom Sepez34dab072018-08-08 17:49:02 +0000680 }
681 bstr.ReleaseBuffer(dest_len);
682 return bstr;
683}
684
Tom Sepezb4c95fe2018-11-27 01:09:44 +0000685ByteString WideString::ToUTF8() const {
Ryan Harrison275e2602017-09-18 14:23:18 -0400686 return FX_UTF8Encode(AsStringView());
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700687}
tsepez46bf0332016-04-06 16:48:26 -0700688
Tom Sepezb4c95fe2018-11-27 01:09:44 +0000689ByteString WideString::ToUTF16LE() const {
Tom Sepez1dbfe992018-04-17 17:19:30 +0000690 if (!m_pData)
Ryan Harrison275e2602017-09-18 14:23:18 -0400691 return ByteString("\0\0", 2);
Tom Sepez1dbfe992018-04-17 17:19:30 +0000692
Tom Sepez3d523e32018-04-16 17:28:17 +0000693 ByteString result;
Tom Sepeza5d8fa12021-11-05 19:29:08 +0000694 size_t len = m_pData->m_nDataLength;
Tom Sepez1dbfe992018-04-17 17:19:30 +0000695 {
696 // Span's lifetime must end before ReleaseBuffer() below.
697 pdfium::span<char> buffer = result.GetBuffer(len * 2 + 2);
Tom Sepeza5d8fa12021-11-05 19:29:08 +0000698 for (size_t i = 0; i < len; i++) {
Tom Sepez1dbfe992018-04-17 17:19:30 +0000699 buffer[i * 2] = m_pData->m_String[i] & 0xff;
700 buffer[i * 2 + 1] = m_pData->m_String[i] >> 8;
701 }
702 buffer[len * 2] = 0;
703 buffer[len * 2 + 1] = 0;
Tom Sepez3d523e32018-04-16 17:28:17 +0000704 }
Oliver Chang35e68a52015-12-09 12:44:33 -0800705 result.ReleaseBuffer(len * 2 + 2);
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700706 return result;
707}
Lei Zhang31c7b732016-02-19 10:53:24 -0800708
Tom Sepez3a9cc6d2020-07-20 23:50:36 +0000709WideString WideString::EncodeEntities() const {
710 WideString ret = *this;
711 ret.Replace(L"&", L"&amp;");
712 ret.Replace(L"<", L"&lt;");
713 ret.Replace(L">", L"&gt;");
714 ret.Replace(L"\'", L"&apos;");
715 ret.Replace(L"\"", L"&quot;");
716 return ret;
717}
718
Tom Sepez31722572021-11-03 23:58:40 +0000719WideString WideString::Substr(size_t offset) const {
Tom Sepezd0b53bd2021-11-04 20:05:45 +0000720 // Unsigned underflow is well-defined and out-of-range is handled by Substr().
Tom Sepez31722572021-11-03 23:58:40 +0000721 return Substr(offset, GetLength() - offset);
722}
723
Daniel Hosseinian39516a52020-01-27 22:10:50 +0000724WideString WideString::Substr(size_t first, size_t count) const {
tsepezde0d8522016-03-31 14:40:29 -0700725 if (!m_pData)
Ryan Harrison275e2602017-09-18 14:23:18 -0400726 return WideString();
tsepezde0d8522016-03-31 14:40:29 -0700727
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400728 if (!IsValidIndex(first))
Ryan Harrison275e2602017-09-18 14:23:18 -0400729 return WideString();
tsepez46bf0332016-04-06 16:48:26 -0700730
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400731 if (count == 0 || !IsValidLength(count))
Ryan Harrison275e2602017-09-18 14:23:18 -0400732 return WideString();
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400733
734 if (!IsValidIndex(first + count - 1))
Ryan Harrison275e2602017-09-18 14:23:18 -0400735 return WideString();
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400736
737 if (first == 0 && count == GetLength())
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700738 return *this;
tsepezde0d8522016-03-31 14:40:29 -0700739
Ryan Harrison275e2602017-09-18 14:23:18 -0400740 WideString dest;
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400741 AllocCopy(dest, count, first);
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700742 return dest;
743}
tsepez46bf0332016-04-06 16:48:26 -0700744
Daniel Hosseiniana9a704e2020-01-28 19:52:32 +0000745WideString WideString::First(size_t count) const {
Daniel Hosseinian39516a52020-01-27 22:10:50 +0000746 return Substr(0, count);
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400747}
748
Daniel Hosseiniana9a704e2020-01-28 19:52:32 +0000749WideString WideString::Last(size_t count) const {
Lei Zhang6df96bd2021-07-02 03:12:01 +0000750 // Unsigned underflow is well-defined and out-of-range is handled by Substr().
Daniel Hosseinian39516a52020-01-27 22:10:50 +0000751 return Substr(GetLength() - count, count);
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400752}
753
Ryan Harrison275e2602017-09-18 14:23:18 -0400754void WideString::AllocCopy(WideString& dest,
Ryan Harrison875e98c2017-09-27 10:53:11 -0400755 size_t nCopyLen,
756 size_t nCopyIndex) const {
Ryan Harrison81f9eee2017-09-05 15:33:18 -0400757 if (nCopyLen == 0)
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700758 return;
tsepez46bf0332016-04-06 16:48:26 -0700759
Dan Sinclair0b950422017-09-21 15:49:49 -0400760 RetainPtr<StringData> pNewData(
tsepez46bf0332016-04-06 16:48:26 -0700761 StringData::Create(m_pData->m_String + nCopyIndex, nCopyLen));
762 dest.m_pData.Swap(pNewData);
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700763}
tsepez46bf0332016-04-06 16:48:26 -0700764
Andrew Weintraub43d760a2019-06-24 17:45:20 +0000765size_t WideString::Insert(size_t index, wchar_t ch) {
Lei Zhang10a8fea2018-12-05 23:15:53 +0000766 const size_t cur_length = GetLength();
Andrew Weintraub43d760a2019-06-24 17:45:20 +0000767 if (!IsValidLength(index))
Ryan Harrisondb145322017-08-02 14:44:17 -0400768 return cur_length;
tsepez46bf0332016-04-06 16:48:26 -0700769
Ryan Harrison875e98c2017-09-27 10:53:11 -0400770 const size_t new_length = cur_length + 1;
Ryan Harrisondb145322017-08-02 14:44:17 -0400771 ReallocBeforeWrite(new_length);
Andrew Weintraub43d760a2019-06-24 17:45:20 +0000772 wmemmove(m_pData->m_String + index + 1, m_pData->m_String + index,
773 new_length - index);
774 m_pData->m_String[index] = ch;
Ryan Harrisondb145322017-08-02 14:44:17 -0400775 m_pData->m_nDataLength = new_length;
776 return new_length;
tsepez46bf0332016-04-06 16:48:26 -0700777}
778
Lei Zhang2c495302021-10-07 23:13:30 +0000779absl::optional<size_t> WideString::Find(wchar_t ch, size_t start) const {
tsepez46bf0332016-04-06 16:48:26 -0700780 if (!m_pData)
Lei Zhanga3a739d2021-10-07 20:46:31 +0000781 return absl::nullopt;
tsepez46bf0332016-04-06 16:48:26 -0700782
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400783 if (!IsValidIndex(start))
Lei Zhanga3a739d2021-10-07 20:46:31 +0000784 return absl::nullopt;
tsepez46bf0332016-04-06 16:48:26 -0700785
Dan Sinclair812e96c2017-03-13 16:43:37 -0400786 const wchar_t* pStr =
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400787 wmemchr(m_pData->m_String + start, ch, m_pData->m_nDataLength - start);
Lei Zhang2c495302021-10-07 23:13:30 +0000788 return pStr ? absl::optional<size_t>(
789 static_cast<size_t>(pStr - m_pData->m_String))
Lei Zhanga3a739d2021-10-07 20:46:31 +0000790 : absl::nullopt;
tsepez46bf0332016-04-06 16:48:26 -0700791}
792
Lei Zhang2c495302021-10-07 23:13:30 +0000793absl::optional<size_t> WideString::Find(WideStringView subStr,
794 size_t start) const {
tsepez46bf0332016-04-06 16:48:26 -0700795 if (!m_pData)
Lei Zhanga3a739d2021-10-07 20:46:31 +0000796 return absl::nullopt;
tsepez46bf0332016-04-06 16:48:26 -0700797
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400798 if (!IsValidIndex(start))
Lei Zhanga3a739d2021-10-07 20:46:31 +0000799 return absl::nullopt;
tsepez46bf0332016-04-06 16:48:26 -0700800
Dan Sinclair812e96c2017-03-13 16:43:37 -0400801 const wchar_t* pStr =
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400802 FX_wcsstr(m_pData->m_String + start, m_pData->m_nDataLength - start,
803 subStr.unterminated_c_str(), subStr.GetLength());
Lei Zhang2c495302021-10-07 23:13:30 +0000804 return pStr ? absl::optional<size_t>(
805 static_cast<size_t>(pStr - m_pData->m_String))
Lei Zhanga3a739d2021-10-07 20:46:31 +0000806 : absl::nullopt;
tsepez46bf0332016-04-06 16:48:26 -0700807}
808
Lei Zhang2c495302021-10-07 23:13:30 +0000809absl::optional<size_t> WideString::ReverseFind(wchar_t ch) const {
Daniel Hosseinianbf9cfd72019-10-19 00:34:35 +0000810 if (!m_pData)
Lei Zhanga3a739d2021-10-07 20:46:31 +0000811 return absl::nullopt;
Daniel Hosseinianbf9cfd72019-10-19 00:34:35 +0000812
813 size_t nLength = m_pData->m_nDataLength;
814 while (nLength--) {
815 if (m_pData->m_String[nLength] == ch)
816 return nLength;
817 }
Lei Zhanga3a739d2021-10-07 20:46:31 +0000818 return absl::nullopt;
Daniel Hosseinianbf9cfd72019-10-19 00:34:35 +0000819}
820
Ryan Harrison275e2602017-09-18 14:23:18 -0400821void WideString::MakeLower() {
Tom Sepezbafc4082021-12-15 21:33:15 +0000822 if (IsEmpty())
tsepez46bf0332016-04-06 16:48:26 -0700823 return;
824
825 ReallocBeforeWrite(m_pData->m_nDataLength);
826 FXSYS_wcslwr(m_pData->m_String);
827}
828
Ryan Harrison275e2602017-09-18 14:23:18 -0400829void WideString::MakeUpper() {
Tom Sepezbafc4082021-12-15 21:33:15 +0000830 if (IsEmpty())
tsepez46bf0332016-04-06 16:48:26 -0700831 return;
832
833 ReallocBeforeWrite(m_pData->m_nDataLength);
834 FXSYS_wcsupr(m_pData->m_String);
835}
836
Ryan Harrison875e98c2017-09-27 10:53:11 -0400837size_t WideString::Remove(wchar_t chRemove) {
Tom Sepezbafc4082021-12-15 21:33:15 +0000838 if (IsEmpty())
tsepez46bf0332016-04-06 16:48:26 -0700839 return 0;
840
Dan Sinclair812e96c2017-03-13 16:43:37 -0400841 wchar_t* pstrSource = m_pData->m_String;
842 wchar_t* pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
tsepez46bf0332016-04-06 16:48:26 -0700843 while (pstrSource < pstrEnd) {
tsepeze09c1e42016-04-11 18:01:13 -0700844 if (*pstrSource == chRemove)
845 break;
846 pstrSource++;
847 }
848 if (pstrSource == pstrEnd)
849 return 0;
850
851 ptrdiff_t copied = pstrSource - m_pData->m_String;
852 ReallocBeforeWrite(m_pData->m_nDataLength);
853 pstrSource = m_pData->m_String + copied;
854 pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
855
Dan Sinclair812e96c2017-03-13 16:43:37 -0400856 wchar_t* pstrDest = pstrSource;
tsepeze09c1e42016-04-11 18:01:13 -0700857 while (pstrSource < pstrEnd) {
tsepez46bf0332016-04-06 16:48:26 -0700858 if (*pstrSource != chRemove) {
859 *pstrDest = *pstrSource;
860 pstrDest++;
861 }
862 pstrSource++;
863 }
tsepeze09c1e42016-04-11 18:01:13 -0700864
tsepez46bf0332016-04-06 16:48:26 -0700865 *pstrDest = 0;
Ryan Harrison875e98c2017-09-27 10:53:11 -0400866 size_t count = static_cast<size_t>(pstrSource - pstrDest);
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400867 m_pData->m_nDataLength -= count;
868 return count;
tsepez46bf0332016-04-06 16:48:26 -0700869}
870
Tom Sepez1ab27572018-12-14 20:31:31 +0000871size_t WideString::Replace(WideStringView pOld, WideStringView pNew) {
tsepez46bf0332016-04-06 16:48:26 -0700872 if (!m_pData || pOld.IsEmpty())
873 return 0;
874
Ryan Harrison875e98c2017-09-27 10:53:11 -0400875 size_t nSourceLen = pOld.GetLength();
876 size_t nReplacementLen = pNew.GetLength();
877 size_t count = 0;
Dan Sinclair812e96c2017-03-13 16:43:37 -0400878 const wchar_t* pStart = m_pData->m_String;
879 wchar_t* pEnd = m_pData->m_String + m_pData->m_nDataLength;
Anton Bikineev7ac13342022-01-24 21:25:15 +0000880 while (true) {
Ryan Harrison875e98c2017-09-27 10:53:11 -0400881 const wchar_t* pTarget =
882 FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
883 pOld.unterminated_c_str(), nSourceLen);
tsepez46bf0332016-04-06 16:48:26 -0700884 if (!pTarget)
885 break;
886
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400887 count++;
tsepez46bf0332016-04-06 16:48:26 -0700888 pStart = pTarget + nSourceLen;
889 }
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400890 if (count == 0)
tsepez46bf0332016-04-06 16:48:26 -0700891 return 0;
892
Ryan Harrison875e98c2017-09-27 10:53:11 -0400893 size_t nNewLength =
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400894 m_pData->m_nDataLength + (nReplacementLen - nSourceLen) * count;
tsepez46bf0332016-04-06 16:48:26 -0700895
896 if (nNewLength == 0) {
897 clear();
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400898 return count;
tsepez46bf0332016-04-06 16:48:26 -0700899 }
900
Dan Sinclair0b950422017-09-21 15:49:49 -0400901 RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
tsepez46bf0332016-04-06 16:48:26 -0700902 pStart = m_pData->m_String;
Dan Sinclair812e96c2017-03-13 16:43:37 -0400903 wchar_t* pDest = pNewData->m_String;
Ryan Harrison875e98c2017-09-27 10:53:11 -0400904 for (size_t i = 0; i < count; i++) {
905 const wchar_t* pTarget =
906 FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
907 pOld.unterminated_c_str(), nSourceLen);
tsepez46bf0332016-04-06 16:48:26 -0700908 wmemcpy(pDest, pStart, pTarget - pStart);
909 pDest += pTarget - pStart;
Tom Sepez33b42e42017-07-19 13:19:12 -0700910 wmemcpy(pDest, pNew.unterminated_c_str(), pNew.GetLength());
tsepez46bf0332016-04-06 16:48:26 -0700911 pDest += pNew.GetLength();
912 pStart = pTarget + nSourceLen;
913 }
914 wmemcpy(pDest, pStart, pEnd - pStart);
915 m_pData.Swap(pNewData);
Ryan Harrisoned48c1a2017-08-25 15:34:41 -0400916 return count;
tsepez46bf0332016-04-06 16:48:26 -0700917}
918
tsepez46bf0332016-04-06 16:48:26 -0700919// static
Tom Sepezc839ac72018-12-14 20:34:11 +0000920WideString WideString::FromASCII(ByteStringView bstr) {
Tom Sepezadb9e702018-11-27 18:43:10 +0000921 WideString result;
922 result.Reserve(bstr.GetLength());
923 for (char c : bstr)
924 result.InsertAtBack(static_cast<wchar_t>(c & 0x7f));
925 return result;
926}
927
928// static
Lei Zhangebac96b2019-01-14 20:20:28 +0000929WideString WideString::FromLatin1(ByteStringView bstr) {
930 WideString result;
931 result.Reserve(bstr.GetLength());
932 for (char c : bstr)
933 result.InsertAtBack(static_cast<wchar_t>(c & 0xff));
934 return result;
935}
936
937// static
Tom Sepezc839ac72018-12-14 20:34:11 +0000938WideString WideString::FromDefANSI(ByteStringView bstr) {
Tom Sepez662e7a82021-08-04 18:02:18 +0000939 size_t dest_len = FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, {});
Tom Sepezf7658052018-08-08 22:20:29 +0000940 if (!dest_len)
941 return WideString();
tsepez46bf0332016-04-06 16:48:26 -0700942
Tom Sepezf7658052018-08-08 22:20:29 +0000943 WideString wstr;
944 {
945 // Span's lifetime must end before ReleaseBuffer() below.
946 pdfium::span<wchar_t> dest_buf = wstr.GetBuffer(dest_len);
Tom Sepez662e7a82021-08-04 18:02:18 +0000947 FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, dest_buf);
Tom Sepezf7658052018-08-08 22:20:29 +0000948 }
949 wstr.ReleaseBuffer(dest_len);
950 return wstr;
tsepez46bf0332016-04-06 16:48:26 -0700951}
952
953// static
Tom Sepezc839ac72018-12-14 20:34:11 +0000954WideString WideString::FromUTF8(ByteStringView str) {
Tom Sepezfd7cede2018-08-09 21:32:47 +0000955 return FX_UTF8Decode(str);
tsepez46bf0332016-04-06 16:48:26 -0700956}
957
958// static
Ryan Harrison875e98c2017-09-27 10:53:11 -0400959WideString WideString::FromUTF16LE(const unsigned short* wstr, size_t wlen) {
Tom Sepezb7973bb2018-04-17 16:41:28 +0000960 if (!wstr || wlen == 0)
Ryan Harrison275e2602017-09-18 14:23:18 -0400961 return WideString();
tsepez46bf0332016-04-06 16:48:26 -0700962
Ryan Harrison275e2602017-09-18 14:23:18 -0400963 WideString result;
Tom Sepezb7973bb2018-04-17 16:41:28 +0000964 {
965 // Span's lifetime must end before ReleaseBuffer() below.
966 pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
967 for (size_t i = 0; i < wlen; i++)
968 buf[i] = wstr[i];
tsepez46bf0332016-04-06 16:48:26 -0700969 }
970 result.ReleaseBuffer(wlen);
971 return result;
972}
973
Patrik Weiskircher6b4a2352019-12-06 20:05:35 +0000974WideString WideString::FromUTF16BE(const unsigned short* wstr, size_t wlen) {
975 if (!wstr || wlen == 0)
976 return WideString();
977
978 WideString result;
979 {
980 // Span's lifetime must end before ReleaseBuffer() below.
981 pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
982 for (size_t i = 0; i < wlen; i++) {
983 auto wch = wstr[i];
984 wch = (wch >> 8) | (wch << 8);
985 buf[i] = wch;
986 }
987 }
988 result.ReleaseBuffer(wlen);
989 return result;
990}
991
Ryan Harrison875e98c2017-09-27 10:53:11 -0400992void WideString::SetAt(size_t index, wchar_t c) {
Tom Sepez49f09322021-01-26 22:13:55 +0000993 DCHECK(IsValidIndex(index));
Ryan Harrison8a1758b2017-08-15 10:37:59 -0400994 ReallocBeforeWrite(m_pData->m_nDataLength);
995 m_pData->m_String[index] = c;
996}
997
Andrew Weintraub43d760a2019-06-24 17:45:20 +0000998int WideString::Compare(const wchar_t* str) const {
tsepez46bf0332016-04-06 16:48:26 -0700999 if (m_pData)
Andrew Weintraub43d760a2019-06-24 17:45:20 +00001000 return str ? wcscmp(m_pData->m_String, str) : 1;
1001 return (!str || str[0] == 0) ? 0 : -1;
tsepez46bf0332016-04-06 16:48:26 -07001002}
1003
Ryan Harrison275e2602017-09-18 14:23:18 -04001004int WideString::Compare(const WideString& str) const {
Lei Zhangd7a3f982017-11-06 19:00:21 +00001005 if (!m_pData)
1006 return str.m_pData ? -1 : 0;
1007 if (!str.m_pData)
tsepez46bf0332016-04-06 16:48:26 -07001008 return 1;
Lei Zhangd7a3f982017-11-06 19:00:21 +00001009
Ryan Harrison875e98c2017-09-27 10:53:11 -04001010 size_t this_len = m_pData->m_nDataLength;
1011 size_t that_len = str.m_pData->m_nDataLength;
1012 size_t min_len = std::min(this_len, that_len);
Lei Zhangf3a7edc2017-12-12 18:42:18 +00001013 int result = wmemcmp(m_pData->m_String, str.m_pData->m_String, min_len);
1014 if (result != 0)
1015 return result;
1016 if (this_len == that_len)
1017 return 0;
Lei Zhang40c223e2018-03-14 18:08:36 +00001018 return this_len < that_len ? -1 : 1;
tsepez46bf0332016-04-06 16:48:26 -07001019}
1020
Andrew Weintraub43d760a2019-06-24 17:45:20 +00001021int WideString::CompareNoCase(const wchar_t* str) const {
Lei Zhangd7a3f982017-11-06 19:00:21 +00001022 if (m_pData)
Andrew Weintraub43d760a2019-06-24 17:45:20 +00001023 return str ? FXSYS_wcsicmp(m_pData->m_String, str) : 1;
1024 return (!str || str[0] == 0) ? 0 : -1;
tsepez46bf0332016-04-06 16:48:26 -07001025}
1026
Ryan Harrison875e98c2017-09-27 10:53:11 -04001027size_t WideString::WStringLength(const unsigned short* str) {
1028 size_t len = 0;
tsepez46bf0332016-04-06 16:48:26 -07001029 if (str)
1030 while (str[len])
1031 len++;
1032 return len;
1033}
1034
Lei Zhang135c6602017-11-21 18:07:00 +00001035void WideString::Trim() {
Lei Zhang7981d152018-01-11 14:26:01 +00001036 TrimRight(kWideTrimChars);
1037 TrimLeft(kWideTrimChars);
tsepez46bf0332016-04-06 16:48:26 -07001038}
1039
Lei Zhang135c6602017-11-21 18:07:00 +00001040void WideString::Trim(wchar_t target) {
1041 wchar_t str[2] = {target, 0};
tsepez46bf0332016-04-06 16:48:26 -07001042 TrimRight(str);
Lei Zhang135c6602017-11-21 18:07:00 +00001043 TrimLeft(str);
tsepez46bf0332016-04-06 16:48:26 -07001044}
1045
Tom Sepez1ab27572018-12-14 20:31:31 +00001046void WideString::Trim(WideStringView targets) {
Lei Zhang135c6602017-11-21 18:07:00 +00001047 TrimRight(targets);
1048 TrimLeft(targets);
tsepez46bf0332016-04-06 16:48:26 -07001049}
1050
Lei Zhang135c6602017-11-21 18:07:00 +00001051void WideString::TrimLeft() {
Lei Zhang7981d152018-01-11 14:26:01 +00001052 TrimLeft(kWideTrimChars);
Lei Zhang135c6602017-11-21 18:07:00 +00001053}
1054
1055void WideString::TrimLeft(wchar_t target) {
1056 wchar_t str[2] = {target, 0};
1057 TrimLeft(str);
1058}
1059
Tom Sepez1ab27572018-12-14 20:31:31 +00001060void WideString::TrimLeft(WideStringView targets) {
Lei Zhang135c6602017-11-21 18:07:00 +00001061 if (!m_pData || targets.IsEmpty())
tsepez46bf0332016-04-06 16:48:26 -07001062 return;
1063
Ryan Harrison875e98c2017-09-27 10:53:11 -04001064 size_t len = GetLength();
Ryan Harrison81f9eee2017-09-05 15:33:18 -04001065 if (len == 0)
tsepez46bf0332016-04-06 16:48:26 -07001066 return;
1067
Ryan Harrison875e98c2017-09-27 10:53:11 -04001068 size_t pos = 0;
tsepez184b8252016-04-11 10:56:00 -07001069 while (pos < len) {
Ryan Harrison875e98c2017-09-27 10:53:11 -04001070 size_t i = 0;
Lei Zhang135c6602017-11-21 18:07:00 +00001071 while (i < targets.GetLength() &&
1072 targets.CharAt(i) != m_pData->m_String[pos]) {
tsepez184b8252016-04-11 10:56:00 -07001073 i++;
1074 }
Lei Zhang135c6602017-11-21 18:07:00 +00001075 if (i == targets.GetLength())
tsepez46bf0332016-04-06 16:48:26 -07001076 break;
tsepez184b8252016-04-11 10:56:00 -07001077 pos++;
tsepez46bf0332016-04-06 16:48:26 -07001078 }
Ryan Harrison81f9eee2017-09-05 15:33:18 -04001079 if (!pos)
1080 return;
1081
1082 ReallocBeforeWrite(len);
Ryan Harrison875e98c2017-09-27 10:53:11 -04001083 size_t nDataLength = len - pos;
Ryan Harrison81f9eee2017-09-05 15:33:18 -04001084 memmove(m_pData->m_String, m_pData->m_String + pos,
1085 (nDataLength + 1) * sizeof(wchar_t));
1086 m_pData->m_nDataLength = nDataLength;
tsepez46bf0332016-04-06 16:48:26 -07001087}
1088
Lei Zhang135c6602017-11-21 18:07:00 +00001089void WideString::TrimRight() {
Lei Zhang7981d152018-01-11 14:26:01 +00001090 TrimRight(kWideTrimChars);
tsepez46bf0332016-04-06 16:48:26 -07001091}
1092
Lei Zhang135c6602017-11-21 18:07:00 +00001093void WideString::TrimRight(wchar_t target) {
1094 wchar_t str[2] = {target, 0};
1095 TrimRight(str);
tsepez46bf0332016-04-06 16:48:26 -07001096}
Lei Zhang135c6602017-11-21 18:07:00 +00001097
Tom Sepez1ab27572018-12-14 20:31:31 +00001098void WideString::TrimRight(WideStringView targets) {
Lei Zhang135c6602017-11-21 18:07:00 +00001099 if (IsEmpty() || targets.IsEmpty())
1100 return;
1101
1102 size_t pos = GetLength();
1103 while (pos && targets.Contains(m_pData->m_String[pos - 1]))
1104 pos--;
1105
1106 if (pos < m_pData->m_nDataLength) {
1107 ReallocBeforeWrite(m_pData->m_nDataLength);
1108 m_pData->m_String[pos] = 0;
1109 m_pData->m_nDataLength = pos;
1110 }
1111}
1112
Ryan Harrison275e2602017-09-18 14:23:18 -04001113int WideString::GetInteger() const {
Lei Zhang412e9082015-12-14 18:34:00 -08001114 return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
Nico Weber9d8ec5a2015-08-04 13:00:21 -07001115}
tsepez46bf0332016-04-06 16:48:26 -07001116
Ryan Harrison275e2602017-09-18 14:23:18 -04001117std::wostream& operator<<(std::wostream& os, const WideString& str) {
Ryan Harrison475f4332017-06-28 11:25:37 -04001118 return os.write(str.c_str(), str.GetLength());
1119}
1120
Ryan Harrison275e2602017-09-18 14:23:18 -04001121std::ostream& operator<<(std::ostream& os, const WideString& str) {
Tom Sepezb4c95fe2018-11-27 01:09:44 +00001122 os << str.ToUTF8();
Ryan Harrison475f4332017-06-28 11:25:37 -04001123 return os;
1124}
Ryan Harrison297f1612017-06-28 15:57:51 -04001125
Tom Sepez1ab27572018-12-14 20:31:31 +00001126std::wostream& operator<<(std::wostream& os, WideStringView str) {
Tom Sepez33b42e42017-07-19 13:19:12 -07001127 return os.write(str.unterminated_c_str(), str.GetLength());
Ryan Harrison297f1612017-06-28 15:57:51 -04001128}
1129
Tom Sepez1ab27572018-12-14 20:31:31 +00001130std::ostream& operator<<(std::ostream& os, WideStringView str) {
Ryan Harrison297f1612017-06-28 15:57:51 -04001131 os << FX_UTF8Encode(str);
1132 return os;
1133}
Ryan Harrison275e2602017-09-18 14:23:18 -04001134
1135} // namespace fxcrt
Tom Sepeza44f5aa2018-11-29 21:30:55 +00001136
Tom Sepez96e8a312021-05-27 16:42:47 +00001137uint32_t FX_HashCode_GetW(WideStringView str) {
Tom Sepeza44f5aa2018-11-29 21:30:55 +00001138 uint32_t dwHashCode = 0;
Tom Sepez96e8a312021-05-27 16:42:47 +00001139 for (WideStringView::UnsignedType c : str)
1140 dwHashCode = 1313 * dwHashCode + c;
1141 return dwHashCode;
1142}
1143
1144uint32_t FX_HashCode_GetLoweredW(WideStringView str) {
1145 uint32_t dwHashCode = 0;
1146 for (wchar_t c : str) // match FXSYS_towlower() arg type.
1147 dwHashCode = 1313 * dwHashCode + FXSYS_towlower(c);
Tom Sepeza44f5aa2018-11-29 21:30:55 +00001148 return dwHashCode;
1149}