Better tests for CFXJSE_FormCalcContext::Decode().
Fix the obvious overruns along the way.
Scope variables more tightly to avoid the potential for duplicate insertion of
stale values under dubious logic. Then combine some redundant code.
Introduce new functions for wide character classification.
Change-Id: I937c5c4030e4f614c399b6b3d82a43331008efd7
Reviewed-on: https://pdfium-review.googlesource.com/c/45793
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fxcrt/fx_extension.h b/core/fxcrt/fx_extension.h
index 2e46c26..ef79196 100644
--- a/core/fxcrt/fx_extension.h
+++ b/core/fxcrt/fx_extension.h
@@ -72,6 +72,10 @@
return !((c & 0x80) || !std::isxdigit(c));
}
+inline bool FXSYS_IsWideHexDigit(wchar_t c) {
+ return !((c & 0xFFFFFF80) || !std::isxdigit(c));
+}
+
inline int FXSYS_HexCharToInt(char c) {
if (!FXSYS_IsHexDigit(c))
return 0;
@@ -79,6 +83,13 @@
return upchar > '9' ? upchar - 'A' + 10 : upchar - '0';
}
+inline int FXSYS_WideHexCharToInt(wchar_t c) {
+ if (!FXSYS_IsWideHexDigit(c))
+ return 0;
+ char upchar = std::toupper(static_cast<char>(c));
+ return upchar > '9' ? upchar - 'A' + 10 : upchar - '0';
+}
+
inline bool FXSYS_IsDecimalDigit(char c) {
return !((c & 0x80) || !std::isdigit(c));
}
diff --git a/fxjs/cfxjse_formcalc_context.cpp b/fxjs/cfxjse_formcalc_context.cpp
index f831c89..13bd9af 100644
--- a/fxjs/cfxjse_formcalc_context.cpp
+++ b/fxjs/cfxjse_formcalc_context.cpp
@@ -958,202 +958,108 @@
WideString DecodeURL(const WideString& wsURLString) {
const wchar_t* pData = wsURLString.c_str();
- size_t i = 0;
+ size_t iLen = wsURLString.GetLength();
CFX_WideTextBuf wsResultBuf;
- while (i < wsURLString.GetLength()) {
+ for (size_t i = 0; i < iLen; ++i) {
wchar_t ch = pData[i];
if ('%' != ch) {
wsResultBuf.AppendChar(ch);
- ++i;
continue;
}
wchar_t chTemp = 0;
int32_t iCount = 0;
while (iCount < 2) {
- ++i;
+ if (++i >= iLen)
+ break;
+ chTemp *= 16;
ch = pData[i];
- if (FXSYS_IsDecimalDigit((ch))) {
- // TODO(dsinclair): Premultiply and add rather then scale.
- chTemp += (ch - '0') * (!iCount ? 16 : 1);
- } else if (ch <= 'F' && ch >= 'A') {
- chTemp += (ch - 'A' + 10) * (!iCount ? 16 : 1);
- } else if (ch <= 'f' && ch >= 'a') {
- chTemp += (ch - 'a' + 10) * (!iCount ? 16 : 1);
- } else {
+ if (!FXSYS_IsWideHexDigit(ch))
return WideString();
- }
+ chTemp += FXSYS_WideHexCharToInt(ch);
++iCount;
}
wsResultBuf.AppendChar(chTemp);
- ++i;
}
wsResultBuf.AppendChar(0);
return wsResultBuf.MakeString();
}
+WideString DecodeMLInternal(const WideString& wsHTMLString, bool bIsHTML) {
+ const wchar_t* pData = wsHTMLString.c_str();
+ size_t iLen = wsHTMLString.GetLength();
+ CFX_WideTextBuf wsResultBuf;
+ for (size_t i = 0; i < iLen; ++i) {
+ wchar_t ch = pData[i];
+ if (ch != '&') {
+ wsResultBuf.AppendChar(ch);
+ continue;
+ }
+
+ if (++i >= iLen)
+ break;
+ ch = pData[i];
+ if (ch == '#') {
+ if (++i >= iLen)
+ break;
+ ch = pData[i];
+ if (ch != 'x' && ch != 'X')
+ return WideString();
+ if (++i >= iLen)
+ break;
+ ch = pData[i];
+ uint32_t iCode = 0;
+ while (ch != ';' && i < iLen) {
+ iCode *= 16;
+ if (!FXSYS_IsWideHexDigit(ch))
+ return WideString();
+ iCode += FXSYS_WideHexCharToInt(ch);
+ if (++i >= iLen)
+ break;
+ ch = pData[i];
+ }
+ wsResultBuf.AppendChar(iCode);
+ continue;
+ }
+
+ wchar_t strString[9];
+ size_t iStrIndex = 0;
+ while (ch != ';' && i < iLen) {
+ if (iStrIndex < 8)
+ strString[iStrIndex++] = ch;
+ if (++i >= iLen)
+ break;
+ ch = pData[i];
+ }
+ strString[iStrIndex] = 0;
+ if (bIsHTML) {
+ uint32_t iData = 0;
+ if (HTMLSTR2Code(strString, &iData))
+ wsResultBuf.AppendChar((wchar_t)iData);
+ } else {
+ if (wcscmp(strString, L"quot") == 0)
+ wsResultBuf.AppendChar('"');
+ else if (wcscmp(strString, L"amp") == 0)
+ wsResultBuf.AppendChar('&');
+ else if (wcscmp(strString, L"apos") == 0)
+ wsResultBuf.AppendChar('\'');
+ else if (wcscmp(strString, L"lt") == 0)
+ wsResultBuf.AppendChar('<');
+ else if (wcscmp(strString, L"gt") == 0)
+ wsResultBuf.AppendChar('>');
+ }
+ }
+
+ wsResultBuf.AppendChar(0);
+ return wsResultBuf.MakeString();
+}
+
WideString DecodeHTML(const WideString& wsHTMLString) {
- wchar_t strString[9];
- size_t iStrIndex = 0;
- size_t iLen = wsHTMLString.GetLength();
- size_t i = 0;
- int32_t iCode = 0;
- const wchar_t* pData = wsHTMLString.c_str();
- CFX_WideTextBuf wsResultBuf;
- while (i < iLen) {
- wchar_t ch = pData[i];
- if (ch != '&') {
- wsResultBuf.AppendChar(ch);
- ++i;
- continue;
- }
-
- ++i;
- ch = pData[i];
- if (ch == '#') {
- ++i;
- ch = pData[i];
- if (ch != 'x' && ch != 'X') {
- return WideString();
- }
-
- ++i;
- ch = pData[i];
- if (FXSYS_IsDecimalDigit(ch) || (ch <= 'f' && ch >= 'a') ||
- (ch <= 'F' && ch >= 'A')) {
- while (ch != ';' && i < iLen) {
- if (FXSYS_IsDecimalDigit(ch)) {
- iCode += ch - '0';
- } else if (ch <= 'f' && ch >= 'a') {
- iCode += ch - 'a' + 10;
- } else if (ch <= 'F' && ch >= 'A') {
- iCode += ch - 'A' + 10;
- } else {
- return WideString();
- }
- ++i;
- // TODO(dsinclair): Postmultiply seems wrong, start at zero
- // and pre-multiply then can remove the post divide.
- iCode *= 16;
- ch = pData[i];
- }
- iCode /= 16;
- }
- } else {
- while (ch != ';' && i < iLen) {
- strString[iStrIndex++] = ch;
- ++i;
- ch = pData[i];
- }
- strString[iStrIndex] = 0;
- }
- uint32_t iData = 0;
- if (HTMLSTR2Code(strString, &iData)) {
- wsResultBuf.AppendChar((wchar_t)iData);
- } else {
- wsResultBuf.AppendChar(iCode);
- }
- iStrIndex = 0;
- strString[iStrIndex] = 0;
- ++i;
- }
- wsResultBuf.AppendChar(0);
-
- return wsResultBuf.MakeString();
+ return DecodeMLInternal(wsHTMLString, true);
}
WideString DecodeXML(const WideString& wsXMLString) {
- wchar_t strString[9];
- int32_t iStrIndex = 0;
- int32_t iLen = wsXMLString.GetLength();
- int32_t i = 0;
- int32_t iCode = 0;
- wchar_t ch = 0;
- const wchar_t* pData = wsXMLString.c_str();
- CFX_WideTextBuf wsResultBuf;
- while (i < iLen) {
- ch = pData[i];
- if (ch != '&') {
- wsResultBuf.AppendChar(ch);
- ++i;
- continue;
- }
-
- // TODO(dsinclair): This is very similar to DecodeHTML, can they be
- // combined?
- ++i;
- ch = pData[i];
- if (ch == '#') {
- ++i;
- ch = pData[i];
- if (ch != 'x' && ch != 'X') {
- return WideString();
- }
-
- ++i;
- ch = pData[i];
- if ((FXSYS_IsDecimalDigit(ch)) || (ch <= 'f' && ch >= 'a') ||
- (ch <= 'F' && ch >= 'A')) {
- while (ch != ';') {
- if (FXSYS_IsDecimalDigit(ch)) {
- iCode += ch - '0';
- } else if (ch <= 'f' && ch >= 'a') {
- iCode += ch - 'a' + 10;
- } else if (ch <= 'F' && ch >= 'A') {
- iCode += ch - 'A' + 10;
- } else {
- return WideString();
- }
- ++i;
- iCode *= 16;
- ch = pData[i];
- }
- iCode /= 16;
- }
- } else {
- while (ch != ';' && i < iLen) {
- strString[iStrIndex++] = ch;
- ++i;
- ch = pData[i];
- }
- strString[iStrIndex] = 0;
- }
-
- const wchar_t* const strName[] = {L"quot", L"amp", L"apos", L"lt", L"gt"};
- int32_t iIndex = 0;
- while (iIndex < 5) {
- if (memcmp(strString, strName[iIndex], wcslen(strName[iIndex])) == 0) {
- break;
- }
- ++iIndex;
- }
- switch (iIndex) {
- case 0:
- wsResultBuf.AppendChar('"');
- break;
- case 1:
- wsResultBuf.AppendChar('&');
- break;
- case 2:
- wsResultBuf.AppendChar('\'');
- break;
- case 3:
- wsResultBuf.AppendChar('<');
- break;
- case 4:
- wsResultBuf.AppendChar('>');
- break;
- default:
- wsResultBuf.AppendChar(iCode);
- break;
- }
- iStrIndex = 0;
- strString[iStrIndex] = 0;
- ++i;
- iCode = 0;
- }
- wsResultBuf.AppendChar(0);
- return wsResultBuf.MakeString();
+ return DecodeMLInternal(wsXMLString, false);
}
WideString EncodeURL(const ByteString& szURLString) {
diff --git a/fxjs/cfxjse_formcalc_context_embeddertest.cpp b/fxjs/cfxjse_formcalc_context_embeddertest.cpp
index d348f5b..30a3143 100644
--- a/fxjs/cfxjse_formcalc_context_embeddertest.cpp
+++ b/fxjs/cfxjse_formcalc_context_embeddertest.cpp
@@ -1057,16 +1057,32 @@
const char* program;
const char* result;
} tests[] = {
- {"Decode(\"ÆÁÂÁÂ\", \"html\")", "ÆÁÂÁÂ"},
- // {"Decode(\"~!@#$%%^&*()_+|`{"}[]<>?,./;':\", "
- // "\"xml\")",
- // "~!@#$%%^&*()_+|`{"
- // "}[]<>?,./;':"}
+ // HTML
+ {R"(Decode("", "html"))", ""},
+ {R"(Decode("abcÂxyz", "html"))", "abc\xC3\x82xyz"},
+ {R"(Decode("abc&NoneSuchButVeryLongIndeed;", "html"))", "abc"},
+ {R"(Decode("AÆÁ", "html"))", "A\xC3\x86\xC3\x81"},
+ {R"(Decode("xyz&#", "html"))", "xyz"},
+
+ // XML
+ {R"(Decode("", "xml"))", ""},
+ {R"(Decode("~!@#$%%^&*()_+|`", "xml"))", "~!@#$%%^&*()_+|`"},
+ {R"(Decode("abc&nonesuchbutverylongindeed;", "xml"))", "abc"},
+ {R"(Decode(""E<>[].'", "xml"))", "\"E<>[].'"},
+ {R"(Decode("xyz&#", "xml"))", "xyz"},
+
+ // URL
+ {R"(Decode("", "url"))", ""},
+ {R"(Decode("~%26^&*()_+|`{", "url"))", "~&^&*()_+|`{"},
+ {R"(Decode("~%26^&*()_+|`{", "mbogo"))", "~&^&*()_+|`{"},
+ {R"(Decode("~%26^&*()_+|`{"))", "~&^&*()_+|`{"},
+ {R"(Decode("~%~~"))", ""},
+ {R"(Decode("?%~"))", ""},
+ {R"(Decode("?%"))", "?"},
};
for (size_t i = 0; i < FX_ArraySize(tests); ++i) {
EXPECT_TRUE(Execute(tests[i].program));
-
CFXJSE_Value* value = GetValue();
EXPECT_TRUE(value->IsString());
EXPECT_STREQ(tests[i].result, value->ToString().c_str())