Explicitly mark helper methods that only operate on ASCII ranges A number of our character helper methods take in wide character types, but only do tests/operations on the ASCII range of characters. As a very quick first pass I am renaming all of the foot-gun methods to explicitly call out this behaviour, while I do a bigger cleanup/refactor. BUG=pdfium:1035 Change-Id: Ia035dfa1cb6812fa6d45155c4565475032c4c165 Reviewed-on: https://pdfium-review.googlesource.com/28330 Commit-Queue: Ryan Harrison <rharrison@chromium.org> Commit-Queue: dsinclair <dsinclair@chromium.org> Reviewed-by: Henrique Nakashima <hnakashima@chromium.org> Reviewed-by: dsinclair <dsinclair@chromium.org>

commit: 69da36c5f841e8c6e5ded6c704d9ef58c57d532a [log] [tgz]
author: Ryan Harrison <rharrison@chromium.org> Fri Mar 09 17:46:50 2018 +0000
committer: Chromium commit bot <commit-bot@chromium.org> Fri Mar 09 17:46:50 2018 +0000
tree: 42f50adb5d062f3971f8c6a4f899e737f5462c6c
parent: 3baef5c6daf58cec2df193714b5727802d0bd42e [diff]
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index c3cf4fc..f8144a1 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp

@@ -246,7 +246,7 @@
   size_t pPos = aPos.value();  // Used to track the position of '@' or '.'.
   for (size_t i = aPos.value(); i > 0; i--) {
     wchar_t ch = (*str)[i - 1];
-    if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
+    if (ch == L'_' || ch == L'-' || FXSYS_iswASCIIalnum(ch))
       continue;
 
     if (ch != L'.' || i == pPos || i == 1) {
@@ -282,7 +282,7 @@
   pPos = 0;  // Used to track the position of '.'.
   for (size_t i = aPos.value() + 1; i < nLen; i++) {
     wchar_t wch = (*str)[i];
-    if (wch == L'-' || FXSYS_iswalnum(wch))
+    if (wch == L'-' || FXSYS_iswASCIIalnum(wch))
       continue;
 
     if (wch != L'.' || i == pPos + 1) {

diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index e712549..5019eab 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp

@@ -1210,7 +1210,7 @@
 
   if ((iter + 1) != curText.rend()) {
     iter++;
-    if (FXSYS_iswalpha(*iter) && FXSYS_iswalnum(curChar))
+    if (FXSYS_iswASCIIalpha(*iter) && FXSYS_iswASCIIalnum(curChar))
       return true;
   }
 

diff --git a/core/fxcrt/bytestring.cpp b/core/fxcrt/bytestring.cpp
index 2ab1379..2a57602 100644
--- a/core/fxcrt/bytestring.cpp
+++ b/core/fxcrt/bytestring.cpp

@@ -344,8 +344,8 @@
   const uint8_t* pThat = str.raw_str();
   for (size_t i = 0; i < len; i++) {
     if ((*pThis) != (*pThat)) {
-      uint8_t bThis = FXSYS_tolower(*pThis);
-      uint8_t bThat = FXSYS_tolower(*pThat);
+      uint8_t bThis = FXSYS_toASCIIlower(*pThis);
+      uint8_t bThat = FXSYS_toASCIIlower(*pThat);
       if (bThis != bThat)
         return false;
     }

diff --git a/core/fxcrt/css/cfx_cssselector.cpp b/core/fxcrt/css/cfx_cssselector.cpp
index 3993dca..76cb846 100644
--- a/core/fxcrt/css/cfx_cssselector.cpp
+++ b/core/fxcrt/css/cfx_cssselector.cpp

@@ -17,7 +17,7 @@
   const wchar_t* pStart = psz;
   while (psz < pEnd) {
     wchar_t wch = *psz;
-    if (!FXSYS_iswalnum(wch) && wch != '_' && wch != '-')
+    if (!FXSYS_iswASCIIalnum(wch) && wch != '_' && wch != '-')
       break;
     ++psz;
   }
@@ -67,7 +67,7 @@
   std::unique_ptr<CFX_CSSSelector> pFirst = nullptr;
   for (psz = pStart; psz < pEnd;) {
     wchar_t wch = *psz;
-    if (FXSYS_iswalpha(wch) || wch == '*') {
+    if (FXSYS_iswASCIIalpha(wch) || wch == '*') {
       int32_t iNameLen = wch == '*' ? 1 : GetCSSNameLen(psz, pEnd);
       auto p = pdfium::MakeUnique<CFX_CSSSelector>(CFX_CSSSelectorType::Element,
                                                    psz, iNameLen, true);

diff --git a/core/fxcrt/css/cfx_csssyntaxparser.cpp b/core/fxcrt/css/cfx_csssyntaxparser.cpp
index 30352a5..fea951f 100644
--- a/core/fxcrt/css/cfx_csssyntaxparser.cpp
+++ b/core/fxcrt/css/cfx_csssyntaxparser.cpp

@@ -18,7 +18,7 @@
 namespace {
 
 bool IsSelectorStart(wchar_t wch) {
-  return wch == '.' || wch == '#' || wch == '*' || FXSYS_iswalpha(wch);
+  return wch == '.' || wch == '#' || wch == '*' || FXSYS_iswASCIIalpha(wch);
 }
 
 }  // namespace

diff --git a/core/fxcrt/fx_extension.cpp b/core/fxcrt/fx_extension.cpp
index 371f1b5..7bfcec6 100644
--- a/core/fxcrt/fx_extension.cpp
+++ b/core/fxcrt/fx_extension.cpp

@@ -68,8 +68,8 @@
   ASSERT(s1 && s2 && count > 0);
   wchar_t wch1 = 0, wch2 = 0;
   while (count-- > 0) {
-    wch1 = static_cast<wchar_t>(FXSYS_tolower(*s1++));
-    wch2 = static_cast<wchar_t>(FXSYS_tolower(*s2++));
+    wch1 = static_cast<wchar_t>(FXSYS_toASCIIlower(*s1++));
+    wch2 = static_cast<wchar_t>(FXSYS_toASCIIlower(*s2++));
     if (wch1 != wch2)
       break;
   }
@@ -80,7 +80,7 @@
   uint32_t dwHashCode = 0;
   if (bIgnoreCase) {
     for (const auto& c : str)
-      dwHashCode = 31 * dwHashCode + FXSYS_tolower(c);
+      dwHashCode = 31 * dwHashCode + FXSYS_toASCIIlower(c);
   } else {
     for (const auto& c : str)
       dwHashCode = 31 * dwHashCode + c;
@@ -92,7 +92,7 @@
   uint32_t dwHashCode = 0;
   if (bIgnoreCase) {
     for (const auto& c : str)
-      dwHashCode = 1313 * dwHashCode + FXSYS_tolower(c);
+      dwHashCode = 1313 * dwHashCode + FXSYS_toASCIIlower(c);
   } else {
     for (const auto& c : str)
       dwHashCode = 1313 * dwHashCode + c;

diff --git a/core/fxcrt/fx_extension.h b/core/fxcrt/fx_extension.h
index e02d58d..491d4b2 100644
--- a/core/fxcrt/fx_extension.h
+++ b/core/fxcrt/fx_extension.h

@@ -25,31 +25,31 @@
 wchar_t* FXSYS_wcsncpy(wchar_t* dstStr, const wchar_t* srcStr, size_t count);
 int32_t FXSYS_wcsnicmp(const wchar_t* s1, const wchar_t* s2, size_t count);
 
-inline bool FXSYS_islower(int32_t ch) {
+inline bool FXSYS_isASCIIlower(int32_t ch) {
   return ch >= 'a' && ch <= 'z';
 }
 
-inline bool FXSYS_isupper(int32_t ch) {
+inline bool FXSYS_isASCIIupper(int32_t ch) {
   return ch >= 'A' && ch <= 'Z';
 }
 
-inline int32_t FXSYS_tolower(int32_t ch) {
+inline int32_t FXSYS_toASCIIlower(int32_t ch) {
   return ch < 'A' || ch > 'Z' ? ch : (ch + 0x20);
 }
 
-inline int32_t FXSYS_toupper(int32_t ch) {
+inline int32_t FXSYS_toASCIIupper(int32_t ch) {
   return ch < 'a' || ch > 'z' ? ch : (ch - 0x20);
 }
 
-inline bool FXSYS_iswalpha(wchar_t wch) {
-  return FXSYS_isupper(wch) || FXSYS_islower(wch);
+inline bool FXSYS_iswASCIIalpha(wchar_t wch) {
+  return FXSYS_isASCIIupper(wch) || FXSYS_isASCIIlower(wch);
 }
 
-inline bool FXSYS_iswalnum(wchar_t wch) {
-  return FXSYS_iswalpha(wch) || std::iswdigit(wch);
+inline bool FXSYS_iswASCIIalnum(wchar_t wch) {
+  return FXSYS_iswASCIIalpha(wch) || std::iswdigit(wch);
 }
 
-inline bool FXSYS_iswspace(wchar_t c) {
+inline bool FXSYS_iswASCIIspace(wchar_t c) {
   return (c == 0x20) || (c == 0x0d) || (c == 0x0a) || (c == 0x09);
 }
 

diff --git a/core/fxcrt/fx_system.cpp b/core/fxcrt/fx_system.cpp
index a5ceec5..532e83b 100644
--- a/core/fxcrt/fx_system.cpp
+++ b/core/fxcrt/fx_system.cpp

@@ -118,7 +118,7 @@
   }
   char* s = str;
   while (*str) {
-    *str = FXSYS_tolower(*str);
+    *str = FXSYS_toASCIIlower(*str);
     str++;
   }
   return s;
@@ -129,7 +129,7 @@
   }
   char* s = str;
   while (*str) {
-    *str = FXSYS_toupper(*str);
+    *str = FXSYS_toASCIIupper(*str);
     str++;
   }
   return s;
@@ -140,7 +140,7 @@
   }
   wchar_t* s = str;
   while (*str) {
-    *str = FXSYS_tolower(*str);
+    *str = FXSYS_toASCIIlower(*str);
     str++;
   }
   return s;
@@ -151,7 +151,7 @@
   }
   wchar_t* s = str;
   while (*str) {
-    *str = FXSYS_toupper(*str);
+    *str = FXSYS_toASCIIupper(*str);
     str++;
   }
   return s;
@@ -161,8 +161,8 @@
   int f;
   int l;
   do {
-    f = FXSYS_toupper(*dst);
-    l = FXSYS_toupper(*src);
+    f = FXSYS_toASCIIupper(*dst);
+    l = FXSYS_toASCIIupper(*src);
     ++dst;
     ++src;
   } while (f && f == l);
@@ -173,8 +173,8 @@
   wchar_t f;
   wchar_t l;
   do {
-    f = FXSYS_toupper(*dst);
-    l = FXSYS_toupper(*src);
+    f = FXSYS_toASCIIupper(*dst);
+    l = FXSYS_toASCIIupper(*src);
     ++dst;
     ++src;
   } while (f && f == l);

diff --git a/core/fxge/android/cfpf_skiafontmgr.cpp b/core/fxge/android/cfpf_skiafontmgr.cpp
index d8b751d..7413a69 100644
--- a/core/fxge/android/cfpf_skiafontmgr.cpp
+++ b/core/fxge/android/cfpf_skiafontmgr.cpp

@@ -97,7 +97,7 @@
   const char* pStrEnd = pStr + iLength;
   uint32_t uHashCode = 0;
   while (pStr < pStrEnd)
-    uHashCode = 31 * uHashCode + FXSYS_tolower(*pStr++);
+    uHashCode = 31 * uHashCode + FXSYS_toASCIIlower(*pStr++);
   return uHashCode;
 }
 
@@ -167,7 +167,7 @@
     char ch = pBuffer[i];
     if (ch == ' ' || ch == '-' || ch == ',')
       continue;
-    dwHash = 31 * dwHash + FXSYS_tolower(ch);
+    dwHash = 31 * dwHash + FXSYS_toASCIIlower(ch);
   }
   return dwHash;
 }

diff --git a/core/fxge/dib/fx_dib_main.cpp b/core/fxge/dib/fx_dib_main.cpp
index 68e06a6..f0767f3 100644
--- a/core/fxge/dib/fx_dib_main.cpp
+++ b/core/fxge/dib/fx_dib_main.cpp

@@ -97,7 +97,7 @@
   int cc = 0;
   const wchar_t* str = wsValue.unterminated_c_str();
   int len = wsValue.GetLength();
-  while (FXSYS_iswspace(str[cc]) && cc < len)
+  while (FXSYS_iswASCIIspace(str[cc]) && cc < len)
     cc++;
 
   if (cc >= len)
@@ -112,7 +112,7 @@
   }
   if (cc < len && str[cc] == ',') {
     cc++;
-    while (FXSYS_iswspace(str[cc]) && cc < len)
+    while (FXSYS_iswASCIIspace(str[cc]) && cc < len)
       cc++;
 
     while (cc < len) {
@@ -124,7 +124,7 @@
     }
     if (cc < len && str[cc] == ',') {
       cc++;
-      while (FXSYS_iswspace(str[cc]) && cc < len)
+      while (FXSYS_iswASCIIspace(str[cc]) && cc < len)
         cc++;
 
       while (cc < len) {

diff --git a/fpdfsdk/pwl/cpwl_list_impl.cpp b/fpdfsdk/pwl/cpwl_list_impl.cpp
index da455d06..561ef1b 100644
--- a/fpdfsdk/pwl/cpwl_list_impl.cpp
+++ b/fpdfsdk/pwl/cpwl_list_impl.cpp

@@ -608,7 +608,8 @@
       nCircleIndex = 0;
 
     if (Item* pListItem = m_ListItems[nCircleIndex].get()) {
-      if (FXSYS_toupper(pListItem->GetFirstChar()) == FXSYS_toupper(nChar))
+      if (FXSYS_toASCIIupper(pListItem->GetFirstChar()) ==
+          FXSYS_toASCIIupper(nChar))
         return nCircleIndex;
     }
   }

diff --git a/fxjs/cfxjse_formcalc_context.cpp b/fxjs/cfxjse_formcalc_context.cpp
index 9cd3a73..14053c8 100644
--- a/fxjs/cfxjse_formcalc_context.cpp
+++ b/fxjs/cfxjse_formcalc_context.cpp

@@ -1892,7 +1892,7 @@
     iIndex += kSubSecondLength;
   }
 
-  if (iIndex < iLength && FXSYS_tolower(pData[iIndex]) == 'z')
+  if (iIndex < iLength && FXSYS_toASCIIlower(pData[iIndex]) == 'z')
     return true;
 
   int32_t iSign = 1;

diff --git a/fxjs/cjs_publicmethods.cpp b/fxjs/cjs_publicmethods.cpp
index 74428c3..d613f48 100644
--- a/fxjs/cjs_publicmethods.cpp
+++ b/fxjs/cjs_publicmethods.cpp

@@ -283,9 +283,9 @@
     case L'9':
       return !!std::iswdigit(c_Change);
     case L'A':
-      return FXSYS_iswalpha(c_Change);
+      return FXSYS_iswASCIIalpha(c_Change);
     case L'O':
-      return FXSYS_iswalnum(c_Change);
+      return FXSYS_iswASCIIalnum(c_Change);
     case L'X':
       return true;
     default:

diff --git a/fxjs/cjs_util.cpp b/fxjs/cjs_util.cpp
index 56bf413..d552fcd 100644
--- a/fxjs/cjs_util.cpp
+++ b/fxjs/cjs_util.cpp

@@ -264,9 +264,9 @@
 enum CaseMode { kPreserveCase, kUpperCase, kLowerCase };
 
 static wchar_t TranslateCase(wchar_t input, CaseMode eMode) {
-  if (eMode == kLowerCase && FXSYS_isupper(input))
+  if (eMode == kLowerCase && FXSYS_isASCIIupper(input))
     return input | 0x20;
-  if (eMode == kUpperCase && FXSYS_islower(input))
+  if (eMode == kUpperCase && FXSYS_isASCIIlower(input))
     return input & ~0x20;
   return input;
 }
@@ -311,7 +311,7 @@
       } break;
       case 'X': {
         if (iSourceIdx < wsSource.GetLength()) {
-          if (FXSYS_iswalnum(wsSource[iSourceIdx])) {
+          if (FXSYS_iswASCIIalnum(wsSource[iSourceIdx])) {
             wsResult += TranslateCase(wsSource[iSourceIdx], eCaseMode);
             ++iFormatIdx;
           }
@@ -322,7 +322,7 @@
       } break;
       case 'A': {
         if (iSourceIdx < wsSource.GetLength()) {
-          if (FXSYS_iswalpha(wsSource[iSourceIdx])) {
+          if (FXSYS_iswASCIIalpha(wsSource[iSourceIdx])) {
             wsResult += TranslateCase(wsSource[iSourceIdx], eCaseMode);
             ++iFormatIdx;
           }

diff --git a/xfa/fgas/crt/cfgas_formatstring.cpp b/xfa/fgas/crt/cfgas_formatstring.cpp
index d7273d0..e4185b8 100644
--- a/xfa/fgas/crt/cfgas_formatstring.cpp
+++ b/xfa/fgas/crt/cfgas_formatstring.cpp

@@ -1096,7 +1096,7 @@
         break;
       }
       case 'A':
-        if (FXSYS_iswalpha(pStrText[iText])) {
+        if (FXSYS_iswASCIIalpha(pStrText[iText])) {
           *wsValue += pStrText[iText];
           iText++;
         }
@@ -1110,7 +1110,7 @@
       case 'O':
       case '0':
         if (FXSYS_isDecimalDigit(pStrText[iText]) ||
-            FXSYS_iswalpha(pStrText[iText])) {
+            FXSYS_iswASCIIalpha(pStrText[iText])) {
           *wsValue += pStrText[iText];
           iText++;
         }
@@ -1815,7 +1815,7 @@
         break;
       }
       case 'A':
-        if (iText >= iLenText || !FXSYS_iswalpha(pStrText[iText]))
+        if (iText >= iLenText || !FXSYS_iswASCIIalpha(pStrText[iText]))
           return false;
 
         *wsOutput += pStrText[iText++];
@@ -1831,7 +1831,7 @@
       case 'O':
       case '0':
         if (iText >= iLenText || (!FXSYS_isDecimalDigit(pStrText[iText]) &&
-                                  !FXSYS_iswalpha(pStrText[iText]))) {
+                                  !FXSYS_iswASCIIalpha(pStrText[iText]))) {
           return false;
         }
         *wsOutput += pStrText[iText++];

diff --git a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
index 3559fb7..4dc8788 100644
--- a/xfa/fxfa/fm2js/cxfa_fmlexer.cpp
+++ b/xfa/fxfa/fm2js/cxfa_fmlexer.cpp

@@ -306,7 +306,7 @@
   wchar_t* end = nullptr;
   if (m_cursor)
     wcstod(const_cast<wchar_t*>(m_cursor), &end);
-  if (!end || FXSYS_iswalpha(*end)) {
+  if (!end || FXSYS_iswASCIIalpha(*end)) {
     RaiseError();
     return CXFA_FMToken();
   }

diff --git a/xfa/fxfa/parser/cxfa_localevalue.cpp b/xfa/fxfa/parser/cxfa_localevalue.cpp
index 8ef67e5..57d6fbb 100644
--- a/xfa/fxfa/parser/cxfa_localevalue.cpp
+++ b/xfa/fxfa/parser/cxfa_localevalue.cpp

@@ -202,7 +202,7 @@
     bool bExpSign = false;
     const wchar_t* str = m_wsValue.c_str();
     int len = m_wsValue.GetLength();
-    while (FXSYS_iswspace(str[cc]) && cc < len)
+    while (FXSYS_iswASCIIspace(str[cc]) && cc < len)
       cc++;
 
     if (cc >= len)
commit	69da36c5f841e8c6e5ded6c704d9ef58c57d532a	[log] [tgz]
author	Ryan Harrison <rharrison@chromium.org>	Fri Mar 09 17:46:50 2018 +0000
committer	Chromium commit bot <commit-bot@chromium.org>	Fri Mar 09 17:46:50 2018 +0000
tree	42f50adb5d062f3971f8c6a4f899e737f5462c6c
parent	3baef5c6daf58cec2df193714b5727802d0bd42e [diff]