// core/fpdftext/cpdf_textpagefind.cpp - pdfium - Git at Google

 // Copyright 2016 PDFium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

 #include "core/fpdftext/cpdf_textpagefind.h"

 #include <cwchar>
 #include <cwctype>
 #include <vector>

 #include "core/fpdftext/cpdf_textpage.h"
 #include "core/fxcrt/fx_string.h"
 #include "core/fxcrt/fx_system.h"
 #include "third_party/base/stl_util.h"

 namespace {

 // Classifies |curChar| for the search-term splitter. Returns false for code
 // points below 255, for U+2113, and for every code point inside one of the
 // inclusive ranges in the table below. Returns true for everything else.
 bool IsIgnoreSpaceCharacter(wchar_t curChar) {
   struct CodePointRange {
     wchar_t first;
     wchar_t last;
   };
   static const CodePointRange kExcludedRanges[] = {
       {0x0600, 0x06FF},  // Arabic
       {0xFE70, 0xFEFF},  // Arabic Presentation Forms-B
       {0xFB50, 0xFDFF},  // Arabic Presentation Forms-A
       {0x0400, 0x04FF},  // Cyrillic
       {0x0500, 0x052F},  // Cyrillic Supplement
       {0xA640, 0xA69F},  // Cyrillic Extended-B
       {0x2DE0, 0x2DFF},  // Cyrillic Extended-A
       {0x2000, 0x206F},  // General Punctuation
   };

   // 8467 is U+2113 (SCRIPT SMALL L).
   if (curChar < 255 || curChar == 8467)
     return false;

   for (const CodePointRange& range : kExcludedRanges) {
     if (curChar >= range.first && curChar <= range.last)
       return false;
   }
   return true;
 }

 }  // namespace

 // Caches the page text and builds m_CharIndex from the per-character flags.
 // m_CharIndex is filled as a flat list of pairs: an even slot holds the
 // character index where a run of NORMAL/GENERATED characters begins and the
 // following odd slot holds the number of characters in that run.
 CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage)
     : m_pTextPage(pTextPage),
       m_flags(0),
       m_bMatchCase(false),
       m_bMatchWholeWord(false),
       m_resStart(0),
       m_resEnd(-1),
       m_IsFind(false) {
   m_strText = m_pTextPage->GetAllPageText();

   const int char_count = pTextPage->CountChars();
   if (char_count)
     m_CharIndex.push_back(0);  // Tentative start of the first run.

   for (int i = 0; i < char_count; ++i) {
     FPDF_CHAR_INFO info;
     pTextPage->GetCharInfo(i, &info);

     // An odd size means a run start has been recorded without a count yet.
     const bool start_without_count =
         pdfium::CollectionSize<int>(m_CharIndex) % 2 != 0;
     const bool is_text_char = info.m_Flag == FPDFTEXT_CHAR_NORMAL ||
                               info.m_Flag == FPDFTEXT_CHAR_GENERATED;

     if (is_text_char) {
       if (start_without_count)
         m_CharIndex.push_back(1);  // First character of the run.
       else if (!m_CharIndex.empty())
         m_CharIndex.back() += 1;  // Extend the current run.
       continue;
     }

     if (start_without_count)
       m_CharIndex.back() = i + 1;  // Run has not begun; move its start.
     else
       m_CharIndex.push_back(i + 1);  // Close the run; tentative next start.
   }

   // Discard a trailing start that never received a count.
   if (pdfium::CollectionSize<int>(m_CharIndex) % 2)
     m_CharIndex.pop_back();
 }

 // No explicit cleanup is performed here.
 CPDF_TextPageFind::~CPDF_TextPageFind() = default;

 // Converts |index|, an index into the page text, into a character index by
 // deferring to CPDF_TextPage::CharIndexFromTextIndex().
 int CPDF_TextPageFind::GetCharIndex(int index) const {
   const int char_index = m_pTextPage->CharIndexFromTextIndex(index);
   return char_index;
 }

 bool CPDF_TextPageFind::FindFirst(const WideString& findwhat,
                                   int flags,
                                   Optional<size_t> startPos) {
   if (!m_pTextPage)
     return false;
   if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE))
     m_strText = m_pTextPage->GetAllPageText();
   WideString findwhatStr = findwhat;
   m_findWhat = findwhatStr;
   m_flags = flags;
   m_bMatchCase = flags & FPDFTEXT_MATCHCASE;
   if (m_strText.IsEmpty()) {
     m_IsFind = false;
     return true;
   }
   size_t len = findwhatStr.GetLength();
   if (!m_bMatchCase) {
     findwhatStr.MakeLower();
     m_strText.MakeLower();
   }
   m_bMatchWholeWord = !!(flags & FPDFTEXT_MATCHWHOLEWORD);
   m_findNextStart = startPos;
   if (!startPos.has_value()) {
     if (!m_strText.IsEmpty())
       m_findPreStart = m_strText.GetLength() - 1;
   } else {
     m_findPreStart = startPos;
   }

   m_csFindWhatArray.clear();
   size_t i = 0;
   for (i = 0; i < len; ++i)
     if (findwhatStr[i] != ' ')
       break;
   if (i < len)
     ExtractFindWhat(findwhatStr);
   else
     m_csFindWhatArray.push_back(findwhatStr);
   if (m_csFindWhatArray.empty())
     return false;

   m_IsFind = true;
   m_resStart = 0;
   m_resEnd = -1;
   return true;
 }

 bool CPDF_TextPageFind::FindNext() {
   if (!m_pTextPage)
     return false;
   m_resArray.clear();
   if (!m_findNextStart.has_value())
     return false;
   if (m_strText.IsEmpty()) {
     m_IsFind = false;
     return m_IsFind;
   }
   size_t strLen = m_strText.GetLength();
   if (m_findNextStart.value() > strLen - 1) {
     m_IsFind = false;
     return m_IsFind;
   }
   int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray);
   Optional<size_t> nResultPos = 0;
   size_t nStartPos = m_findNextStart.value();
   bool bSpaceStart = false;
   for (int iWord = 0; iWord < nCount; iWord++) {
     WideString csWord = m_csFindWhatArray[iWord];
     if (csWord.IsEmpty()) {
       if (iWord == nCount - 1) {
         wchar_t strInsert = m_strText[nStartPos];
         if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_SPACE_CHAR ||
             strInsert == TEXT_RETURN_CHAR || strInsert == 160) {
           nResultPos = nStartPos + 1;
           break;
         }
         iWord = -1;
       } else if (iWord == 0) {
         bSpaceStart = true;
       }
       continue;
     }
     nResultPos = m_strText.Find(csWord.AsStringView(), nStartPos);
     if (!nResultPos.has_value()) {
       m_IsFind = false;
       return m_IsFind;
     }
     size_t endIndex = nResultPos.value() + csWord.GetLength() - 1;
     if (iWord == 0)
       m_resStart = nResultPos.value();
     bool bMatch = true;
     if (iWord != 0 && !bSpaceStart) {
       size_t PreResEndPos = nStartPos;
       int curChar = csWord[0];
       WideString lastWord = m_csFindWhatArray[iWord - 1];
       int lastChar = lastWord[lastWord.GetLength() - 1];
       if (nStartPos == nResultPos.value() &&
           !(IsIgnoreSpaceCharacter(lastChar) ||
             IsIgnoreSpaceCharacter(curChar))) {
         bMatch = false;
       }
       for (size_t d = PreResEndPos; d < nResultPos.value(); d++) {
         wchar_t strInsert = m_strText[d];
         if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
             strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
           bMatch = false;
           break;
         }
       }
     } else if (bSpaceStart) {
       if (nResultPos.value() > 0) {
         wchar_t strInsert = m_strText[nResultPos.value() - 1];
         if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
             strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
           bMatch = false;
           m_resStart = nResultPos.value();
         } else {
           m_resStart = nResultPos.value() - 1;
         }
       }
     }
     if (m_bMatchWholeWord && bMatch) {
       bMatch = IsMatchWholeWord(m_strText, nResultPos.value(), endIndex);
     }
     nStartPos = endIndex + 1;
     if (!bMatch) {
       iWord = -1;
       if (bSpaceStart)
         nStartPos = m_resStart + m_csFindWhatArray[1].GetLength();
       else
         nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();
     }
   }
   m_resEnd = nResultPos.value() + m_csFindWhatArray.back().GetLength() - 1;
   m_IsFind = true;
   int resStart = GetCharIndex(m_resStart);
   int resEnd = GetCharIndex(m_resEnd);
   m_resArray = m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1);
   if (m_flags & FPDFTEXT_CONSECUTIVE) {
     m_findNextStart = m_resStart + 1;
     m_findPreStart = m_resEnd - 1;
   } else {
     m_findNextStart = m_resEnd + 1;
     m_findPreStart = m_resStart - 1;
   }
   return m_IsFind;
 }

 bool CPDF_TextPageFind::FindPrev() {
   if (!m_pTextPage)
     return false;
   m_resArray.clear();
   if (m_strText.IsEmpty() || !m_findPreStart.has_value()) {
     m_IsFind = false;
     return m_IsFind;
   }
   CPDF_TextPageFind findEngine(m_pTextPage.Get());
   bool ret = findEngine.FindFirst(m_findWhat, m_flags, Optional<size_t>(0));
   if (!ret) {
     m_IsFind = false;
     return m_IsFind;
   }
   int order = -1;
   int MatchedCount = 0;
   while (ret) {
     ret = findEngine.FindNext();
     if (ret) {
       int order1 = findEngine.GetCurOrder();
       int MatchedCount1 = findEngine.GetMatchedCount();
       int temp = order1 + MatchedCount1;
       if (temp < 0 || static_cast<size_t>(temp) > m_findPreStart.value() + 1)
         break;
       order = order1;
       MatchedCount = MatchedCount1;
     }
   }
   if (order == -1) {
     m_IsFind = false;
     return m_IsFind;
   }
   m_resStart = m_pTextPage->TextIndexFromCharIndex(order);
   m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);
   m_IsFind = true;
   m_resArray = m_pTextPage->GetRectArray(order, MatchedCount);
   if (m_flags & FPDFTEXT_CONSECUTIVE) {
     m_findNextStart = m_resStart + 1;
     m_findPreStart = m_resEnd - 1;
   } else {
     m_findNextStart = m_resEnd + 1;
     m_findPreStart = m_resStart - 1;
   }
   return m_IsFind;
 }

 void CPDF_TextPageFind::ExtractFindWhat(const WideString& findwhat) {
   if (findwhat.IsEmpty())
     return;
   int index = 0;
   while (1) {
     Optional<WideString> word =
         ExtractSubString(findwhat.c_str(), index, TEXT_SPACE_CHAR);
     if (!word)
       break;

     if (word->IsEmpty()) {
       m_csFindWhatArray.push_back(L"");
       index++;
       continue;
     }

     size_t pos = 0;
     while (pos < word->GetLength()) {
       WideString curStr = word->Mid(pos, 1);
       wchar_t curChar = (*word)[pos];
       if (IsIgnoreSpaceCharacter(curChar)) {
         if (pos > 0 && curChar == 0x2019) {
           pos++;
           continue;
         }
         if (pos > 0)
           m_csFindWhatArray.push_back(word->Left(pos));
         m_csFindWhatArray.push_back(curStr);
         if (pos == word->GetLength() - 1) {
           word->clear();
           break;
         }
         word.emplace(word->Right(word->GetLength() - pos - 1));
         pos = 0;
         continue;
       }
       pos++;
     }

     if (!word->IsEmpty())
       m_csFindWhatArray.push_back(word.value());
     index++;
   }
 }

 // Reports whether the inclusive range [startPos, endPos] of |csPageText|
 // stands alone as a word, i.e. neither the character just before it nor the
 // character just after it continues a word. A missing neighbour (start or
 // end of the text) is treated as the character 0.
 bool CPDF_TextPageFind::IsMatchWholeWord(const WideString& csPageText,
                                          size_t startPos,
                                          size_t endPos) {
   if (startPos > endPos)
     return false;

   // Only zero if the addition wraps around.
   const size_t char_count = endPos - startPos + 1;
   if (char_count == 0)
     return false;

   // A single character above 255 always counts as a whole word.
   if (char_count == 1 && csPageText[startPos] > 255)
     return true;

   wchar_t char_left = 0;
   wchar_t char_right = 0;
   if (startPos >= 1)
     char_left = csPageText[startPos - 1];
   if (startPos + char_count < csPageText.GetLength())
     char_right = csPageText[startPos + char_count];

   // A neighbour continues a word when it lies in 'A'..'z' (this contiguous
   // range also contains the six punctuation characters between 'Z' and
   // 'a'), strictly between 0xfb00 and 0xfb06, or is a digit. The former
   // trailing digit-vs-digit checks could never fire, because any digit
   // neighbour is already rejected here.
   const auto continues_word = [](wchar_t ch) {
     return (ch >= 'A' && ch <= 'z') || (ch > 0xfb00 && ch < 0xfb06) ||
            std::iswdigit(ch);
   };
   return !continues_word(char_left) && !continues_word(char_right);
 }

 // Returns the |iSubString|-th piece (0-based) of |lpszFullString| when split
 // on |chSep|. While skipping to the requested piece, a run of consecutive
 // separators is stepped over as a whole. Returns an empty Optional when the
 // input is null or has fewer pieces than requested.
 Optional<WideString> CPDF_TextPageFind::ExtractSubString(
     const wchar_t* lpszFullString,
     int iSubString,
     wchar_t chSep) {
   if (!lpszFullString)
     return {};

   const wchar_t* cursor = lpszFullString;
   for (int remaining = iSubString; remaining != 0; --remaining) {
     cursor = std::wcschr(cursor, chSep);
     if (!cursor)
       return {};

     // Step past the separator that was found and any that follow it.
     do {
       ++cursor;
     } while (*cursor == chSep);
   }

   const wchar_t* piece_end = std::wcschr(cursor, chSep);
   const int piece_len = piece_end ? static_cast<int>(piece_end - cursor)
                                   : static_cast<int>(wcslen(cursor));
   if (piece_len < 0)
     return {};

   return {WideString(cursor, static_cast<size_t>(piece_len))};
 }

 // Returns the character index that corresponds to the start of the current
 // match (m_resStart).
 int CPDF_TextPageFind::GetCurOrder() const {
   const int start_char_index = GetCharIndex(m_resStart);
   return start_char_index;
 }

 int CPDF_TextPageFind::GetMatchedCount() const {
   int resStart = GetCharIndex(m_resStart);
   int resEnd = GetCharIndex(m_resEnd);
   return resEnd - resStart + 1;
 }
	// Copyright 2016 PDFium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

	#include "core/fpdftext/cpdf_textpagefind.h"

	#include <cwchar>
	#include <cwctype>
	#include <vector>

	#include "core/fpdftext/cpdf_textpage.h"
	#include "core/fxcrt/fx_string.h"
	#include "core/fxcrt/fx_system.h"
	#include "third_party/base/stl_util.h"

	namespace {

	// Classifies |curChar| for the search-term splitter. Returns false for code
	// points below 255, for U+2113, and for every code point inside one of the
	// inclusive ranges in the table below. Returns true for everything else.
	bool IsIgnoreSpaceCharacter(wchar_t curChar) {
	  struct CodePointRange {
	    wchar_t first;
	    wchar_t last;
	  };
	  static const CodePointRange kExcludedRanges[] = {
	      {0x0600, 0x06FF},  // Arabic
	      {0xFE70, 0xFEFF},  // Arabic Presentation Forms-B
	      {0xFB50, 0xFDFF},  // Arabic Presentation Forms-A
	      {0x0400, 0x04FF},  // Cyrillic
	      {0x0500, 0x052F},  // Cyrillic Supplement
	      {0xA640, 0xA69F},  // Cyrillic Extended-B
	      {0x2DE0, 0x2DFF},  // Cyrillic Extended-A
	      {0x2000, 0x206F},  // General Punctuation
	  };

	  // 8467 is U+2113 (SCRIPT SMALL L).
	  if (curChar < 255 || curChar == 8467)
	    return false;

	  for (const CodePointRange& range : kExcludedRanges) {
	    if (curChar >= range.first && curChar <= range.last)
	      return false;
	  }
	  return true;
	}

	} // namespace

	// Caches the page text and builds m_CharIndex from the per-character flags.
	// m_CharIndex is filled as a flat list of pairs: an even slot holds the
	// character index where a run of NORMAL/GENERATED characters begins and the
	// following odd slot holds the number of characters in that run.
	CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage)
	    : m_pTextPage(pTextPage),
	      m_flags(0),
	      m_bMatchCase(false),
	      m_bMatchWholeWord(false),
	      m_resStart(0),
	      m_resEnd(-1),
	      m_IsFind(false) {
	  m_strText = m_pTextPage->GetAllPageText();

	  const int char_count = pTextPage->CountChars();
	  if (char_count)
	    m_CharIndex.push_back(0);  // Tentative start of the first run.

	  for (int i = 0; i < char_count; ++i) {
	    FPDF_CHAR_INFO info;
	    pTextPage->GetCharInfo(i, &info);

	    // An odd size means a run start has been recorded without a count yet.
	    const bool start_without_count =
	        pdfium::CollectionSize<int>(m_CharIndex) % 2 != 0;
	    const bool is_text_char = info.m_Flag == FPDFTEXT_CHAR_NORMAL ||
	                              info.m_Flag == FPDFTEXT_CHAR_GENERATED;

	    if (is_text_char) {
	      if (start_without_count)
	        m_CharIndex.push_back(1);  // First character of the run.
	      else if (!m_CharIndex.empty())
	        m_CharIndex.back() += 1;  // Extend the current run.
	      continue;
	    }

	    if (start_without_count)
	      m_CharIndex.back() = i + 1;  // Run has not begun; move its start.
	    else
	      m_CharIndex.push_back(i + 1);  // Close the run; tentative next start.
	  }

	  // Discard a trailing start that never received a count.
	  if (pdfium::CollectionSize<int>(m_CharIndex) % 2)
	    m_CharIndex.pop_back();
	}

	// No explicit cleanup is performed here.
	CPDF_TextPageFind::~CPDF_TextPageFind() = default;

	// Converts |index|, an index into the page text, into a character index by
	// deferring to CPDF_TextPage::CharIndexFromTextIndex().
	int CPDF_TextPageFind::GetCharIndex(int index) const {
	  const int char_index = m_pTextPage->CharIndexFromTextIndex(index);
	  return char_index;
	}

	bool CPDF_TextPageFind::FindFirst(const WideString& findwhat,
	int flags,
	Optional<size_t> startPos) {
	if (!m_pTextPage)
	return false;
	if (m_strText.IsEmpty() \|\| m_bMatchCase != (flags & FPDFTEXT_MATCHCASE))
	m_strText = m_pTextPage->GetAllPageText();
	WideString findwhatStr = findwhat;
	m_findWhat = findwhatStr;
	m_flags = flags;
	m_bMatchCase = flags & FPDFTEXT_MATCHCASE;
	if (m_strText.IsEmpty()) {
	m_IsFind = false;
	return true;
	}
	size_t len = findwhatStr.GetLength();
	if (!m_bMatchCase) {
	findwhatStr.MakeLower();
	m_strText.MakeLower();
	}
	m_bMatchWholeWord = !!(flags & FPDFTEXT_MATCHWHOLEWORD);
	m_findNextStart = startPos;
	if (!startPos.has_value()) {
	if (!m_strText.IsEmpty())
	m_findPreStart = m_strText.GetLength() - 1;
	} else {
	m_findPreStart = startPos;
	}

	m_csFindWhatArray.clear();
	size_t i = 0;
	for (i = 0; i < len; ++i)
	if (findwhatStr[i] != ' ')
	break;
	if (i < len)
	ExtractFindWhat(findwhatStr);
	else
	m_csFindWhatArray.push_back(findwhatStr);
	if (m_csFindWhatArray.empty())
	return false;

	m_IsFind = true;
	m_resStart = 0;
	m_resEnd = -1;
	return true;
	}

	bool CPDF_TextPageFind::FindNext() {
	if (!m_pTextPage)
	return false;
	m_resArray.clear();
	if (!m_findNextStart.has_value())
	return false;
	if (m_strText.IsEmpty()) {
	m_IsFind = false;
	return m_IsFind;
	}
	size_t strLen = m_strText.GetLength();
	if (m_findNextStart.value() > strLen - 1) {
	m_IsFind = false;
	return m_IsFind;
	}
	int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray);
	Optional<size_t> nResultPos = 0;
	size_t nStartPos = m_findNextStart.value();
	bool bSpaceStart = false;
	for (int iWord = 0; iWord < nCount; iWord++) {
	WideString csWord = m_csFindWhatArray[iWord];
	if (csWord.IsEmpty()) {
	if (iWord == nCount - 1) {
	wchar_t strInsert = m_strText[nStartPos];
	if (strInsert == TEXT_LINEFEED_CHAR \|\| strInsert == TEXT_SPACE_CHAR \|\|
	strInsert == TEXT_RETURN_CHAR \|\| strInsert == 160) {
	nResultPos = nStartPos + 1;
	break;
	}
	iWord = -1;
	} else if (iWord == 0) {
	bSpaceStart = true;
	}
	continue;
	}
	nResultPos = m_strText.Find(csWord.AsStringView(), nStartPos);
	if (!nResultPos.has_value()) {
	m_IsFind = false;
	return m_IsFind;
	}
	size_t endIndex = nResultPos.value() + csWord.GetLength() - 1;
	if (iWord == 0)
	m_resStart = nResultPos.value();
	bool bMatch = true;
	if (iWord != 0 && !bSpaceStart) {
	size_t PreResEndPos = nStartPos;
	int curChar = csWord[0];
	WideString lastWord = m_csFindWhatArray[iWord - 1];
	int lastChar = lastWord[lastWord.GetLength() - 1];
	if (nStartPos == nResultPos.value() &&
	!(IsIgnoreSpaceCharacter(lastChar) \|\|
	IsIgnoreSpaceCharacter(curChar))) {
	bMatch = false;
	}
	for (size_t d = PreResEndPos; d < nResultPos.value(); d++) {
	wchar_t strInsert = m_strText[d];
	if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
	strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
	bMatch = false;
	break;
	}
	}
	} else if (bSpaceStart) {
	if (nResultPos.value() > 0) {
	wchar_t strInsert = m_strText[nResultPos.value() - 1];
	if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
	strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
	bMatch = false;
	m_resStart = nResultPos.value();
	} else {
	m_resStart = nResultPos.value() - 1;
	}
	}
	}
	if (m_bMatchWholeWord && bMatch) {
	bMatch = IsMatchWholeWord(m_strText, nResultPos.value(), endIndex);
	}
	nStartPos = endIndex + 1;
	if (!bMatch) {
	iWord = -1;
	if (bSpaceStart)
	nStartPos = m_resStart + m_csFindWhatArray[1].GetLength();
	else
	nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();
	}
	}
	m_resEnd = nResultPos.value() + m_csFindWhatArray.back().GetLength() - 1;
	m_IsFind = true;
	int resStart = GetCharIndex(m_resStart);
	int resEnd = GetCharIndex(m_resEnd);
	m_resArray = m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1);
	if (m_flags & FPDFTEXT_CONSECUTIVE) {
	m_findNextStart = m_resStart + 1;
	m_findPreStart = m_resEnd - 1;
	} else {
	m_findNextStart = m_resEnd + 1;
	m_findPreStart = m_resStart - 1;
	}
	return m_IsFind;
	}

	bool CPDF_TextPageFind::FindPrev() {
	if (!m_pTextPage)
	return false;
	m_resArray.clear();
	if (m_strText.IsEmpty() \|\| !m_findPreStart.has_value()) {
	m_IsFind = false;
	return m_IsFind;
	}
	CPDF_TextPageFind findEngine(m_pTextPage.Get());
	bool ret = findEngine.FindFirst(m_findWhat, m_flags, Optional<size_t>(0));
	if (!ret) {
	m_IsFind = false;
	return m_IsFind;
	}
	int order = -1;
	int MatchedCount = 0;
	while (ret) {
	ret = findEngine.FindNext();
	if (ret) {
	int order1 = findEngine.GetCurOrder();
	int MatchedCount1 = findEngine.GetMatchedCount();
	int temp = order1 + MatchedCount1;
	if (temp < 0 \|\| static_cast<size_t>(temp) > m_findPreStart.value() + 1)
	break;
	order = order1;
	MatchedCount = MatchedCount1;
	}
	}
	if (order == -1) {
	m_IsFind = false;
	return m_IsFind;
	}
	m_resStart = m_pTextPage->TextIndexFromCharIndex(order);
	m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);
	m_IsFind = true;
	m_resArray = m_pTextPage->GetRectArray(order, MatchedCount);
	if (m_flags & FPDFTEXT_CONSECUTIVE) {
	m_findNextStart = m_resStart + 1;
	m_findPreStart = m_resEnd - 1;
	} else {
	m_findNextStart = m_resEnd + 1;
	m_findPreStart = m_resStart - 1;
	}
	return m_IsFind;
	}

	void CPDF_TextPageFind::ExtractFindWhat(const WideString& findwhat) {
	if (findwhat.IsEmpty())
	return;
	int index = 0;
	while (1) {
	Optional<WideString> word =
	ExtractSubString(findwhat.c_str(), index, TEXT_SPACE_CHAR);
	if (!word)
	break;

	if (word->IsEmpty()) {
	m_csFindWhatArray.push_back(L"");
	index++;
	continue;
	}

	size_t pos = 0;
	while (pos < word->GetLength()) {
	WideString curStr = word->Mid(pos, 1);
	wchar_t curChar = (*word)[pos];
	if (IsIgnoreSpaceCharacter(curChar)) {
	if (pos > 0 && curChar == 0x2019) {
	pos++;
	continue;
	}
	if (pos > 0)
	m_csFindWhatArray.push_back(word->Left(pos));
	m_csFindWhatArray.push_back(curStr);
	if (pos == word->GetLength() - 1) {
	word->clear();
	break;
	}
	word.emplace(word->Right(word->GetLength() - pos - 1));
	pos = 0;
	continue;
	}
	pos++;
	}

	if (!word->IsEmpty())
	m_csFindWhatArray.push_back(word.value());
	index++;
	}
	}

	// Reports whether the inclusive range [startPos, endPos] of |csPageText|
	// stands alone as a word, i.e. neither the character just before it nor the
	// character just after it continues a word. A missing neighbour (start or
	// end of the text) is treated as the character 0.
	bool CPDF_TextPageFind::IsMatchWholeWord(const WideString& csPageText,
	                                         size_t startPos,
	                                         size_t endPos) {
	  if (startPos > endPos)
	    return false;

	  // Only zero if the addition wraps around.
	  const size_t char_count = endPos - startPos + 1;
	  if (char_count == 0)
	    return false;

	  // A single character above 255 always counts as a whole word.
	  if (char_count == 1 && csPageText[startPos] > 255)
	    return true;

	  wchar_t char_left = 0;
	  wchar_t char_right = 0;
	  if (startPos >= 1)
	    char_left = csPageText[startPos - 1];
	  if (startPos + char_count < csPageText.GetLength())
	    char_right = csPageText[startPos + char_count];

	  // A neighbour continues a word when it lies in 'A'..'z' (this contiguous
	  // range also contains the six punctuation characters between 'Z' and
	  // 'a'), strictly between 0xfb00 and 0xfb06, or is a digit. The former
	  // trailing digit-vs-digit checks could never fire, because any digit
	  // neighbour is already rejected here.
	  const auto continues_word = [](wchar_t ch) {
	    return (ch >= 'A' && ch <= 'z') || (ch > 0xfb00 && ch < 0xfb06) ||
	           std::iswdigit(ch);
	  };
	  return !continues_word(char_left) && !continues_word(char_right);
	}

	// Returns the |iSubString|-th piece (0-based) of |lpszFullString| when split
	// on |chSep|. While skipping to the requested piece, a run of consecutive
	// separators is stepped over as a whole. Returns an empty Optional when the
	// input is null or has fewer pieces than requested.
	Optional<WideString> CPDF_TextPageFind::ExtractSubString(
	    const wchar_t* lpszFullString,
	    int iSubString,
	    wchar_t chSep) {
	  if (!lpszFullString)
	    return {};

	  const wchar_t* cursor = lpszFullString;
	  for (int remaining = iSubString; remaining != 0; --remaining) {
	    cursor = std::wcschr(cursor, chSep);
	    if (!cursor)
	      return {};

	    // Step past the separator that was found and any that follow it.
	    do {
	      ++cursor;
	    } while (*cursor == chSep);
	  }

	  const wchar_t* piece_end = std::wcschr(cursor, chSep);
	  const int piece_len = piece_end ? static_cast<int>(piece_end - cursor)
	                                  : static_cast<int>(wcslen(cursor));
	  if (piece_len < 0)
	    return {};

	  return {WideString(cursor, static_cast<size_t>(piece_len))};
	}

	// Returns the character index that corresponds to the start of the current
	// match (m_resStart).
	int CPDF_TextPageFind::GetCurOrder() const {
	  const int start_char_index = GetCharIndex(m_resStart);
	  return start_char_index;
	}

	int CPDF_TextPageFind::GetMatchedCount() const {
	int resStart = GetCharIndex(m_resStart);
	int resEnd = GetCharIndex(m_resEnd);
	return resEnd - resStart + 1;
	}