| // Copyright 2014 PDFium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| |
| #include "xfa/src/fee/fx_wordbreak/fx_wordbreak_impl.h" |
| |
| FX_WordBreakProp FX_GetWordBreakProperty(FX_WCHAR wcCodePoint) { |
| FX_DWORD dwProperty = |
| (FX_DWORD)gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1]; |
| return (FX_WordBreakProp)(((wcCodePoint) & 1) ? (dwProperty & 0x0F) |
| : (dwProperty >> 4)); |
| } |
| CFX_CharIter::CFX_CharIter(const CFX_WideString& wsText) |
| : m_wsText(wsText), m_nIndex(0) { |
| FXSYS_assert(!wsText.IsEmpty()); |
| } |
| CFX_CharIter::~CFX_CharIter() {} |
| void CFX_CharIter::Release() { |
| delete this; |
| } |
| FX_BOOL CFX_CharIter::Next(FX_BOOL bPrev) { |
| if (bPrev) { |
| if (m_nIndex <= 0) { |
| return FALSE; |
| } |
| m_nIndex--; |
| } else { |
| if (m_nIndex + 1 >= m_wsText.GetLength()) { |
| return FALSE; |
| } |
| m_nIndex++; |
| } |
| return TRUE; |
| } |
| FX_WCHAR CFX_CharIter::GetChar() { |
| return m_wsText.GetAt(m_nIndex); |
| } |
| void CFX_CharIter::SetAt(int32_t nIndex) { |
| if (nIndex < 0 || nIndex >= m_wsText.GetLength()) { |
| return; |
| } |
| m_nIndex = nIndex; |
| } |
| int32_t CFX_CharIter::GetAt() const { |
| return m_nIndex; |
| } |
| FX_BOOL CFX_CharIter::IsEOF(FX_BOOL bTail) const { |
| return bTail ? (m_nIndex + 1 == m_wsText.GetLength()) : (m_nIndex == 0); |
| } |
| IFX_CharIter* CFX_CharIter::Clone() { |
| CFX_CharIter* pIter = new CFX_CharIter(m_wsText); |
| pIter->m_nIndex = m_nIndex; |
| return pIter; |
| } |
| CFX_WordBreak::CFX_WordBreak() : m_pPreIter(NULL), m_pCurIter(NULL) {} |
| CFX_WordBreak::~CFX_WordBreak() { |
| if (m_pPreIter) { |
| m_pPreIter->Release(); |
| m_pPreIter = NULL; |
| } |
| if (m_pCurIter) { |
| m_pCurIter->Release(); |
| m_pCurIter = NULL; |
| } |
| } |
| void CFX_WordBreak::Release() { |
| delete this; |
| } |
| void CFX_WordBreak::Attach(IFX_CharIter* pIter) { |
| FXSYS_assert(pIter); |
| m_pCurIter = pIter; |
| } |
| void CFX_WordBreak::Attach(const CFX_WideString& wsText) { |
| m_pCurIter = new CFX_CharIter(wsText); |
| } |
| FX_BOOL CFX_WordBreak::Next(FX_BOOL bPrev) { |
| IFX_CharIter* pIter = bPrev ? m_pPreIter->Clone() : m_pCurIter->Clone(); |
| if (pIter->IsEOF(!bPrev)) { |
| return FALSE; |
| } |
| pIter->Next(bPrev); |
| if (!FindNextBreakPos(pIter, bPrev, TRUE)) { |
| pIter->Release(); |
| return FALSE; |
| } |
| if (bPrev) { |
| m_pCurIter->Release(); |
| m_pCurIter = m_pPreIter; |
| m_pCurIter->Next(TRUE); |
| m_pPreIter = pIter; |
| } else { |
| m_pPreIter->Release(); |
| m_pPreIter = m_pCurIter; |
| m_pPreIter->Next(); |
| m_pCurIter = pIter; |
| } |
| return TRUE; |
| } |
| void CFX_WordBreak::SetAt(int32_t nIndex) { |
| if (m_pPreIter) { |
| m_pPreIter->Release(); |
| m_pPreIter = NULL; |
| } |
| m_pCurIter->SetAt(nIndex); |
| FindNextBreakPos(m_pCurIter, TRUE, FALSE); |
| m_pPreIter = m_pCurIter; |
| m_pCurIter = m_pPreIter->Clone(); |
| FindNextBreakPos(m_pCurIter, FALSE, FALSE); |
| } |
| int32_t CFX_WordBreak::GetWordPos() const { |
| return m_pPreIter->GetAt(); |
| } |
| int32_t CFX_WordBreak::GetWordLength() const { |
| return m_pCurIter->GetAt() - m_pPreIter->GetAt() + 1; |
| } |
| void CFX_WordBreak::GetWord(CFX_WideString& wsWord) const { |
| int32_t nWordLength = GetWordLength(); |
| if (nWordLength <= 0) { |
| return; |
| } |
| FX_WCHAR* lpBuf = wsWord.GetBuffer(nWordLength); |
| IFX_CharIter* pTempIter = m_pPreIter->Clone(); |
| int32_t i = 0; |
| while (pTempIter->GetAt() <= m_pCurIter->GetAt()) { |
| lpBuf[i++] = pTempIter->GetChar(); |
| FX_BOOL bEnd = pTempIter->Next(); |
| if (!bEnd) { |
| break; |
| } |
| } |
| pTempIter->Release(); |
| wsWord.ReleaseBuffer(nWordLength); |
| } |
| FX_BOOL CFX_WordBreak::IsEOF(FX_BOOL bTail) const { |
| return m_pCurIter->IsEOF(bTail); |
| } |
| FX_BOOL CFX_WordBreak::FindNextBreakPos(IFX_CharIter* pIter, |
| FX_BOOL bPrev, |
| FX_BOOL bFromNext) { |
| FX_WordBreakProp ePreType = FX_WordBreakProp_None; |
| FX_WordBreakProp eCurType = FX_WordBreakProp_None; |
| FX_WordBreakProp eNextType = FX_WordBreakProp_None; |
| if (pIter->IsEOF(!bPrev)) { |
| return TRUE; |
| } |
| if (!(bFromNext || pIter->IsEOF(bPrev))) { |
| pIter->Next(!bPrev); |
| FX_WCHAR wcTemp = pIter->GetChar(); |
| ePreType = FX_GetWordBreakProperty(wcTemp); |
| pIter->Next(bPrev); |
| } |
| FX_WCHAR wcTemp = pIter->GetChar(); |
| eCurType = FX_GetWordBreakProperty(wcTemp); |
| FX_BOOL bFirst = TRUE; |
| do { |
| pIter->Next(bPrev); |
| FX_WCHAR wcTemp = pIter->GetChar(); |
| eNextType = FX_GetWordBreakProperty(wcTemp); |
| FX_WORD wBreak = |
| gs_FX_WordBreak_Table[eCurType] & ((FX_WORD)(1 << eNextType)); |
| if (wBreak) { |
| if (pIter->IsEOF(!bPrev)) { |
| pIter->Next(!bPrev); |
| return TRUE; |
| } |
| if (bFirst) { |
| int32_t nFlags = 0; |
| if (eCurType == FX_WordBreakProp_MidLetter) { |
| if (eNextType == FX_WordBreakProp_ALetter) { |
| nFlags = 1; |
| } |
| } else if (eCurType == FX_WordBreakProp_MidNum) { |
| if (eNextType == FX_WordBreakProp_Numberic) { |
| nFlags = 2; |
| } |
| } else if (eCurType == FX_WordBreakProp_MidNumLet) { |
| if (eNextType == FX_WordBreakProp_ALetter) { |
| nFlags = 1; |
| } else if (eNextType == FX_WordBreakProp_Numberic) { |
| nFlags = 2; |
| } |
| } |
| if (nFlags > 0) { |
| FXSYS_assert(nFlags <= 2); |
| if (!((nFlags == 1 && ePreType == FX_WordBreakProp_ALetter) || |
| (nFlags == 2 && ePreType == FX_WordBreakProp_Numberic))) { |
| pIter->Next(!bPrev); |
| return TRUE; |
| } |
| pIter->Next(bPrev); |
| wBreak = FALSE; |
| } |
| bFirst = FALSE; |
| } |
| if (wBreak) { |
| int32_t nFlags = 0; |
| if (eNextType == FX_WordBreakProp_MidLetter) { |
| if (eCurType == FX_WordBreakProp_ALetter) { |
| nFlags = 1; |
| } |
| } else if (eNextType == FX_WordBreakProp_MidNum) { |
| if (eCurType == FX_WordBreakProp_Numberic) { |
| nFlags = 2; |
| } |
| } else if (eNextType == FX_WordBreakProp_MidNumLet) { |
| if (eCurType == FX_WordBreakProp_ALetter) { |
| nFlags = 1; |
| } else if (eCurType == FX_WordBreakProp_Numberic) { |
| nFlags = 2; |
| } |
| } |
| if (nFlags <= 0) { |
| pIter->Next(!bPrev); |
| return TRUE; |
| } |
| FXSYS_assert(nFlags <= 2); |
| pIter->Next(bPrev); |
| wcTemp = pIter->GetChar(); |
| eNextType = (FX_WordBreakProp)FX_GetWordBreakProperty(wcTemp); |
| if (!((nFlags == 1 && eNextType == FX_WordBreakProp_ALetter) || |
| (nFlags == 2 && eNextType == FX_WordBreakProp_Numberic))) { |
| pIter->Next(!bPrev); |
| pIter->Next(!bPrev); |
| return TRUE; |
| } |
| } |
| } |
| ePreType = eCurType; |
| eCurType = eNextType; |
| bFirst = FALSE; |
| } while (!pIter->IsEOF(!bPrev)); |
| return TRUE; |
| } |
| IFX_WordBreak* FX_WordBreak_Create() { |
| return new CFX_WordBreak; |
| } |