// blob: 9b44f95d1317f7a2eabd7f9785f8d080cbefa320 [file] [log] [blame]
// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/cfx_seekablestreamproxy.h"
#if _FX_OS_ == _FX_WIN32_DESKTOP_ || _FX_OS_ == _FX_WIN64_
#include <io.h>
#endif
#include <algorithm>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#include "core/fxcrt/cfx_memorystream.h"
#include "core/fxcrt/fx_codepage.h"
#include "core/fxcrt/fx_extension.h"
#include "third_party/base/ptr_util.h"
#include "third_party/base/stl_util.h"
namespace {
// Decodes UTF-8 bytes from |pSrc| into wide characters stored in |pDst|.
//
// Returns {src bytes consumed, dst characters produced}. "Consumed" is the
// number of source bytes up to and including the last byte of the last
// character that was written to |pDst|. Bytes of a multi-byte sequence that is
// still incomplete when the input ends are therefore not counted, which lets
// the caller rewind and re-read them together with the rest of the sequence.
//
// Decoding stops early when |dstLen| characters have been produced, or when a
// continuation byte arrives with no sequence pending, or when a 0xFE/0xFF byte
// is seen. A sequence that is interrupted by an ASCII byte or by a new lead
// byte is dropped; its bytes are still counted as consumed once the next
// character is produced, so the caller never rewinds over data that has
// already been decoded.
std::pair<FX_STRSIZE, FX_STRSIZE> UTF8Decode(const char* pSrc,
                                             FX_STRSIZE srcLen,
                                             wchar_t* pDst,
                                             FX_STRSIZE dstLen) {
  ASSERT(pDst && dstLen > 0);
  if (srcLen < 1)
    return {0, 0};

  uint32_t dwCode = 0;      // Code point being assembled.
  int32_t iPending = 0;     // Continuation bytes still expected.
  FX_STRSIZE iSrcNum = 0;   // Source bytes consumed so far.
  FX_STRSIZE iDstNum = 0;   // Characters written so far.
  for (FX_STRSIZE iIndex = 0; iIndex < srcLen && iDstNum < dstLen; ++iIndex) {
    const uint8_t byte = static_cast<uint8_t>(pSrc[iIndex]);
    bool bComplete = false;
    if (byte < 0x80) {
      // ASCII. Any sequence that was pending is abandoned.
      iPending = 0;
      dwCode = byte;
      bComplete = true;
    } else if (byte < 0xc0) {
      // Continuation byte: only valid while a sequence is pending.
      if (iPending < 1)
        break;
      --iPending;
      dwCode |= static_cast<uint32_t>(byte & 0x3f) << (iPending * 6);
      bComplete = iPending == 0;
    } else if (byte < 0xe0) {
      iPending = 1;
      dwCode = static_cast<uint32_t>(byte & 0x1f) << 6;
    } else if (byte < 0xf0) {
      iPending = 2;
      dwCode = static_cast<uint32_t>(byte & 0x0f) << 12;
    } else if (byte < 0xf8) {
      iPending = 3;
      dwCode = static_cast<uint32_t>(byte & 0x07) << 18;
    } else if (byte < 0xfc) {
      iPending = 4;
      dwCode = static_cast<uint32_t>(byte & 0x03) << 24;
    } else if (byte < 0xfe) {
      iPending = 5;
      dwCode = static_cast<uint32_t>(byte & 0x01) << 30;
    } else {
      break;
    }

    if (bComplete) {
      pDst[iDstNum++] = static_cast<wchar_t>(dwCode);
      // Everything up to and including this byte has now been dealt with,
      // including the bytes of any sequence that was abandoned on the way.
      iSrcNum = iIndex + 1;
    }
  }
  return {iSrcNum, iDstNum};
}
void UTF16ToWChar(void* pBuffer, FX_STRSIZE iLength) {
ASSERT(pBuffer && iLength > 0);
if (sizeof(wchar_t) == 2)
return;
uint16_t* pSrc = static_cast<uint16_t*>(pBuffer);
wchar_t* pDst = static_cast<wchar_t*>(pBuffer);
while (--iLength >= 0)
pDst[iLength] = static_cast<wchar_t>(pSrc[iLength]);
}
void SwapByteOrder(wchar_t* pStr, FX_STRSIZE iLength) {
ASSERT(pStr);
if (iLength < 0)
iLength = FXSYS_wcslen(pStr);
uint16_t wch;
if (sizeof(wchar_t) > 2) {
while (iLength-- > 0) {
wch = static_cast<uint16_t>(*pStr);
wch = (wch >> 8) | (wch << 8);
wch &= 0x00FF;
*pStr = wch;
++pStr;
}
return;
}
while (iLength-- > 0) {
wch = static_cast<uint16_t>(*pStr);
wch = (wch >> 8) | (wch << 8);
*pStr = wch;
++pStr;
}
}
} // namespace
// Byte-order-mark constants. The stream constructor reads the first three
// bytes of the stream into the lowest-addressed bytes of a zero-initialized
// uint32_t and compares the result against these values, so each constant is
// spelled differently depending on host endianness.
#if _FX_ENDIAN_ == _FX_LITTLE_ENDIAN_
// Selects the three bytes that were read.
#define BOM_MASK 0x00FFFFFF
// Stream bytes EF BB BF.
#define BOM_UTF8 0x00BFBBEF
// Selects the first two bytes that were read.
#define BOM_UTF16_MASK 0x0000FFFF
// Stream bytes FE FF.
#define BOM_UTF16_BE 0x0000FFFE
// Stream bytes FF FE.
#define BOM_UTF16_LE 0x0000FEFF
#else
// Selects the three bytes that were read.
#define BOM_MASK 0xFFFFFF00
// Stream bytes EF BB BF.
#define BOM_UTF8 0xEFBBBF00
// Selects the first two bytes that were read.
#define BOM_UTF16_MASK 0xFFFF0000
// Stream bytes FE FF.
#define BOM_UTF16_BE 0xFEFF0000
// Stream bytes FF FE.
#define BOM_UTF16_LE 0xFFFE0000
#endif // _FX_ENDIAN_ == _FX_LITTLE_ENDIAN_
// Wraps |stream|, which must be non-null.
//
// For a write stream the position is set to the stream's current size and
// nothing else is done. For a read stream the first three bytes are inspected
// for a UTF-8 or UTF-16 byte order mark, which selects the code page and BOM
// length; without a BOM the code page comes from FXSYS_GetACP(). The position
// is then set to the larger of the BOM length and the position reported
// before detection.
CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
    const CFX_RetainPtr<IFX_SeekableStream>& stream,
    bool isWriteStream)
    : m_IsWriteStream(isWriteStream),
      m_wCodePage(FX_CODEPAGE_DefANSI),
      m_wBOMLength(0),
      m_iPosition(0),
      m_pStream(stream) {
  ASSERT(m_pStream);

  if (isWriteStream) {
    m_iPosition = m_pStream->GetSize();
    return;
  }

  const FX_FILESIZE savedPosition = GetPosition();
  Seek(CFX_SeekableStreamProxy::Pos::Begin, 0);

  // Pull up to three bytes into an otherwise zeroed word; bytes that are not
  // read stay zero.
  uint32_t header = 0;
  ReadData(reinterpret_cast<uint8_t*>(&header), 3);

  const uint32_t threeByteMark = header & BOM_MASK;
  const uint32_t twoByteMark = threeByteMark & BOM_UTF16_MASK;
  if (threeByteMark == BOM_UTF8) {
    m_wBOMLength = 3;
    m_wCodePage = FX_CODEPAGE_UTF8;
  } else if (twoByteMark == BOM_UTF16_BE) {
    m_wBOMLength = 2;
    m_wCodePage = FX_CODEPAGE_UTF16BE;
  } else if (twoByteMark == BOM_UTF16_LE) {
    m_wBOMLength = 2;
    m_wCodePage = FX_CODEPAGE_UTF16LE;
  } else {
    m_wBOMLength = 0;
    m_wCodePage = FXSYS_GetACP();
  }

  // Never leave the position inside the BOM.
  const FX_FILESIZE bomEnd = static_cast<FX_FILESIZE>(m_wBOMLength);
  Seek(CFX_SeekableStreamProxy::Pos::Begin, std::max(bomEnd, savedPosition));
}
// Convenience constructor for an in-memory buffer: wraps |data| / |size| in a
// CFX_MemoryStream and delegates to the stream constructor with
// isWriteStream = false, i.e. as a read stream.
// NOTE(review): the third CFX_MemoryStream argument (false) presumably means
// the stream does not take ownership of |data| - confirm against
// cfx_memorystream.h.
CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(uint8_t* data, FX_STRSIZE size)
: CFX_SeekableStreamProxy(
pdfium::MakeRetain<CFX_MemoryStream>(data, size, false),
false) {}
// Nothing to release explicitly; members clean up after themselves.
CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() = default;
// Moves the stream position. Pos::Begin treats |iOffset| as an absolute
// position, Pos::Current as a delta from the current one. The result is
// clamped to the range [0, GetLength()].
void CFX_SeekableStreamProxy::Seek(CFX_SeekableStreamProxy::Pos eSeek,
                                   FX_FILESIZE iOffset) {
  if (eSeek == CFX_SeekableStreamProxy::Pos::Begin)
    m_iPosition = iOffset;
  else if (eSeek == CFX_SeekableStreamProxy::Pos::Current)
    m_iPosition += iOffset;

  const FX_FILESIZE lowerBound = static_cast<FX_FILESIZE>(0);
  m_iPosition = pdfium::clamp(m_iPosition, lowerBound, GetLength());
}
// Overrides the code page used for decoding. The request is ignored when a
// byte order mark was detected (m_wBOMLength > 0); the code page chosen from
// the BOM is kept in that case.
void CFX_SeekableStreamProxy::SetCodePage(uint16_t wCodePage) {
  const bool bHasBOM = m_wBOMLength > 0;
  if (!bHasBOM)
    m_wCodePage = wCodePage;
}
// Reads up to |iBufferSize| raw bytes from the current position into
// |pBuffer| and advances the position by the number of bytes read.
//
// Returns the number of bytes read, 0 when nothing is left to read or the
// underlying ReadBlock() fails, and -1 when this is a write stream.
FX_STRSIZE CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer,
                                             FX_STRSIZE iBufferSize) {
  ASSERT(pBuffer && iBufferSize > 0);
  if (m_IsWriteStream)
    return -1;

  // Compare in FX_FILESIZE before narrowing: casting the remaining byte count
  // to FX_STRSIZE first can wrap or go negative when the remainder does not
  // fit, which would report "nothing to read" although data is available.
  const FX_FILESIZE iRemaining = m_pStream->GetSize() - m_iPosition;
  if (iRemaining <= 0 || iBufferSize <= 0)
    return 0;
  if (iRemaining < static_cast<FX_FILESIZE>(iBufferSize))
    iBufferSize = static_cast<FX_STRSIZE>(iRemaining);

  if (!m_pStream->ReadBlock(pBuffer, m_iPosition, iBufferSize))
    return 0;

  pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
  new_pos += iBufferSize;
  if (!new_pos.IsValid())
    return 0;

  m_iPosition = new_pos.ValueOrDie();
  return iBufferSize;
}
// Reads and decodes up to |iMaxLength| characters into |pStr|.
//
// UTF-16 streams are read two bytes per character, widened to wchar_t where
// necessary, and byte-swapped when the stream's byte order differs from the
// host's. Any other code page is read as bytes and decoded as UTF-8; bytes
// that the decoder did not consume are handed back by seeking backwards.
//
// Returns the number of characters stored in |pStr|. Returns -1 for a write
// stream (|*bEOS| is left untouched), and -1 when data was read but the code
// page is neither UTF-16 nor UTF-8 (|*bEOS| is updated, because the position
// has advanced). On every other path |*bEOS| receives IsEOF().
FX_STRSIZE CFX_SeekableStreamProxy::ReadString(wchar_t* pStr,
                                               FX_STRSIZE iMaxLength,
                                               bool* bEOS) {
  ASSERT(pStr && iMaxLength > 0);
  if (m_IsWriteStream)
    return -1;

  if (m_wCodePage == FX_CODEPAGE_UTF16LE ||
      m_wCodePage == FX_CODEPAGE_UTF16BE) {
    // Two bytes per code unit; cap the request so that the byte count cannot
    // overflow FX_STRSIZE.
    const FX_STRSIZE iMaxUnits =
        std::min(iMaxLength, std::numeric_limits<FX_STRSIZE>::max() / 2);
    const FX_STRSIZE iLen =
        ReadData(reinterpret_cast<uint8_t*>(pStr), iMaxUnits * 2);
    iMaxLength = iLen / 2;
    // Nothing to convert at end of stream; UTF16ToWChar() asserts that its
    // length is positive.
    if (iMaxLength > 0) {
      if (sizeof(wchar_t) > 2)
        UTF16ToWChar(pStr, iMaxLength);
#if _FX_ENDIAN_ == _FX_BIG_ENDIAN_
      if (m_wCodePage == FX_CODEPAGE_UTF16LE)
        SwapByteOrder(pStr, iMaxLength);
#else
      if (m_wCodePage == FX_CODEPAGE_UTF16BE)
        SwapByteOrder(pStr, iMaxLength);
#endif
    }
  } else {
    // Decide how much to read in FX_FILESIZE and narrow afterwards, so that a
    // remainder too large for FX_STRSIZE is not mangled by the cast.
    const FX_FILESIZE iRemaining = GetLength() - GetPosition();
    FX_STRSIZE iBytes = 0;
    if (iRemaining > 0) {
      iBytes = iRemaining < static_cast<FX_FILESIZE>(iMaxLength)
                   ? static_cast<FX_STRSIZE>(iRemaining)
                   : iMaxLength;
    }
    if (iBytes > 0) {
      std::vector<uint8_t> buf(iBytes);
      const FX_STRSIZE iLen = ReadData(buf.data(), iBytes);
      if (m_wCodePage != FX_CODEPAGE_UTF8) {
        // The bytes have been consumed, so report the resulting stream state
        // instead of leaving the out-parameter unset.
        *bEOS = IsEOF();
        return -1;
      }
      const std::pair<FX_STRSIZE, FX_STRSIZE> decoded = UTF8Decode(
          reinterpret_cast<const char*>(buf.data()), iLen, pStr, iMaxLength);
      iMaxLength = decoded.second;
      // Give back whatever the decoder left untouched (for example a
      // multi-byte sequence cut off by the end of the buffer).
      Seek(CFX_SeekableStreamProxy::Pos::Current, decoded.first - iLen);
    } else {
      iMaxLength = 0;
    }
  }

  *bEOS = IsEOF();
  return iMaxLength;
}
void CFX_SeekableStreamProxy::WriteString(const CFX_WideStringC& str) {
if (!m_IsWriteStream || str.GetLength() == 0 ||
m_wCodePage != FX_CODEPAGE_UTF8) {
return;
}
if (!m_pStream->WriteBlock(str.unterminated_c_str(), m_iPosition,
str.GetLength() * sizeof(wchar_t))) {
return;
}
pdfium::base::CheckedNumeric<FX_STRSIZE> new_pos = m_iPosition;
new_pos += str.GetLength() * sizeof(wchar_t);
if (!new_pos.IsValid()) {
m_iPosition = std::numeric_limits<FX_STRSIZE>::max();
return;
}
m_iPosition = new_pos.ValueOrDie();
}