blob: a67ec52c85e50d5748c99bb66168494ec8864fb6 [file] [log] [blame]
// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/cfx_seekablestreamproxy.h"
#if _FX_OS_ == _FX_WIN32_DESKTOP_ || _FX_OS_ == _FX_WIN64_
#include <io.h>
#endif
#include <algorithm>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#include "core/fxcrt/cfx_memorystream.h"
#include "core/fxcrt/fx_codepage.h"
#include "core/fxcrt/fx_extension.h"
#include "third_party/base/ptr_util.h"
#include "third_party/base/stl_util.h"
namespace {
// Returns {src bytes consumed, dst bytes produced}.
std::pair<FX_STRSIZE, FX_STRSIZE> UTF8Decode(const char* pSrc,
FX_STRSIZE srcLen,
wchar_t* pDst,
FX_STRSIZE dstLen) {
ASSERT(pDst && dstLen > 0);
if (srcLen < 1)
return {0, 0};
uint32_t dwCode = 0;
int32_t iPending = 0;
FX_STRSIZE iSrcNum = 0;
FX_STRSIZE iDstNum = 0;
FX_STRSIZE iIndex = 0;
int32_t k = 1;
while (iIndex < srcLen) {
uint8_t byte = static_cast<uint8_t>(*(pSrc + iIndex));
if (byte < 0x80) {
iPending = 0;
k = 1;
iDstNum++;
iSrcNum += k;
*pDst++ = byte;
if (iDstNum >= dstLen)
break;
} else if (byte < 0xc0) {
if (iPending < 1)
break;
iPending--;
dwCode |= (byte & 0x3f) << (iPending * 6);
if (iPending == 0) {
iDstNum++;
iSrcNum += k;
*pDst++ = dwCode;
if (iDstNum >= dstLen)
break;
}
} else if (byte < 0xe0) {
iPending = 1;
k = 2;
dwCode = (byte & 0x1f) << 6;
} else if (byte < 0xf0) {
iPending = 2;
k = 3;
dwCode = (byte & 0x0f) << 12;
} else if (byte < 0xf8) {
iPending = 3;
k = 4;
dwCode = (byte & 0x07) << 18;
} else if (byte < 0xfc) {
iPending = 4;
k = 5;
dwCode = (byte & 0x03) << 24;
} else if (byte < 0xfe) {
iPending = 5;
k = 6;
dwCode = (byte & 0x01) << 30;
} else {
break;
}
iIndex++;
}
return {iSrcNum, iDstNum};
}
void UTF16ToWChar(void* pBuffer, FX_STRSIZE iLength) {
ASSERT(pBuffer);
ASSERT(iLength > 0);
ASSERT(sizeof(wchar_t) > 2);
uint16_t* pSrc = static_cast<uint16_t*>(pBuffer);
wchar_t* pDst = static_cast<wchar_t*>(pBuffer);
for (FX_STRSIZE i = 0; i < iLength; i++)
pDst[i] = static_cast<wchar_t>(pSrc[i]);
}
void SwapByteOrder(wchar_t* pStr, FX_STRSIZE iLength) {
ASSERT(pStr);
if (iLength < 0)
iLength = FXSYS_wcslen(pStr);
uint16_t wch;
if (sizeof(wchar_t) > 2) {
while (iLength-- > 0) {
wch = static_cast<uint16_t>(*pStr);
wch = (wch >> 8) | (wch << 8);
wch &= 0x00FF;
*pStr = wch;
++pStr;
}
return;
}
while (iLength-- > 0) {
wch = static_cast<uint16_t>(*pStr);
wch = (wch >> 8) | (wch << 8);
*pStr = wch;
++pStr;
}
}
} // namespace
#define BOM_MASK 0x00FFFFFF
#define BOM_UTF8 0x00BFBBEF
#define BOM_UTF16_MASK 0x0000FFFF
#define BOM_UTF16_BE 0x0000FFFE
#define BOM_UTF16_LE 0x0000FEFF
CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
const CFX_RetainPtr<IFX_SeekableStream>& stream,
bool isWriteStream)
: m_IsWriteStream(isWriteStream),
m_wCodePage(FX_CODEPAGE_DefANSI),
m_wBOMLength(0),
m_iPosition(0),
m_pStream(stream) {
ASSERT(m_pStream);
if (isWriteStream) {
m_iPosition = m_pStream->GetSize();
return;
}
Seek(From::Begin, 0);
uint32_t bom = 0;
ReadData(reinterpret_cast<uint8_t*>(&bom), 3);
bom &= BOM_MASK;
if (bom == BOM_UTF8) {
m_wBOMLength = 3;
m_wCodePage = FX_CODEPAGE_UTF8;
} else {
bom &= BOM_UTF16_MASK;
if (bom == BOM_UTF16_BE) {
m_wBOMLength = 2;
m_wCodePage = FX_CODEPAGE_UTF16BE;
} else if (bom == BOM_UTF16_LE) {
m_wBOMLength = 2;
m_wCodePage = FX_CODEPAGE_UTF16LE;
} else {
m_wBOMLength = 0;
m_wCodePage = FXSYS_GetACP();
}
}
Seek(From::Begin, static_cast<FX_FILESIZE>(m_wBOMLength));
}
CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(uint8_t* data, FX_STRSIZE size)
: CFX_SeekableStreamProxy(
pdfium::MakeRetain<CFX_MemoryStream>(data, size, false),
false) {}
CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() {}
void CFX_SeekableStreamProxy::Seek(From eSeek, FX_FILESIZE iOffset) {
switch (eSeek) {
case From::Begin:
m_iPosition = iOffset;
break;
case From::Current: {
pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
new_pos += iOffset;
m_iPosition =
new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
} break;
}
m_iPosition =
pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength());
}
void CFX_SeekableStreamProxy::SetCodePage(uint16_t wCodePage) {
if (m_wBOMLength > 0)
return;
m_wCodePage = wCodePage;
}
FX_STRSIZE CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer,
FX_STRSIZE iBufferSize) {
ASSERT(pBuffer && iBufferSize > 0);
if (m_IsWriteStream)
return 0;
iBufferSize =
std::min(iBufferSize, static_cast<FX_STRSIZE>(GetLength() - m_iPosition));
if (iBufferSize <= 0)
return 0;
if (!m_pStream->ReadBlock(pBuffer, m_iPosition, iBufferSize))
return 0;
pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
new_pos += iBufferSize;
m_iPosition = new_pos.ValueOrDefault(m_iPosition);
return new_pos.IsValid() ? iBufferSize : 0;
}
FX_STRSIZE CFX_SeekableStreamProxy::ReadString(wchar_t* pStr,
FX_STRSIZE iMaxLength,
bool* bEOS) {
if (!pStr || iMaxLength <= 0)
return 0;
if (m_IsWriteStream)
return 0;
if (m_wCodePage == FX_CODEPAGE_UTF16LE ||
m_wCodePage == FX_CODEPAGE_UTF16BE) {
FX_STRSIZE iBytes = iMaxLength * 2;
FX_STRSIZE iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
iMaxLength = iLen / 2;
if (sizeof(wchar_t) > 2 && iMaxLength > 0)
UTF16ToWChar(pStr, iMaxLength);
if (m_wCodePage == FX_CODEPAGE_UTF16BE)
SwapByteOrder(pStr, iMaxLength);
} else {
FX_FILESIZE pos = GetPosition();
FX_STRSIZE iBytes =
std::min(iMaxLength, static_cast<FX_STRSIZE>(GetLength() - pos));
if (iBytes > 0) {
std::vector<uint8_t> buf(iBytes);
FX_STRSIZE iLen = ReadData(buf.data(), iBytes);
if (m_wCodePage != FX_CODEPAGE_UTF8)
return 0;
FX_STRSIZE iSrc = 0;
std::tie(iSrc, iMaxLength) = UTF8Decode(
reinterpret_cast<const char*>(buf.data()), iLen, pStr, iMaxLength);
Seek(From::Current, iSrc - iLen);
} else {
iMaxLength = 0;
}
}
*bEOS = IsEOF();
return iMaxLength;
}
void CFX_SeekableStreamProxy::WriteString(const CFX_WideStringC& str) {
if (!m_IsWriteStream || str.GetLength() == 0 ||
m_wCodePage != FX_CODEPAGE_UTF8) {
return;
}
if (!m_pStream->WriteBlock(str.unterminated_c_str(), m_iPosition,
str.GetLength() * sizeof(wchar_t))) {
return;
}
pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
new_pos += str.GetLength() * sizeof(wchar_t);
m_iPosition = new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
m_iPosition =
pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength());
}