blob: cae2dd72340a4a560be300bd534e3c0a1dc8b6cd [file] [log] [blame]
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fxcrt/fx_ext.h"
#include "xfa/fgas/crt/fgas_codepage.h"
#include "xfa/fgas/crt/fgas_language.h"
namespace {
// One row of the charset -> code page lookup table.
struct FX_CHARSET_MAP {
// Lookup key: a charset identifier (initialised from FX_CHARSET_* below).
uint16_t charset;
// Value: the code page identifier (FX_CODEPAGE_*) paired with |charset|.
uint16_t codepage;
};
// Charset -> code page table consulted by FX_GetCodePageFromCharset(), which
// binary-searches it on |charset|. Entries therefore have to stay in ascending
// |charset| order; a misplaced row makes lookups silently miss.
// NOTE(review): the numeric FX_CHARSET_* values are defined outside this file,
// so confirm the ordering against those definitions when adding or moving
// entries.
const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {
{FX_CHARSET_ANSI, FX_CODEPAGE_MSWin_WesternEuropean},
{FX_CHARSET_Default, FX_CODEPAGE_DefANSI},
{FX_CHARSET_Symbol, FX_CODEPAGE_Symbol},
{FX_CHARSET_MAC_Roman, FX_CODEPAGE_MAC_Roman},
{FX_CHARSET_MAC_ShiftJIS, FX_CODEPAGE_MAC_ShiftJIS},
{FX_CHARSET_MAC_Korean, FX_CODEPAGE_MAC_Korean},
{FX_CHARSET_MAC_ChineseSimplified, FX_CODEPAGE_MAC_ChineseSimplified},
{FX_CHARSET_MAC_ChineseTriditional, FX_CODEPAGE_MAC_ChineseTraditional},
{FX_CHARSET_MAC_Hebrew, FX_CODEPAGE_MAC_Hebrew},
{FX_CHARSET_MAC_Arabic, FX_CODEPAGE_MAC_Arabic},
{FX_CHARSET_MAC_Greek, FX_CODEPAGE_MAC_Greek},
{FX_CHARSET_MAC_Turkish, FX_CODEPAGE_MAC_Turkish},
{FX_CHARSET_MAC_Thai, FX_CODEPAGE_MAC_Thai},
{FX_CHARSET_MAC_EasternEuropean, FX_CODEPAGE_MAC_EasternEuropean},
{FX_CHARSET_MAC_Cyrillic, FX_CODEPAGE_MAC_Cyrillic},
{FX_CHARSET_ShiftJIS, FX_CODEPAGE_ShiftJIS},
{FX_CHARSET_Korean, FX_CODEPAGE_Korean},
{FX_CHARSET_Johab, FX_CODEPAGE_Johab},
{FX_CHARSET_ChineseSimplified, FX_CODEPAGE_ChineseSimplified},
{FX_CHARSET_ChineseTriditional, FX_CODEPAGE_ChineseTraditional},
{FX_CHARSET_MSWin_Greek, FX_CODEPAGE_MSWin_Greek},
{FX_CHARSET_MSWin_Turkish, FX_CODEPAGE_MSWin_Turkish},
{FX_CHARSET_MSWin_Vietnamese, FX_CODEPAGE_MSWin_Vietnamese},
{FX_CHARSET_MSWin_Hebrew, FX_CODEPAGE_MSWin_Hebrew},
{FX_CHARSET_MSWin_Arabic, FX_CODEPAGE_MSWin_Arabic},
{FX_CHARSET_MSWin_Baltic, FX_CODEPAGE_MSWin_Baltic},
{FX_CHARSET_MSWin_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic},
{FX_CHARSET_Thai, FX_CODEPAGE_MSDOS_Thai},
{FX_CHARSET_MSWin_EasterEuropean, FX_CODEPAGE_MSWin_EasternEuropean},
{FX_CHARSET_US, FX_CODEPAGE_MSDOS_US},
{FX_CHARSET_OEM, FX_CODEPAGE_MSDOS_WesternEuropean},
};
} // namespace
uint16_t FX_GetCodePageFromCharset(uint8_t charset) {
int32_t iEnd = sizeof(g_FXCharset2CodePageTable) / sizeof(FX_CHARSET_MAP) - 1;
ASSERT(iEnd >= 0);
int32_t iStart = 0, iMid;
do {
iMid = (iStart + iEnd) / 2;
const FX_CHARSET_MAP& cp = g_FXCharset2CodePageTable[iMid];
if (charset == cp.charset)
return cp.codepage;
if (charset < cp.charset)
iEnd = iMid - 1;
else
iStart = iMid + 1;
} while (iStart <= iEnd);
return 0xFFFF;
}
// Swaps, in place, the two bytes of the low 16 bits of every character in
// |pStr|.
//
// |pStr|    - buffer to rewrite; must not be null.
// |iLength| - number of characters to process. A negative value means "use
//             FXSYS_wcslen(pStr)".
//
// Each element is read as a 16-bit unit, byte-swapped, and stored back
// zero-extended, so the result is the same whatever sizeof(wchar_t) is. The
// previous wide-wchar_t path masked the swapped value with 0x00FF, which threw
// away the byte that had just been moved into the high position (0x1234 became
// 0x0012 instead of 0x3412).
void FX_SwapByteOrder(wchar_t* pStr, int32_t iLength) {
  ASSERT(pStr);
  if (iLength < 0)
    iLength = FXSYS_wcslen(pStr);

  for (wchar_t* const pEnd = pStr + iLength; pStr < pEnd; ++pStr) {
    const uint16_t wch = static_cast<uint16_t>(*pStr);
    // The shifts are evaluated as int; the cast keeps only the 16 swapped bits.
    *pStr = static_cast<uint16_t>((wch >> 8) | (wch << 8));
  }
}
// Widens |iLength| 16-bit code units stored at the start of |pBuffer| into
// wchar_t values, in place. Does nothing when wchar_t is already 2 bytes.
//
// The last write lands at element |iLength| - 1 of the wchar_t view, so the
// buffer has to be large enough to hold |iLength| wchar_t values.
void FX_UTF16ToWChar(void* pBuffer, int32_t iLength) {
  ASSERT(pBuffer && iLength > 0);
  if (sizeof(wchar_t) == 2)
    return;

  uint16_t* pUnits = static_cast<uint16_t*>(pBuffer);
  wchar_t* pWide = static_cast<wchar_t*>(pBuffer);
  // Walk from the back: each widened element overlaps only source units that
  // have already been read.
  for (int32_t i = iLength - 1; i >= 0; --i)
    pWide[i] = static_cast<wchar_t>(pUnits[i]);
}
// Decodes |pSrc| from the encoding named by |wCodePage| into |pDst|.
//
// Only FX_CODEPAGE_UTF8 is handled, by forwarding to FX_UTF8Decode() and
// returning its result; every other code page yields -1. |bErrBreak| is
// accepted but not consulted.
int32_t FX_DecodeString(uint16_t wCodePage,
                        const char* pSrc,
                        int32_t* pSrcLen,
                        wchar_t* pDst,
                        int32_t* pDstLen,
                        bool bErrBreak) {
  if (wCodePage != FX_CODEPAGE_UTF8)
    return -1;

  return FX_UTF8Decode(pSrc, pSrcLen, pDst, pDstLen);
}
// Decodes UTF-8 bytes from |pSrc| into wide characters.
//
// |pSrc|    - source bytes; may be null only when *|pSrcLen| < 1.
// |pSrcLen| - in: number of bytes available in |pSrc|.
//             out: number of leading bytes accounted for by the characters
//             that were produced (including any abandoned lead bytes that
//             precede them).
// |pDst|    - destination buffer. When it is null, or *|pDstLen| < 1, nothing
//             is written and the whole input is scanned just to count
//             characters.
// |pDstLen| - in: capacity of |pDst| in characters.
//             out: number of characters produced (or counted).
//
// Returns -1 when |pSrcLen| or |pDstLen| is null, or when |pSrc| is null while
// *|pSrcLen| is positive; returns 1 otherwise, including when decoding stops
// early.
//
// Decoding stops at the first byte that cannot appear where it is found (a
// continuation byte with no sequence pending, or 0xFE/0xFF), when the
// destination is full, or at the end of the input. An unfinished sequence at
// the end of the input is not counted in *|pSrcLen|. Lead bytes for 5- and
// 6-byte sequences are accepted. A lead byte that is followed by something
// other than its continuation bytes is dropped without producing a character.
// Each value is stored with a plain conversion to wchar_t, so it is narrowed
// when wchar_t cannot hold it.
int32_t FX_UTF8Decode(const char* pSrc,
                      int32_t* pSrcLen,
                      wchar_t* pDst,
                      int32_t* pDstLen) {
  if (!pSrcLen || !pDstLen)
    return -1;

  const int32_t iSrcLen = *pSrcLen;
  if (iSrcLen < 1) {
    *pSrcLen = *pDstLen = 0;
    return 1;
  }
  // There are bytes to read from here on, so a null source is an error rather
  // than a crash.
  if (!pSrc)
    return -1;

  const int32_t iDstLen = *pDstLen;
  const bool bValidDst = (pDst && iDstLen > 0);
  uint32_t dwCode = 0;   // Code point being assembled.
  int32_t iPending = 0;  // Continuation bytes still expected.
  int32_t iSrcNum = 0;   // Bytes consumed up to the last produced character.
  int32_t iDstNum = 0;   // Characters produced so far.

  for (int32_t iIndex = 0; iIndex < iSrcLen; ++iIndex) {
    const uint8_t byte = static_cast<uint8_t>(pSrc[iIndex]);
    bool bComplete = false;
    if (byte < 0x80) {
      // Single-byte character; also discards any unfinished sequence.
      iPending = 0;
      dwCode = byte;
      bComplete = true;
    } else if (byte < 0xc0) {
      // Continuation byte: only valid while a sequence is in progress.
      if (iPending < 1)
        break;
      --iPending;
      dwCode |= static_cast<uint32_t>(byte & 0x3f) << (iPending * 6);
      bComplete = (iPending == 0);
    } else if (byte < 0xe0) {
      iPending = 1;
      dwCode = static_cast<uint32_t>(byte & 0x1f) << 6;
    } else if (byte < 0xf0) {
      iPending = 2;
      dwCode = static_cast<uint32_t>(byte & 0x0f) << 12;
    } else if (byte < 0xf8) {
      iPending = 3;
      dwCode = static_cast<uint32_t>(byte & 0x07) << 18;
    } else if (byte < 0xfc) {
      iPending = 4;
      dwCode = static_cast<uint32_t>(byte & 0x03) << 24;
    } else if (byte < 0xfe) {
      iPending = 5;
      dwCode = static_cast<uint32_t>(byte & 0x01) << 30;
    } else {
      // 0xFE and 0xFF never occur in UTF-8.
      break;
    }

    if (!bComplete)
      continue;

    ++iDstNum;
    // Report the real offset just past this character. Summing sequence
    // lengths instead would under-count whenever an abandoned lead byte was
    // skipped, and a caller resuming at that offset would decode bytes twice.
    iSrcNum = iIndex + 1;
    if (bValidDst) {
      *pDst++ = static_cast<wchar_t>(dwCode);
      if (iDstNum >= iDstLen)
        break;
    }
  }

  *pSrcLen = iSrcNum;
  *pDstLen = iDstNum;
  return 1;
}