blob: 357ffd820a860fbb26fd1bab91d34d955f9ca20f [file] [log] [blame]
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#ifndef _FX_CODEPAGE
#define _FX_CODEPAGE
class IFX_CodePage;
#define FX_CODEPAGE_DefANSI 0
#define FX_CODEPAGE_DefOEM 1
#define FX_CODEPAGE_DefMAC 2
#define FX_CODEPAGE_Thread 3
#define FX_CODEPAGE_Symbol 42
#define FX_CODEPAGE_MSDOS_US 437
#define FX_CODEPAGE_Arabic_ASMO708 708
#define FX_CODEPAGE_Arabic_ASMO449Plus 709
#define FX_CODEPAGE_Arabic_Transparent 710
#define FX_CODEPAGE_Arabic_NafithaEnhanced 711
#define FX_CODEPAGE_Arabic_TransparentASMO 720
#define FX_CODEPAGE_MSDOS_Greek1 737
#define FX_CODEPAGE_MSDOS_Baltic 775
#define FX_CODEPAGE_MSWin31_WesternEuropean 819
#define FX_CODEPAGE_MSDOS_WesternEuropean 850
#define FX_CODEPAGE_MSDOS_EasternEuropean 852
#define FX_CODEPAGE_MSDOS_Latin3 853
#define FX_CODEPAGE_MSDOS_Cyrillic 855
#define FX_CODEPAGE_MSDOS_Turkish 857
#define FX_CODEPAGE_MSDOS_Latin1Euro 858
#define FX_CODEPAGE_MSDOS_Portuguese 860
#define FX_CODEPAGE_MSDOS_Icelandic 861
#define FX_CODEPAGE_MSDOS_Hebrew 862
#define FX_CODEPAGE_MSDOS_FrenchCanadian 863
#define FX_CODEPAGE_MSDOS_Arabic 864
#define FX_CODEPAGE_MSDOS_Norwegian 865
#define FX_CODEPAGE_MSDOS_Russian 866
#define FX_CODEPAGE_MSDOS_Greek2 869
#define FX_CODEPAGE_MSDOS_Thai 874
#define FX_CODEPAGE_MSDOS_KamenickyCS 895
#define FX_CODEPAGE_ShiftJIS 932
#define FX_CODEPAGE_ChineseSimplified 936
#define FX_CODEPAGE_Korean 949
#define FX_CODEPAGE_ChineseTraditional 950
#define FX_CODEPAGE_UTF16LE 1200
#define FX_CODEPAGE_UTF16BE 1201
#define FX_CODEPAGE_MSWin_EasternEuropean 1250
#define FX_CODEPAGE_MSWin_Cyrillic 1251
#define FX_CODEPAGE_MSWin_WesternEuropean 1252
#define FX_CODEPAGE_MSWin_Greek 1253
#define FX_CODEPAGE_MSWin_Turkish 1254
#define FX_CODEPAGE_MSWin_Hebrew 1255
#define FX_CODEPAGE_MSWin_Arabic 1256
#define FX_CODEPAGE_MSWin_Baltic 1257
#define FX_CODEPAGE_MSWin_Vietnamese 1258
#define FX_CODEPAGE_Johab 1361
#define FX_CODEPAGE_MAC_Roman 10000
#define FX_CODEPAGE_MAC_ShiftJIS 10001
#define FX_CODEPAGE_MAC_ChineseTraditional 10002
#define FX_CODEPAGE_MAC_Korean 10003
#define FX_CODEPAGE_MAC_Arabic 10004
#define FX_CODEPAGE_MAC_Hebrew 10005
#define FX_CODEPAGE_MAC_Greek 10006
#define FX_CODEPAGE_MAC_Cyrillic 10007
#define FX_CODEPAGE_MAC_ChineseSimplified 10008
#define FX_CODEPAGE_MAC_Thai 10021
#define FX_CODEPAGE_MAC_EasternEuropean 10029
#define FX_CODEPAGE_MAC_Turkish 10081
#define FX_CODEPAGE_UTF32LE 12000
#define FX_CODEPAGE_UTF32BE 12001
#define FX_CODEPAGE_ISO8859_1 28591
#define FX_CODEPAGE_ISO8859_2 28592
#define FX_CODEPAGE_ISO8859_3 28593
#define FX_CODEPAGE_ISO8859_4 28594
#define FX_CODEPAGE_ISO8859_5 28595
#define FX_CODEPAGE_ISO8859_6 28596
#define FX_CODEPAGE_ISO8859_7 28597
#define FX_CODEPAGE_ISO8859_8 28598
#define FX_CODEPAGE_ISO8859_9 28599
#define FX_CODEPAGE_ISO8859_10 28600
#define FX_CODEPAGE_ISO8859_11 28601
#define FX_CODEPAGE_ISO8859_12 28602
#define FX_CODEPAGE_ISO8859_13 28603
#define FX_CODEPAGE_ISO8859_14 28604
#define FX_CODEPAGE_ISO8859_15 28605
#define FX_CODEPAGE_ISO8859_16 28606
#define FX_CODEPAGE_ISCII_Devanagari 57002
#define FX_CODEPAGE_ISCII_Bengali 57003
#define FX_CODEPAGE_ISCII_Tamil 57004
#define FX_CODEPAGE_ISCII_Telugu 57005
#define FX_CODEPAGE_ISCII_Assamese 57006
#define FX_CODEPAGE_ISCII_Oriya 57007
#define FX_CODEPAGE_ISCII_Kannada 57008
#define FX_CODEPAGE_ISCII_Malayalam 57009
#define FX_CODEPAGE_ISCII_Gujarati 57010
#define FX_CODEPAGE_ISCII_Punjabi 57011
#define FX_CODEPAGE_UTF7 65000
#define FX_CODEPAGE_UTF8 65001
#define FX_CHARSET_ANSI 0
#define FX_CHARSET_Default 1
#define FX_CHARSET_Symbol 2
#define FX_CHARSET_MAC_Roman 77
#define FX_CHARSET_MAC_ShiftJIS 78
#define FX_CHARSET_MAC_Korean 79
#define FX_CHARSET_MAC_ChineseSimplified 80
#define FX_CHARSET_MAC_ChineseTriditional 81
#define FX_CHARSET_MAC_Johab 82
#define FX_CHARSET_MAC_Hebrew 83
#define FX_CHARSET_MAC_Arabic 84
#define FX_CHARSET_MAC_Greek 85
#define FX_CHARSET_MAC_Turkish 86
#define FX_CHARSET_MAC_Thai 87
#define FX_CHARSET_MAC_EasternEuropean 88
#define FX_CHARSET_MAC_Cyrillic 89
#define FX_CHARSET_ShiftJIS 128
#define FX_CHARSET_Korean 129
#define FX_CHARSET_Johab 130
#define FX_CHARSET_ChineseSimplified 134
#define FX_CHARSET_ChineseTriditional 136
#define FX_CHARSET_MSWin_Greek 161
#define FX_CHARSET_MSWin_Turkish 162
#define FX_CHARSET_MSWin_Vietnamese 163
#define FX_CHARSET_MSWin_Hebrew 177
#define FX_CHARSET_MSWin_Arabic 178
#define FX_CHARSET_ArabicTraditional 179
#define FX_CHARSET_ArabicUser 180
#define FX_CHARSET_HebrewUser 181
#define FX_CHARSET_MSWin_Baltic 186
#define FX_CHARSET_MSWin_Cyrillic 204
#define FX_CHARSET_Thai 222
#define FX_CHARSET_MSWin_EasterEuropean 238
#define FX_CHARSET_US 254
#define FX_CHARSET_OEM 255
FX_WORD FX_GetCodePageFromCharset(uint8_t charset);
FX_WORD FX_GetCharsetFromCodePage(FX_WORD codepage);
FX_WORD FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength);
FX_WORD FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength);
FX_WORD FX_GetDefCodePageByLanguage(FX_WORD wLanguage);
void FX_SwapByteOrder(FX_WCHAR* pStr, int32_t iLength);
void FX_SwapByteOrderCopy(const FX_WCHAR* pSrc,
FX_WCHAR* pDst,
int32_t iLength);
void FX_UTF16ToWChar(void* pBuffer, int32_t iLength);
void FX_UTF16ToWCharCopy(const FX_WORD* pUTF16,
FX_WCHAR* pWChar,
int32_t iLength);
void FX_WCharToUTF16(void* pBuffer, int32_t iLength);
void FX_WCharToUTF16Copy(const FX_WCHAR* pWChar,
FX_WORD* pUTF16,
int32_t iLength);
int32_t FX_DecodeString(FX_WORD wCodePage,
const FX_CHAR* pSrc,
int32_t* pSrcLen,
FX_WCHAR* pDst,
int32_t* pDstLen,
FX_BOOL bErrBreak = FALSE);
int32_t FX_UTF8Decode(const FX_CHAR* pSrc,
int32_t* pSrcLen,
FX_WCHAR* pDst,
int32_t* pDstLen);
enum FX_CODESYSTEM {
FX_MBCS = 0,
FX_SBCS,
FX_DBCS,
};
typedef struct _FX_CODEPAGE_HEADER {
uint16_t uCPID;
uint8_t uMinCharBytes;
uint8_t uMaxCharBytes;
FX_CODESYSTEM eCPType;
FX_BOOL bHasLeadByte;
FX_WCHAR wMinChar;
FX_WCHAR wMaxChar;
FX_WCHAR wDefChar;
FX_WCHAR wMinUnicode;
FX_WCHAR wMaxUnicode;
FX_WCHAR wDefUnicode;
} FX_CODEPAGE_HEADER;
#define FX_CPMAPTYPE_Consecution 1
#define FX_CPMAPTYPE_Strict 2
#define FX_CPMAPTYPE_NoMapping 3
#define FX_CPMAPTYPE_Delta 4
typedef struct _FX_CPCU_MAPTABLE1 {
uint16_t uMapType;
uint16_t uUniocde;
} FX_CPCU_MAPTABLE1;
typedef struct _FX_CPCU_MAPTABLE2 {
uint8_t uTrailByte;
uint8_t uMapType;
uint16_t uOffset;
} FX_CPCU_MAPTABLE2;
typedef struct _FX_CPCU_MAPINFO {
FX_CPCU_MAPTABLE1* pMapTable1;
FX_CPCU_MAPTABLE2* pMapTable2;
const uint8_t* pMapData;
} FX_CPCU_MAPINFO;
typedef struct _FX_CPUC_MAPTABLE {
uint16_t uStartUnicode;
uint16_t uEndUnicode;
uint16_t uMapType;
uint16_t uOffset;
} FX_CPUC_MAPTABLE;
typedef struct _FX_CPUC_MAPINFO {
uint32_t uMapCount;
FX_CPUC_MAPTABLE* pMapTable;
const uint8_t* pMapData;
} FX_CPUC_MAPINFO;
typedef struct _FX_CODEPAGE {
FX_CODEPAGE_HEADER const* pCPHeader;
FX_CPCU_MAPINFO const* pCPCUMapInfo;
FX_CPUC_MAPINFO const* pCPUCMapInfo;
} FX_CODEPAGE, *FX_LPCODEPAGE;
typedef FX_CODEPAGE const* FX_LPCCODEPAGE;
typedef struct _FX_STR2CPHASH {
uint32_t uHash;
uint32_t uCodePage;
} FX_STR2CPHASH;
typedef struct _FX_CHARSET_MAP {
uint16_t charset;
uint16_t codepage;
} FX_CHARSET_MAP;
typedef struct _FX_LANG2CPMAP {
FX_WORD wLanguage;
FX_WORD wCodepage;
} FX_LANG2CPMAP;
class IFX_CodePage {
public:
static IFX_CodePage* Create(FX_WORD wCodePage);
virtual ~IFX_CodePage() {}
virtual void Release() = 0;
virtual FX_WORD GetCodePageNumber() const = 0;
virtual FX_CODESYSTEM GetCodeSystemType() const = 0;
virtual FX_BOOL HasLeadByte() const = 0;
virtual FX_BOOL IsLeadByte(uint8_t byte) const = 0;
virtual int32_t GetMinBytesPerChar() const = 0;
virtual int32_t GetMaxBytesPerChar() const = 0;
virtual FX_WCHAR GetMinCharcode() const = 0;
virtual FX_WCHAR GetMaxCharcode() const = 0;
virtual FX_WCHAR GetDefCharcode() const = 0;
virtual FX_WCHAR GetMinUnicode() const = 0;
virtual FX_WCHAR GetMaxUnicode() const = 0;
virtual FX_WCHAR GetDefUnicode() const = 0;
virtual FX_BOOL IsValidCharcode(FX_WORD wCharcode) const = 0;
virtual FX_WCHAR GetUnicode(FX_WORD wCharcode) const = 0;
virtual FX_BOOL IsValidUnicode(FX_WCHAR wUnicode) const = 0;
virtual FX_WORD GetCharcode(FX_WCHAR wUnicode) const = 0;
};
#endif