// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#ifndef _FX_CODEPAGE | |
#define _FX_CODEPAGE | |
// Forward declaration of the code page interface; the full class definition
// appears later in this header.
class IFX_CodePage;
// Code page identifiers (FX_CODEPAGE_*).
// NOTE(review): the numeric values look like Windows code page numbers
// (e.g. 1252, 65001) - confirm before relying on that.
// These stay as plain integer macros so they remain usable in #if
// expressions and wherever an integer constant is expected.

// Default / special selectors.
#define FX_CODEPAGE_DefANSI 0
#define FX_CODEPAGE_DefOEM 1
#define FX_CODEPAGE_DefMAC 2
#define FX_CODEPAGE_Thread 3
#define FX_CODEPAGE_Symbol 42

// MS-DOS, Arabic and Windows 3.1 code pages.
#define FX_CODEPAGE_MSDOS_US 437
#define FX_CODEPAGE_Arabic_ASMO708 708
#define FX_CODEPAGE_Arabic_ASMO449Plus 709
#define FX_CODEPAGE_Arabic_Transparent 710
#define FX_CODEPAGE_Arabic_NafithaEnhanced 711
#define FX_CODEPAGE_Arabic_TransparentASMO 720
#define FX_CODEPAGE_MSDOS_Greek1 737
#define FX_CODEPAGE_MSDOS_Baltic 775
#define FX_CODEPAGE_MSWin31_WesternEuropean 819
#define FX_CODEPAGE_MSDOS_WesternEuropean 850
#define FX_CODEPAGE_MSDOS_EasternEuropean 852
#define FX_CODEPAGE_MSDOS_Latin3 853
#define FX_CODEPAGE_MSDOS_Cyrillic 855
#define FX_CODEPAGE_MSDOS_Turkish 857
#define FX_CODEPAGE_MSDOS_Latin1Euro 858
#define FX_CODEPAGE_MSDOS_Portuguese 860
#define FX_CODEPAGE_MSDOS_Icelandic 861
#define FX_CODEPAGE_MSDOS_Hebrew 862
#define FX_CODEPAGE_MSDOS_FrenchCanadian 863
#define FX_CODEPAGE_MSDOS_Arabic 864
#define FX_CODEPAGE_MSDOS_Norwegian 865
#define FX_CODEPAGE_MSDOS_Russian 866
#define FX_CODEPAGE_MSDOS_Greek2 869
#define FX_CODEPAGE_MSDOS_Thai 874
#define FX_CODEPAGE_MSDOS_KamenickyCS 895

// Japanese, Chinese and Korean code pages.
#define FX_CODEPAGE_ShiftJIS 932
#define FX_CODEPAGE_ChineseSimplified 936
#define FX_CODEPAGE_Korean 949
#define FX_CODEPAGE_ChineseTraditional 950

// UTF-16, little and big endian.
#define FX_CODEPAGE_UTF16LE 1200
#define FX_CODEPAGE_UTF16BE 1201

// Windows code pages.
#define FX_CODEPAGE_MSWin_EasternEuropean 1250
#define FX_CODEPAGE_MSWin_Cyrillic 1251
#define FX_CODEPAGE_MSWin_WesternEuropean 1252
#define FX_CODEPAGE_MSWin_Greek 1253
#define FX_CODEPAGE_MSWin_Turkish 1254
#define FX_CODEPAGE_MSWin_Hebrew 1255
#define FX_CODEPAGE_MSWin_Arabic 1256
#define FX_CODEPAGE_MSWin_Baltic 1257
#define FX_CODEPAGE_MSWin_Vietnamese 1258
#define FX_CODEPAGE_Johab 1361

// Macintosh code pages.
#define FX_CODEPAGE_MAC_Roman 10000
#define FX_CODEPAGE_MAC_ShiftJIS 10001
#define FX_CODEPAGE_MAC_ChineseTraditional 10002
#define FX_CODEPAGE_MAC_Korean 10003
#define FX_CODEPAGE_MAC_Arabic 10004
#define FX_CODEPAGE_MAC_Hebrew 10005
#define FX_CODEPAGE_MAC_Greek 10006
#define FX_CODEPAGE_MAC_Cyrillic 10007
#define FX_CODEPAGE_MAC_ChineseSimplified 10008
#define FX_CODEPAGE_MAC_Thai 10021
#define FX_CODEPAGE_MAC_EasternEuropean 10029
#define FX_CODEPAGE_MAC_Turkish 10081

// UTF-32, little and big endian.
#define FX_CODEPAGE_UTF32LE 12000
#define FX_CODEPAGE_UTF32BE 12001

// ISO 8859 parts 1 through 16.
#define FX_CODEPAGE_ISO8859_1 28591
#define FX_CODEPAGE_ISO8859_2 28592
#define FX_CODEPAGE_ISO8859_3 28593
#define FX_CODEPAGE_ISO8859_4 28594
#define FX_CODEPAGE_ISO8859_5 28595
#define FX_CODEPAGE_ISO8859_6 28596
#define FX_CODEPAGE_ISO8859_7 28597
#define FX_CODEPAGE_ISO8859_8 28598
#define FX_CODEPAGE_ISO8859_9 28599
#define FX_CODEPAGE_ISO8859_10 28600
#define FX_CODEPAGE_ISO8859_11 28601
#define FX_CODEPAGE_ISO8859_12 28602
#define FX_CODEPAGE_ISO8859_13 28603
#define FX_CODEPAGE_ISO8859_14 28604
#define FX_CODEPAGE_ISO8859_15 28605
#define FX_CODEPAGE_ISO8859_16 28606

// ISCII code pages.
#define FX_CODEPAGE_ISCII_Devanagari 57002
#define FX_CODEPAGE_ISCII_Bengali 57003
#define FX_CODEPAGE_ISCII_Tamil 57004
#define FX_CODEPAGE_ISCII_Telugu 57005
#define FX_CODEPAGE_ISCII_Assamese 57006
#define FX_CODEPAGE_ISCII_Oriya 57007
#define FX_CODEPAGE_ISCII_Kannada 57008
#define FX_CODEPAGE_ISCII_Malayalam 57009
#define FX_CODEPAGE_ISCII_Gujarati 57010
#define FX_CODEPAGE_ISCII_Punjabi 57011

// UTF-7 and UTF-8.
#define FX_CODEPAGE_UTF7 65000
#define FX_CODEPAGE_UTF8 65001
// Character set identifiers (FX_CHARSET_*). All values fit in one byte.
// NOTE(review): the values resemble the Windows LOGFONT lfCharSet constants -
// confirm before relying on that.
// The spellings "Triditional" and "EasterEuropean" are part of the public
// macro names; they are kept as-is because renaming would break any code that
// references them.

// Generic character sets.
#define FX_CHARSET_ANSI 0
#define FX_CHARSET_Default 1
#define FX_CHARSET_Symbol 2

// Macintosh character sets.
#define FX_CHARSET_MAC_Roman 77
#define FX_CHARSET_MAC_ShiftJIS 78
#define FX_CHARSET_MAC_Korean 79
#define FX_CHARSET_MAC_ChineseSimplified 80
#define FX_CHARSET_MAC_ChineseTriditional 81
#define FX_CHARSET_MAC_Johab 82
#define FX_CHARSET_MAC_Hebrew 83
#define FX_CHARSET_MAC_Arabic 84
#define FX_CHARSET_MAC_Greek 85
#define FX_CHARSET_MAC_Turkish 86
#define FX_CHARSET_MAC_Thai 87
#define FX_CHARSET_MAC_EasternEuropean 88
#define FX_CHARSET_MAC_Cyrillic 89

// Japanese, Korean and Chinese character sets.
#define FX_CHARSET_ShiftJIS 128
#define FX_CHARSET_Korean 129
#define FX_CHARSET_Johab 130
#define FX_CHARSET_ChineseSimplified 134
#define FX_CHARSET_ChineseTriditional 136

// Windows and remaining character sets.
#define FX_CHARSET_MSWin_Greek 161
#define FX_CHARSET_MSWin_Turkish 162
#define FX_CHARSET_MSWin_Vietnamese 163
#define FX_CHARSET_MSWin_Hebrew 177
#define FX_CHARSET_MSWin_Arabic 178
#define FX_CHARSET_ArabicTraditional 179
#define FX_CHARSET_ArabicUser 180
#define FX_CHARSET_HebrewUser 181
#define FX_CHARSET_MSWin_Baltic 186
#define FX_CHARSET_MSWin_Cyrillic 204
#define FX_CHARSET_Thai 222
#define FX_CHARSET_MSWin_EasterEuropean 238
#define FX_CHARSET_US 254
#define FX_CHARSET_OEM 255
// Code page lookup helpers. Only the declarations are visible in this header;
// the notes below are inferred from names and signatures and should be
// confirmed against the implementation.

// Presumably maps an FX_CHARSET_* value to the matching FX_CODEPAGE_* value.
FX_WORD FX_GetCodePageFromCharset(uint8_t charset);

// Presumably the inverse lookup: FX_CODEPAGE_* value to FX_CHARSET_* value.
FX_WORD FX_GetCharsetFromCodePage(FX_WORD codepage);

// Presumably resolves a code page from a name held in a narrow string of
// iLength characters.
FX_WORD FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength);

// Wide-string counterpart of FX_GetCodePageFromStringA. "Form" in the name
// looks like a misspelling of "From"; it is kept because the name is part of
// the public interface.
FX_WORD FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength);

// Presumably returns the default code page for a language identifier.
FX_WORD FX_GetDefCodePageByLanguage(FX_WORD wLanguage);
void FX_SwapByteOrder(FX_WCHAR* pStr, int32_t iLength); | |
void FX_SwapByteOrderCopy(const FX_WCHAR* pSrc, | |
FX_WCHAR* pDst, | |
int32_t iLength); | |
void FX_UTF16ToWChar(void* pBuffer, int32_t iLength); | |
void FX_UTF16ToWCharCopy(const FX_WORD* pUTF16, | |
FX_WCHAR* pWChar, | |
int32_t iLength); | |
void FX_WCharToUTF16(void* pBuffer, int32_t iLength); | |
void FX_WCharToUTF16Copy(const FX_WCHAR* pWChar, | |
FX_WORD* pUTF16, | |
int32_t iLength); | |
int32_t FX_DecodeString(FX_WORD wCodePage, | |
const FX_CHAR* pSrc, | |
int32_t* pSrcLen, | |
FX_WCHAR* pDst, | |
int32_t* pDstLen, | |
FX_BOOL bErrBreak = FALSE); | |
int32_t FX_UTF8Decode(const FX_CHAR* pSrc, | |
int32_t* pSrcLen, | |
FX_WCHAR* pDst, | |
int32_t* pDstLen); | |
// Kind of encoding a code page uses.
// NOTE(review): the names presumably stand for multi-byte, single-byte and
// double-byte character sets - confirm.
// The enumerators are unscoped and numbered 0, 1, 2.
enum FX_CODESYSTEM {
  FX_MBCS = 0,
  FX_SBCS,
  FX_DBCS,
};
typedef struct _FX_CODEPAGE_HEADER { | |
uint16_t uCPID; | |
uint8_t uMinCharBytes; | |
uint8_t uMaxCharBytes; | |
FX_CODESYSTEM eCPType; | |
FX_BOOL bHasLeadByte; | |
FX_WCHAR wMinChar; | |
FX_WCHAR wMaxChar; | |
FX_WCHAR wDefChar; | |
FX_WCHAR wMinUnicode; | |
FX_WCHAR wMaxUnicode; | |
FX_WCHAR wDefUnicode; | |
} FX_CODEPAGE_HEADER; | |
// Mapping type tags, presumably the values stored in the uMapType fields of
// the mapping table structs in this header - confirm against the table data.
#define FX_CPMAPTYPE_Consecution 1
#define FX_CPMAPTYPE_Strict 2
#define FX_CPMAPTYPE_NoMapping 3
#define FX_CPMAPTYPE_Delta 4
// First-level mapping table entry ("CPCU" presumably means code page to
// Unicode - confirm).
typedef struct _FX_CPCU_MAPTABLE1 {
  uint16_t uMapType;  // Mapping type; presumably an FX_CPMAPTYPE_* value.
  // Spelled "Uniocde" in the original; kept because the field name is part
  // of the struct's interface.
  uint16_t uUniocde;
} FX_CPCU_MAPTABLE1;
// Second-level mapping table entry ("CPCU" presumably means code page to
// Unicode - confirm).
typedef struct _FX_CPCU_MAPTABLE2 {
  uint8_t uTrailByte;  // Trail byte this entry applies to.
  uint8_t uMapType;    // Mapping type; presumably an FX_CPMAPTYPE_* value.
  uint16_t uOffset;    // Offset; presumably into the associated map data.
} FX_CPCU_MAPTABLE2;
typedef struct _FX_CPCU_MAPINFO { | |
FX_CPCU_MAPTABLE1* pMapTable1; | |
FX_CPCU_MAPTABLE2* pMapTable2; | |
const uint8_t* pMapData; | |
} FX_CPCU_MAPINFO; | |
// Mapping table entry covering a range of Unicode values ("CPUC" presumably
// means Unicode to code page - confirm).
typedef struct _FX_CPUC_MAPTABLE {
  uint16_t uStartUnicode;  // First Unicode value of the range.
  uint16_t uEndUnicode;    // Last Unicode value; inclusive or not - confirm.
  uint16_t uMapType;       // Mapping type; presumably an FX_CPMAPTYPE_* value.
  uint16_t uOffset;        // Offset; presumably into the associated map data.
} FX_CPUC_MAPTABLE;
typedef struct _FX_CPUC_MAPINFO { | |
uint32_t uMapCount; | |
FX_CPUC_MAPTABLE* pMapTable; | |
const uint8_t* pMapData; | |
} FX_CPUC_MAPINFO; | |
typedef struct _FX_CODEPAGE { | |
FX_CODEPAGE_HEADER const* pCPHeader; | |
FX_CPCU_MAPINFO const* pCPCUMapInfo; | |
FX_CPUC_MAPINFO const* pCPUCMapInfo; | |
} FX_CODEPAGE, *FX_LPCODEPAGE; | |
typedef FX_CODEPAGE const* FX_LPCCODEPAGE; | |
// Pairs a hash value with a code page number; presumably one entry of a
// name-hash to code page lookup table - confirm against the table that uses it.
typedef struct _FX_STR2CPHASH {
  uint32_t uHash;
  uint32_t uCodePage;
} FX_STR2CPHASH;
// Pairs a character set value with a code page value; presumably one entry of
// the charset/code page lookup table - confirm against the table that uses it.
typedef struct _FX_CHARSET_MAP {
  uint16_t charset;
  uint16_t codepage;
} FX_CHARSET_MAP;
typedef struct _FX_LANG2CPMAP { | |
FX_WORD wLanguage; | |
FX_WORD wCodepage; | |
} FX_LANG2CPMAP; | |
class IFX_CodePage { | |
public: | |
static IFX_CodePage* Create(FX_WORD wCodePage); | |
virtual ~IFX_CodePage() {} | |
virtual void Release() = 0; | |
virtual FX_WORD GetCodePageNumber() const = 0; | |
virtual FX_CODESYSTEM GetCodeSystemType() const = 0; | |
virtual FX_BOOL HasLeadByte() const = 0; | |
virtual FX_BOOL IsLeadByte(uint8_t byte) const = 0; | |
virtual int32_t GetMinBytesPerChar() const = 0; | |
virtual int32_t GetMaxBytesPerChar() const = 0; | |
virtual FX_WCHAR GetMinCharcode() const = 0; | |
virtual FX_WCHAR GetMaxCharcode() const = 0; | |
virtual FX_WCHAR GetDefCharcode() const = 0; | |
virtual FX_WCHAR GetMinUnicode() const = 0; | |
virtual FX_WCHAR GetMaxUnicode() const = 0; | |
virtual FX_WCHAR GetDefUnicode() const = 0; | |
virtual FX_BOOL IsValidCharcode(FX_WORD wCharcode) const = 0; | |
virtual FX_WCHAR GetUnicode(FX_WORD wCharcode) const = 0; | |
virtual FX_BOOL IsValidUnicode(FX_WCHAR wUnicode) const = 0; | |
virtual FX_WORD GetCharcode(FX_WCHAR wUnicode) const = 0; | |
}; | |
#endif |