| // Copyright 2014 PDFium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| |
| #include "core/fpdfapi/parser/fpdf_parser_utility.h" |
| |
| #include <ostream> |
| |
| #include "core/fpdfapi/parser/cpdf_array.h" |
| #include "core/fpdfapi/parser/cpdf_boolean.h" |
| #include "core/fpdfapi/parser/cpdf_dictionary.h" |
| #include "core/fpdfapi/parser/cpdf_number.h" |
| #include "core/fpdfapi/parser/cpdf_reference.h" |
| #include "core/fpdfapi/parser/cpdf_stream.h" |
| #include "core/fpdfapi/parser/cpdf_stream_acc.h" |
| #include "core/fpdfapi/parser/cpdf_string.h" |
| #include "core/fpdfapi/parser/fpdf_parser_decode.h" |
| #include "core/fxcrt/fx_extension.h" |
| #include "core/fxcrt/fx_stream.h" |
| #include "third_party/base/check.h" |
| #include "third_party/base/notreached.h" |
| |
// Character classification table, indexed by 8-bit character code. Every
// entry is one of:
//   'W' - whitespace: NUL, HT, LF, FF, CR, SP, 0x80, 0xff
//   'N' - numeric:    0123456789+-.
//   'D' - delimiter:  %()/<>[]{}
//   'R' - regular:    everything else.
// Laid out eight entries per row; the comment above each row gives the code
// of the row's first entry and, for the 7-bit range, the characters covered.
const char PDF_CharType[256] = {
    // 0x00: NUL SOH STX ETX EOT ENQ ACK BEL
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x08: BS HT LF VT FF CR SO SI
    'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',
    // 0x10: DLE DC1 DC2 DC3 DC4 NAK SYN ETB
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x18: CAN EM SUB ESC FS GS RS US
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x20: SP ! " # $ % & '
    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R',
    // 0x28: ( ) * + , - . /
    'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',
    // 0x30: 0 1 2 3 4 5 6 7
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    // 0x38: 8 9 : ; < = > ?
    'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',
    // 0x40: @ A B C D E F G
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x48: H I J K L M N O
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x50: P Q R S T U V W
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x58: X Y Z [ \ ] ^ _
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
    // 0x60: ` a b c d e f g
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x68: h i j k l m n o
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x70: p q r s t u v w
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x78: x y z { | } ~ DEL
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',

    // 0x80 (the first entry is classified as whitespace)
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x88
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x90
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x98
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xA0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xA8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xB0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xB8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xC0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xC8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xD0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xD8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xE0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xE8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xF0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xF8 (the last entry, 0xff, is classified as whitespace)
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
| |
| absl::optional<FX_FILESIZE> GetHeaderOffset( |
| const RetainPtr<IFX_SeekableReadStream>& pFile) { |
| static constexpr size_t kBufSize = 4; |
| uint8_t buf[kBufSize]; |
| for (FX_FILESIZE offset = 0; offset <= 1024; ++offset) { |
| if (!pFile->ReadBlockAtOffset(buf, offset, kBufSize)) |
| return absl::nullopt; |
| |
| if (memcmp(buf, "%PDF", 4) == 0) |
| return offset; |
| } |
| return absl::nullopt; |
| } |
| |
| int32_t GetDirectInteger(const CPDF_Dictionary* pDict, const ByteString& key) { |
| const CPDF_Number* pObj = ToNumber(pDict->GetObjectFor(key)); |
| return pObj ? pObj->GetInteger() : 0; |
| } |
| |
| ByteString PDF_NameDecode(ByteStringView orig) { |
| size_t src_size = orig.GetLength(); |
| size_t out_index = 0; |
| ByteString result; |
| { |
| // Span's lifetime must end before ReleaseBuffer() below. |
| pdfium::span<char> pDest = result.GetBuffer(src_size); |
| for (size_t i = 0; i < src_size; i++) { |
| if (orig[i] == '#' && i + 2 < src_size) { |
| pDest[out_index++] = FXSYS_HexCharToInt(orig[i + 1]) * 16 + |
| FXSYS_HexCharToInt(orig[i + 2]); |
| i += 2; |
| } else { |
| pDest[out_index++] = orig[i]; |
| } |
| } |
| } |
| result.ReleaseBuffer(out_index); |
| return result; |
| } |
| |
| ByteString PDF_NameEncode(const ByteString& orig) { |
| const uint8_t* src_buf = reinterpret_cast<const uint8_t*>(orig.c_str()); |
| int src_len = orig.GetLength(); |
| int dest_len = 0; |
| int i; |
| for (i = 0; i < src_len; i++) { |
| uint8_t ch = src_buf[i]; |
| if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || |
| PDFCharIsDelimiter(ch)) { |
| dest_len += 3; |
| } else { |
| dest_len++; |
| } |
| } |
| if (dest_len == src_len) |
| return orig; |
| |
| ByteString res; |
| { |
| // Span's lifetime must end before ReleaseBuffer() below. |
| pdfium::span<char> dest_buf = res.GetBuffer(dest_len); |
| dest_len = 0; |
| for (i = 0; i < src_len; i++) { |
| uint8_t ch = src_buf[i]; |
| if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || |
| PDFCharIsDelimiter(ch)) { |
| dest_buf[dest_len++] = '#'; |
| FXSYS_IntToTwoHexChars(ch, &dest_buf[dest_len]); |
| dest_len += 2; |
| continue; |
| } |
| dest_buf[dest_len++] = ch; |
| } |
| } |
| res.ReleaseBuffer(dest_len); |
| return res; |
| } |
| |
| std::vector<float> ReadArrayElementsToVector(const CPDF_Array* pArray, |
| size_t nCount) { |
| DCHECK(pArray); |
| DCHECK(pArray->size() >= nCount); |
| std::vector<float> ret(nCount); |
| for (size_t i = 0; i < nCount; ++i) |
| ret[i] = pArray->GetNumberAt(i); |
| return ret; |
| } |
| |
| bool ValidateDictType(const CPDF_Dictionary* dict, ByteStringView type) { |
| DCHECK(!type.IsEmpty()); |
| return dict && dict->GetNameFor("Type") == type; |
| } |
| |
| bool ValidateDictAllResourcesOfType(const CPDF_Dictionary* dict, |
| ByteStringView type) { |
| if (!dict) |
| return false; |
| |
| CPDF_DictionaryLocker locker(dict); |
| for (const auto& it : locker) { |
| const CPDF_Dictionary* entry = ToDictionary(it.second->GetDirect()); |
| if (!ValidateDictType(entry, type)) |
| return false; |
| } |
| return true; |
| } |
| |
| bool ValidateFontResourceDict(const CPDF_Dictionary* dict) { |
| return ValidateDictAllResourcesOfType(dict, "Font"); |
| } |
| |
| std::ostream& operator<<(std::ostream& buf, const CPDF_Object* pObj) { |
| if (!pObj) { |
| buf << " null"; |
| return buf; |
| } |
| switch (pObj->GetType()) { |
| case CPDF_Object::kNullobj: |
| buf << " null"; |
| break; |
| case CPDF_Object::kBoolean: |
| case CPDF_Object::kNumber: |
| buf << " " << pObj->GetString(); |
| break; |
| case CPDF_Object::kString: |
| buf << pObj->AsString()->EncodeString(); |
| break; |
| case CPDF_Object::kName: { |
| ByteString str = pObj->GetString(); |
| buf << "/" << PDF_NameEncode(str); |
| break; |
| } |
| case CPDF_Object::kReference: { |
| buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R "; |
| break; |
| } |
| case CPDF_Object::kArray: { |
| const CPDF_Array* p = pObj->AsArray(); |
| buf << "["; |
| for (size_t i = 0; i < p->size(); i++) { |
| const CPDF_Object* pElement = p->GetObjectAt(i); |
| if (!pElement->IsInline()) { |
| buf << " " << pElement->GetObjNum() << " 0 R"; |
| } else { |
| buf << pElement; |
| } |
| } |
| buf << "]"; |
| break; |
| } |
| case CPDF_Object::kDictionary: { |
| CPDF_DictionaryLocker locker(pObj->AsDictionary()); |
| buf << "<<"; |
| for (const auto& it : locker) { |
| const ByteString& key = it.first; |
| const CPDF_Object* pValue = it.second.Get(); |
| buf << "/" << PDF_NameEncode(key); |
| if (!pValue->IsInline()) { |
| buf << " " << pValue->GetObjNum() << " 0 R "; |
| } else { |
| buf << pValue; |
| } |
| } |
| buf << ">>"; |
| break; |
| } |
| case CPDF_Object::kStream: { |
| const CPDF_Stream* p = pObj->AsStream(); |
| buf << p->GetDict() << "stream\r\n"; |
| auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(p); |
| pAcc->LoadAllDataRaw(); |
| buf.write(reinterpret_cast<const char*>(pAcc->GetData()), |
| pAcc->GetSize()); |
| buf << "\r\nendstream"; |
| break; |
| } |
| default: |
| NOTREACHED(); |
| break; |
| } |
| return buf; |
| } |