Use ByteStringView in parsers

This CL converts the CPDF_SimpleParser to accept a ByteStringView.
Several of the callers of SimpleParser are also updated to use a
ByteStringView instead of <char*,size>.

Change-Id: Ic2df3a06f92e77b53745a0419b44368142f9d8e6
Reviewed-on: https://pdfium-review.googlesource.com/29351
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
diff --git a/core/fpdfapi/font/cpdf_cidfont.cpp b/core/fpdfapi/font/cpdf_cidfont.cpp
index f17af41..4a96b0a 100644
--- a/core/fpdfapi/font/cpdf_cidfont.cpp
+++ b/core/fpdfapi/font/cpdf_cidfont.cpp
@@ -376,7 +376,7 @@
     auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
     pAcc->LoadAllDataFiltered();
     m_pCMap = pdfium::MakeRetain<CPDF_CMap>();
-    m_pCMap->LoadEmbedded(pAcc->GetData(), pAcc->GetSize());
+    m_pCMap->LoadEmbedded(pAcc->GetDataView());
   } else {
     return false;
   }
diff --git a/core/fpdfapi/font/cpdf_cmap.cpp b/core/fpdfapi/font/cpdf_cmap.cpp
index a654fcc..e82e86b 100644
--- a/core/fpdfapi/font/cpdf_cmap.cpp
+++ b/core/fpdfapi/font/cpdf_cmap.cpp
@@ -290,10 +290,10 @@
   m_bLoaded = true;
 }
 
-void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) {
+void CPDF_CMap::LoadEmbedded(const ByteStringView& data) {
   m_DirectCharcodeToCIDTable = std::vector<uint16_t>(65536);
   CPDF_CMapParser parser(this);
-  CPDF_SimpleParser syntax(pData, size);
+  CPDF_SimpleParser syntax(data);
   while (1) {
     ByteStringView word = syntax.GetWord();
     if (word.IsEmpty()) {
diff --git a/core/fpdfapi/font/cpdf_cmap.h b/core/fpdfapi/font/cpdf_cmap.h
index 98a7728..3ad74ca 100644
--- a/core/fpdfapi/font/cpdf_cmap.h
+++ b/core/fpdfapi/font/cpdf_cmap.h
@@ -53,7 +53,7 @@
   void LoadPredefined(CPDF_CMapManager* pMgr,
                       const ByteString& name,
                       bool bPromptCJK);
-  void LoadEmbedded(const uint8_t* pData, uint32_t dwSize);
+  void LoadEmbedded(const ByteStringView& data);
 
   bool IsLoaded() const { return m_bLoaded; }
   bool IsVertWriting() const { return m_bVertical; }
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
index 0b746d7..88904f2 100644
--- a/core/fpdfapi/font/cpdf_tounicodemap.cpp
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -126,7 +126,7 @@
   CIDSet cid_set = CIDSET_UNKNOWN;
   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
   pAcc->LoadAllDataFiltered();
-  CPDF_SimpleParser parser(pAcc->GetData(), pAcc->GetSize());
+  CPDF_SimpleParser parser(pAcc->GetDataView());
   while (1) {
     ByteStringView word = parser.GetWord();
     if (word.IsEmpty()) {
diff --git a/core/fpdfapi/page/cpdf_psengine.cpp b/core/fpdfapi/page/cpdf_psengine.cpp
index 7ce3a60..adfd45e 100644
--- a/core/fpdfapi/page/cpdf_psengine.cpp
+++ b/core/fpdfapi/page/cpdf_psengine.cpp
@@ -190,10 +190,9 @@
   return static_cast<int>(Pop());
 }
 
-bool CPDF_PSEngine::Parse(const char* str, int size) {
-  CPDF_SimpleParser parser(reinterpret_cast<const uint8_t*>(str), size);
-  ByteStringView word = parser.GetWord();
-  return word == "{" ? m_MainProc.Parse(&parser, 0) : false;
+bool CPDF_PSEngine::Parse(const ByteStringView& view) {
+  CPDF_SimpleParser parser(view);
+  return parser.GetWord() == "{" && m_MainProc.Parse(&parser, 0);
 }
 
 bool CPDF_PSEngine::DoOperator(PDF_PSOP op) {
diff --git a/core/fpdfapi/page/cpdf_psengine.h b/core/fpdfapi/page/cpdf_psengine.h
index 63d91d0..8a054cc 100644
--- a/core/fpdfapi/page/cpdf_psengine.h
+++ b/core/fpdfapi/page/cpdf_psengine.h
@@ -109,7 +109,7 @@
   CPDF_PSEngine();
   ~CPDF_PSEngine();
 
-  bool Parse(const char* str, int size);
+  bool Parse(const ByteStringView& str);
   bool Execute();
   bool DoOperator(PDF_PSOP op);
   void Reset() { m_StackCount = 0; }
diff --git a/core/fpdfapi/page/cpdf_psfunc.cpp b/core/fpdfapi/page/cpdf_psfunc.cpp
index 9f101eb..af996c0 100644
--- a/core/fpdfapi/page/cpdf_psfunc.cpp
+++ b/core/fpdfapi/page/cpdf_psfunc.cpp
@@ -16,8 +16,7 @@
 bool CPDF_PSFunc::v_Init(CPDF_Object* pObj) {
   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pObj->AsStream());
   pAcc->LoadAllDataFiltered();
-  return m_PS.Parse(reinterpret_cast<const char*>(pAcc->GetData()),
-                    pAcc->GetSize());
+  return m_PS.Parse(pAcc->GetDataView());
 }
 
 bool CPDF_PSFunc::v_Call(float* inputs, float* results) const {
diff --git a/core/fpdfapi/parser/cpdf_simple_parser.cpp b/core/fpdfapi/parser/cpdf_simple_parser.cpp
index 9e3bf54..47ce1ad 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_simple_parser.cpp
@@ -10,147 +10,124 @@
 
 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
 
-CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize)
-    : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {}
+CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str) : data_(str) {}
 
-CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str)
-    : m_pData(str.raw_str()), m_dwSize(str.GetLength()), m_dwCurPos(0) {}
+CPDF_SimpleParser::~CPDF_SimpleParser() = default;
 
-std::pair<const uint8_t*, uint32_t> CPDF_SimpleParser::ParseWord() {
-  const uint8_t* pStart = nullptr;
-  uint8_t dwSize = 0;
+ByteStringView CPDF_SimpleParser::GetWord() {
   uint8_t ch;
 
   // Skip whitespace and comment lines.
   while (1) {
-    if (m_dwSize <= m_dwCurPos)
-      return std::make_pair(pStart, dwSize);
+    if (data_.GetLength() <= cur_pos_)
+      return ByteStringView();
 
-    ch = m_pData[m_dwCurPos++];
+    ch = data_[cur_pos_++];
     while (PDFCharIsWhitespace(ch)) {
-      if (m_dwSize <= m_dwCurPos)
-        return std::make_pair(pStart, dwSize);
-      ch = m_pData[m_dwCurPos++];
+      if (data_.GetLength() <= cur_pos_)
+        return ByteStringView();
+      ch = data_[cur_pos_++];
     }
 
     if (ch != '%')
       break;
 
     while (1) {
-      if (m_dwSize <= m_dwCurPos)
-        return std::make_pair(pStart, dwSize);
+      if (data_.GetLength() <= cur_pos_)
+        return ByteStringView();
 
-      ch = m_pData[m_dwCurPos++];
+      ch = data_[cur_pos_++];
       if (PDFCharIsLineEnding(ch))
         break;
     }
   }
 
-  uint32_t start_pos = m_dwCurPos - 1;
-  pStart = m_pData + start_pos;
+  uint8_t dwSize = 0;
+  uint32_t start_pos = cur_pos_ - 1;
   if (PDFCharIsDelimiter(ch)) {
     // Find names
     if (ch == '/') {
       while (1) {
-        if (m_dwSize <= m_dwCurPos)
+        if (data_.GetLength() <= cur_pos_)
           break;
 
-        ch = m_pData[m_dwCurPos++];
+        ch = data_[cur_pos_++];
         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
-          m_dwCurPos--;
-          dwSize = m_dwCurPos - start_pos;
+          cur_pos_--;
+          dwSize = cur_pos_ - start_pos;
           break;
         }
       }
-      return std::make_pair(pStart, dwSize);
+      return data_.Mid(start_pos, dwSize);
     }
 
     dwSize = 1;
     if (ch == '<') {
-      if (m_dwSize <= m_dwCurPos)
-        return std::make_pair(pStart, dwSize);
+      if (data_.GetLength() <= cur_pos_)
+        return data_.Mid(start_pos, dwSize);
 
-      ch = m_pData[m_dwCurPos++];
-      if (ch == '<')
+      ch = data_[cur_pos_++];
+      if (ch == '<') {
         dwSize = 2;
-      else
-        m_dwCurPos--;
-    } else if (ch == '>') {
-      if (m_dwSize <= m_dwCurPos)
-        return std::make_pair(pStart, dwSize);
+      } else {
+        while (cur_pos_ < data_.GetLength() && data_[cur_pos_] != '>')
+          cur_pos_++;
 
-      ch = m_pData[m_dwCurPos++];
+        if (cur_pos_ < data_.GetLength())
+          cur_pos_++;
+
+        dwSize = cur_pos_ - start_pos;
+      }
+    } else if (ch == '>') {
+      if (data_.GetLength() <= cur_pos_)
+        return data_.Mid(start_pos, dwSize);
+
+      ch = data_[cur_pos_++];
       if (ch == '>')
         dwSize = 2;
       else
-        m_dwCurPos--;
+        cur_pos_--;
+    } else if (ch == '(') {
+      int level = 1;
+      while (cur_pos_ < data_.GetLength()) {
+        if (data_[cur_pos_] == ')') {
+          level--;
+          if (level == 0)
+            break;
+        }
+
+        if (data_[cur_pos_] == '\\') {
+          if (data_.GetLength() <= cur_pos_)
+            break;
+
+          cur_pos_++;
+        } else if (data_[cur_pos_] == '(') {
+          level++;
+        }
+        if (data_.GetLength() <= cur_pos_)
+          break;
+
+        cur_pos_++;
+      }
+      if (cur_pos_ < data_.GetLength())
+        cur_pos_++;
+
+      dwSize = cur_pos_ - start_pos;
     }
-    return std::make_pair(pStart, dwSize);
+    return data_.Mid(start_pos, dwSize);
   }
 
   dwSize = 1;
-  while (1) {
-    if (m_dwSize <= m_dwCurPos)
-      return std::make_pair(pStart, dwSize);
-    ch = m_pData[m_dwCurPos++];
+  while (cur_pos_ < data_.GetLength()) {
+    ch = data_[cur_pos_++];
 
     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
-      m_dwCurPos--;
+      cur_pos_--;
       break;
     }
     dwSize++;
   }
-  return std::make_pair(pStart, dwSize);
-}
-
-ByteStringView CPDF_SimpleParser::GetWord() {
-  const uint8_t* pStart;
-  uint32_t dwSize;
-  std::tie(pStart, dwSize) = ParseWord();
-
-  if (dwSize != 1)
-    return ByteStringView(pStart, dwSize);
-
-  if (pStart[0] == '<') {
-    while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>')
-      m_dwCurPos++;
-
-    if (m_dwCurPos < m_dwSize)
-      m_dwCurPos++;
-
-    return ByteStringView(pStart,
-                          static_cast<size_t>(m_dwCurPos - (pStart - m_pData)));
-  }
-
-  if (pStart[0] == '(') {
-    int level = 1;
-    while (m_dwCurPos < m_dwSize) {
-      if (m_pData[m_dwCurPos] == ')') {
-        level--;
-        if (level == 0)
-          break;
-      }
-
-      if (m_pData[m_dwCurPos] == '\\') {
-        if (m_dwSize <= m_dwCurPos)
-          break;
-
-        m_dwCurPos++;
-      } else if (m_pData[m_dwCurPos] == '(') {
-        level++;
-      }
-      if (m_dwSize <= m_dwCurPos)
-        break;
-
-      m_dwCurPos++;
-    }
-    if (m_dwCurPos < m_dwSize)
-      m_dwCurPos++;
-
-    return ByteStringView(pStart,
-                          static_cast<size_t>(m_dwCurPos - (pStart - m_pData)));
-  }
-  return ByteStringView(pStart, dwSize);
+  return data_.Mid(start_pos, dwSize);
 }
 
 bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token,
@@ -160,9 +137,9 @@
   std::vector<uint32_t> pBuf(nParams);
   int buf_index = 0;
   int buf_count = 0;
-  m_dwCurPos = 0;
+  cur_pos_ = 0;
   while (1) {
-    pBuf[buf_index++] = m_dwCurPos;
+    pBuf[buf_index++] = cur_pos_;
     if (buf_index == nParams)
       buf_index = 0;
 
@@ -178,7 +155,7 @@
       if (buf_count < nParams)
         continue;
 
-      m_dwCurPos = pBuf[buf_index];
+      cur_pos_ = pBuf[buf_index];
       return true;
     }
   }
diff --git a/core/fpdfapi/parser/cpdf_simple_parser.h b/core/fpdfapi/parser/cpdf_simple_parser.h
index 659039e..f02a58c 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser.h
+++ b/core/fpdfapi/parser/cpdf_simple_parser.h
@@ -14,8 +14,8 @@
 
 class CPDF_SimpleParser {
  public:
-  CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize);
   explicit CPDF_SimpleParser(const ByteStringView& str);
+  ~CPDF_SimpleParser();
 
   ByteStringView GetWord();
 
@@ -23,15 +23,11 @@
   // and move the current position to the start of those parameters.
   bool FindTagParamFromStart(const ByteStringView& token, int nParams);
 
-  // For testing only.
-  uint32_t GetCurPos() const { return m_dwCurPos; }
+  uint32_t GetCurPosForTest() const { return cur_pos_; }
 
  private:
-  std::pair<const uint8_t*, uint32_t> ParseWord();
-
-  const uint8_t* m_pData;
-  uint32_t m_dwSize;
-  uint32_t m_dwCurPos;
+  const ByteStringView data_;
+  uint32_t cur_pos_ = 0;
 };
 
 #endif  // CORE_FPDFAPI_PARSER_CPDF_SIMPLE_PARSER_H_
diff --git a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
index e8d3b71..b53b6c6 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
@@ -49,7 +49,7 @@
   };
   for (size_t i = 0; i < FX_ArraySize(test_data); ++i) {
     const pdfium::StrFuncTestData& data = test_data[i];
-    CPDF_SimpleParser parser(data.input, data.input_size);
+    CPDF_SimpleParser parser(ByteStringView(data.input, data.input_size));
     ByteStringView word = parser.GetWord();
     EXPECT_EQ(data.expected_size, word.GetLength()) << " for case " << i;
     if (data.expected_size != word.GetLength())
@@ -88,10 +88,10 @@
   };
   for (size_t i = 0; i < FX_ArraySize(test_data); ++i) {
     const FindTagTestStruct& data = test_data[i];
-    CPDF_SimpleParser parser(data.input, data.input_size);
+    CPDF_SimpleParser parser(ByteStringView(data.input, data.input_size));
     EXPECT_EQ(data.result,
               parser.FindTagParamFromStart(data.token, data.num_params))
         << " for case " << i;
-    EXPECT_EQ(data.result_pos, parser.GetCurPos()) << " for case " << i;
+    EXPECT_EQ(data.result_pos, parser.GetCurPosForTest()) << " for case " << i;
   }
 }
diff --git a/core/fpdfapi/parser/cpdf_stream_acc.h b/core/fpdfapi/parser/cpdf_stream_acc.h
index d54e000..ac5253a 100644
--- a/core/fpdfapi/parser/cpdf_stream_acc.h
+++ b/core/fpdfapi/parser/cpdf_stream_acc.h
@@ -30,6 +30,8 @@
   const CPDF_Stream* GetStream() const { return m_pStream.Get(); }
   CPDF_Dictionary* GetDict() const;
 
+  ByteStringView GetDataView() { return ByteStringView(GetData(), GetSize()); }
+
   const uint8_t* GetData() const;
   uint8_t* GetData();
   uint32_t GetSize() const;
diff --git a/testing/libfuzzer/pdf_cmap_fuzzer.cc b/testing/libfuzzer/pdf_cmap_fuzzer.cc
index b0e41d5..0f2a22d 100644
--- a/testing/libfuzzer/pdf_cmap_fuzzer.cc
+++ b/testing/libfuzzer/pdf_cmap_fuzzer.cc
@@ -8,7 +8,6 @@
 #include "third_party/base/ptr_util.h"
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
-  auto cmap = pdfium::MakeRetain<CPDF_CMap>();
-  cmap->LoadEmbedded(data, size);
+  pdfium::MakeRetain<CPDF_CMap>()->LoadEmbedded(ByteStringView(data, size));
   return 0;
 }
diff --git a/testing/libfuzzer/pdf_psengine_fuzzer.cc b/testing/libfuzzer/pdf_psengine_fuzzer.cc
index fcdfed8..253b43e3 100644
--- a/testing/libfuzzer/pdf_psengine_fuzzer.cc
+++ b/testing/libfuzzer/pdf_psengine_fuzzer.cc
@@ -8,7 +8,7 @@
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
   CPDF_PSEngine engine;
-  if (engine.Parse(reinterpret_cast<const char*>(data), size))
+  if (engine.Parse(ByteStringView(data, size)))
     engine.Execute();
   return 0;
 }