Add unit test for CFX_SeekableStreamProxy.

Then fix a bug uncovered by the test: when sizeof(wchar_t) > 2,
SwapByteOrder() ANDed each byte-swapped UTF-16BE code unit with
0x00FF, losing the top byte. Avoid the issue entirely by fixing the
byte order on the uint16_t data before expanding to wchar_t.

Change-Id: If40001f73ba56d95380ab65f92d442a83a515cab
Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/52090
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
diff --git a/core/fxcrt/BUILD.gn b/core/fxcrt/BUILD.gn
index 400ab35..d395e4d 100644
--- a/core/fxcrt/BUILD.gn
+++ b/core/fxcrt/BUILD.gn
@@ -142,6 +142,7 @@
     "autorestorer_unittest.cpp",
     "bytestring_unittest.cpp",
     "cfx_bitstream_unittest.cpp",
+    "cfx_seekablestreamproxy_unittest.cpp",
     "cfx_widetextbuf_unittest.cpp",
     "fx_bidi_unittest.cpp",
     "fx_coordinates_unittest.cpp",
diff --git a/core/fxcrt/cfx_seekablestreamproxy.cpp b/core/fxcrt/cfx_seekablestreamproxy.cpp
index 5884bca..c0317ca 100644
--- a/core/fxcrt/cfx_seekablestreamproxy.cpp
+++ b/core/fxcrt/cfx_seekablestreamproxy.cpp
@@ -89,32 +89,16 @@
     pDst[i - 1] = static_cast<wchar_t>(pSrc[i - 1]);
 }
 
-void SwapByteOrder(wchar_t* pStr, size_t iLength) {
-  ASSERT(pStr);
-
-  uint16_t wch;
-  if (sizeof(wchar_t) > 2) {
-    while (iLength-- > 0) {
-      wch = static_cast<uint16_t>(*pStr);
-      wch = (wch >> 8) | (wch << 8);
-      wch &= 0x00FF;
-      *pStr = wch;
-      ++pStr;
-    }
-    return;
-  }
-
+void SwapByteOrder(uint16_t* pStr, size_t iLength) {
   while (iLength-- > 0) {
-    wch = static_cast<uint16_t>(*pStr);
-    wch = (wch >> 8) | (wch << 8);
-    *pStr = wch;
-    ++pStr;
+    uint16_t wch = *pStr;
+    *pStr++ = (wch >> 8) | (wch << 8);
   }
 }
 
 }  // namespace
 
-#define BOM_MASK 0x00FFFFFF
+#define BOM_UTF8_MASK 0x00FFFFFF
 #define BOM_UTF8 0x00BFBBEF
 #define BOM_UTF16_MASK 0x0000FFFF
 #define BOM_UTF16_BE 0x0000FFFE
@@ -133,7 +117,7 @@
   uint32_t bom = 0;
   ReadData(reinterpret_cast<uint8_t*>(&bom), 3);
 
-  bom &= BOM_MASK;
+  bom &= BOM_UTF8_MASK;
   if (bom == BOM_UTF8) {
     m_wBOMLength = 3;
     m_wCodePage = FX_CODEPAGE_UTF8;
@@ -217,12 +201,11 @@
     size_t iBytes = size * 2;
     size_t iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
     size = iLen / 2;
+    if (m_wCodePage == FX_CODEPAGE_UTF16BE)
+      SwapByteOrder(static_cast<uint16_t*>(pStr), size);
+
     if (sizeof(wchar_t) > 2 && size > 0)
       UTF16ToWChar(pStr, size);
-
-    if (m_wCodePage == FX_CODEPAGE_UTF16BE)
-      SwapByteOrder(static_cast<wchar_t*>(pStr), size);
-
   } else {
     FX_FILESIZE pos = GetPosition();
     size_t iBytes = std::min(size, static_cast<size_t>(GetSize() - pos));
diff --git a/core/fxcrt/cfx_seekablestreamproxy_unittest.cpp b/core/fxcrt/cfx_seekablestreamproxy_unittest.cpp
new file mode 100644
index 0000000..b263fa8
--- /dev/null
+++ b/core/fxcrt/cfx_seekablestreamproxy_unittest.cpp
@@ -0,0 +1,70 @@
+// Copyright 2019 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fxcrt/cfx_seekablestreamproxy.h"
+
+#include <memory>
+#include <vector>
+
+#include "core/fxcrt/cfx_readonlymemorystream.h"
+#include "core/fxcrt/retain_ptr.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/base/ptr_util.h"
+#include "third_party/base/span.h"
+
+TEST(SeekableStreamProxyTest, NullStream) {
+  auto proxy_stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
+      pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
+          pdfium::make_span<const uint8_t>(nullptr, 0)));  // Empty input.
+
+  wchar_t buffer[16];  // Nothing should be read from the empty span.
+  EXPECT_EQ(0u, proxy_stream->ReadBlock(buffer, FX_ArraySize(buffer)));
+}
+
+TEST(SeekableStreamProxyTest, DefaultStreamBOMNotRecognized) {
+  const char data[] = "abcd";  // Plain bytes with no leading BOM.
+  auto proxy_stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
+      pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(pdfium::make_span(
+          reinterpret_cast<const uint8_t*>(data), sizeof(data) - 1)));
+
+  wchar_t buffer[16];  // The test asserts that ReadBlock() yields 0 here.
+  EXPECT_EQ(0u, proxy_stream->ReadBlock(buffer, FX_ArraySize(buffer)));
+}
+
+TEST(SeekableStreamProxyTest, UTF8Stream) {
+  const char data[] = "\xEF\xBB\xBF*\xC2\xA2*";  // UTF-8 BOM, '*', U+00A2, '*'
+  auto proxy_stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
+      pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(pdfium::make_span(
+          reinterpret_cast<const uint8_t*>(data), sizeof(data) - 1)));
+
+  wchar_t buffer[16];
+  EXPECT_EQ(3u, proxy_stream->ReadBlock(buffer, FX_ArraySize(buffer)));
+  EXPECT_EQ(L'*', buffer[0]);
+  EXPECT_EQ(L'\u00A2', buffer[1]);  // Two-byte sequence C2 A2.
+  EXPECT_EQ(L'*', buffer[2]);
+}
+
+TEST(SeekableStreamProxyTest, UTF16LEStream) {
+  const char data[] = "\xFF\xFE\x41\x00\x42\x01";  // LE BOM, U+0041, U+0142
+  auto proxy_stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
+      pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(pdfium::make_span(
+          reinterpret_cast<const uint8_t*>(data), sizeof(data) - 1)));
+
+  wchar_t buffer[16];
+  EXPECT_EQ(2u, proxy_stream->ReadBlock(buffer, FX_ArraySize(buffer)));
+  EXPECT_EQ(L'A', buffer[0]);
+  EXPECT_EQ(L'\u0142', buffer[1]);  // Bytes 42 01, low byte first.
+}
+
+TEST(SeekableStreamProxyTest, UTF16BEStream) {
+  const char data[] = "\xFE\xFF\x00\x41\x01\x42";  // BE BOM, U+0041, U+0142
+  auto proxy_stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
+      pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(pdfium::make_span(
+          reinterpret_cast<const uint8_t*>(data), sizeof(data) - 1)));
+
+  wchar_t buffer[16];
+  EXPECT_EQ(2u, proxy_stream->ReadBlock(buffer, FX_ArraySize(buffer)));
+  EXPECT_EQ(L'A', buffer[0]);
+  EXPECT_EQ(L'\u0142', buffer[1]);  // High byte 0x01 must be preserved.
+}