| // Copyright 2014 The PDFium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| // Original code is licensed as follows: |
| /* |
| * Copyright 2006-2007 Jeremias Maerki. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "fxbarcode/datamatrix/BC_HighLevelEncoder.h" |
| |
| #include <algorithm> |
| #include <array> |
| #include <limits> |
| #include <memory> |
| #include <vector> |
| |
| #include "core/fxcrt/check.h" |
| #include "core/fxcrt/fx_extension.h" |
| #include "fxbarcode/common/BC_CommonBitMatrix.h" |
| #include "fxbarcode/datamatrix/BC_ASCIIEncoder.h" |
| #include "fxbarcode/datamatrix/BC_Base256Encoder.h" |
| #include "fxbarcode/datamatrix/BC_C40Encoder.h" |
| #include "fxbarcode/datamatrix/BC_EdifactEncoder.h" |
| #include "fxbarcode/datamatrix/BC_Encoder.h" |
| #include "fxbarcode/datamatrix/BC_EncoderContext.h" |
| #include "fxbarcode/datamatrix/BC_SymbolInfo.h" |
| #include "fxbarcode/datamatrix/BC_TextEncoder.h" |
| #include "fxbarcode/datamatrix/BC_X12Encoder.h" |
| |
| namespace { |
| |
// Codeword used to pad the encoded data up to the symbol's data capacity.
constexpr wchar_t kPad = 129;

// Codewords emitted in place of a recognised macro header (see
// EncodeHighLevel()).
constexpr wchar_t kMacro05 = 236;
constexpr wchar_t kMacro06 = 237;

// Six-character message prefixes that select the 05 / 06 macro codewords.
// The literal is split so the octal escape is visibly separate from the
// digits that follow it.
constexpr wchar_t kMacro05Header[] =
    L"[)>\036"
    L"05";
constexpr wchar_t kMacro06Header[] =
    L"[)>\036"
    L"06";

// Final message character that must be present for a macro header to be
// honoured.
constexpr wchar_t kMacroTrailer = 0x0004;
| |
| constexpr size_t kEncoderCount = |
| static_cast<size_t>(CBC_HighLevelEncoder::Encoding::LAST) + 1; |
| static_assert(kEncoderCount == 6, "Bad encoder count"); |
| |
// Scrambles `ch` with a pseudo-random offset in [1, 253] derived from
// `codewordPosition`, wrapping the sum back by 254 when it exceeds 254.
wchar_t Randomize253State(wchar_t ch, int32_t codewordPosition) {
  const int32_t offset = (149 * codewordPosition) % 253 + 1;
  int32_t randomized = ch + offset;
  if (randomized > 254) {
    randomized -= 254;
  }
  return static_cast<wchar_t>(randomized);
}
| |
| int32_t FindMinimums(const std::array<float, kEncoderCount>& charCounts, |
| std::array<int32_t, kEncoderCount>* intCharCounts, |
| std::array<uint8_t, kEncoderCount>* mins) { |
| int32_t min = std::numeric_limits<int32_t>::max(); |
| for (size_t i = 0; i < kEncoderCount; ++i) { |
| int32_t current = static_cast<int32_t>(ceil(charCounts[i])); |
| (*intCharCounts)[i] = current; |
| if (min > current) { |
| min = current; |
| for (auto& m : *mins) |
| m = 0; |
| } |
| if (min == current) |
| (*mins)[i]++; |
| } |
| return min; |
| } |
| |
| int32_t GetMinimumCount(const std::array<uint8_t, kEncoderCount>& mins) { |
| int32_t count = 0; |
| for (const auto& m : mins) |
| count += m; |
| return count; |
| } |
| |
| bool IsNativeC40(wchar_t ch) { |
| return (ch == ' ') || (ch >= '0' && ch <= '9') || FXSYS_IsUpperASCII(ch); |
| } |
| |
| bool IsNativeText(wchar_t ch) { |
| return (ch == ' ') || (ch >= '0' && ch <= '9') || FXSYS_IsLowerASCII(ch); |
| } |
| |
// True when `ch` is one of the three X12 terminator/separator characters:
// carriage return, '*' or '>'.
bool IsX12TermSep(wchar_t ch) {
  switch (ch) {
    case '\r':
    case '*':
    case '>':
      return true;
    default:
      return false;
  }
}
| |
| bool IsNativeX12(wchar_t ch) { |
| return IsX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') || |
| FXSYS_IsUpperASCII(ch); |
| } |
| |
// True when `ch` lies in the inclusive range ' ' (0x20) through '^' (0x5E).
bool IsNativeEDIFACT(wchar_t ch) {
  constexpr wchar_t kFirstNative = L' ';
  constexpr wchar_t kLastNative = L'^';
  return !(ch < kFirstNative || ch > kLastNative);
}
| |
| size_t EncoderIndex(CBC_HighLevelEncoder::Encoding encoding) { |
| DCHECK(encoding != CBC_HighLevelEncoder::Encoding::UNKNOWN); |
| return static_cast<size_t>(encoding); |
| } |
| |
| } // namespace |
| |
| // static |
| WideString CBC_HighLevelEncoder::EncodeHighLevel(const WideString& msg) { |
| // Per spec. Alpha numeric input is even shorter. |
| static constexpr size_t kMaxNumericInputLength = 3116; |
| |
| // Exit early if the input is too long. It will fail no matter what. |
| if (msg.GetLength() > kMaxNumericInputLength) |
| return WideString(); |
| |
| CBC_EncoderContext context(msg); |
| if (context.HasCharactersOutsideISO88591Encoding()) |
| return WideString(); |
| |
| if (msg.Back() == kMacroTrailer) { |
| WideString left = msg.First(6); |
| if (left == kMacro05Header) { |
| context.writeCodeword(kMacro05); |
| context.setSkipAtEnd(2); |
| context.m_pos += 6; |
| } else if (left == kMacro06Header) { |
| context.writeCodeword(kMacro06); |
| context.setSkipAtEnd(2); |
| context.m_pos += 6; |
| } |
| } |
| |
| std::vector<std::unique_ptr<CBC_Encoder>> encoders; |
| encoders.push_back(std::make_unique<CBC_ASCIIEncoder>()); |
| encoders.push_back(std::make_unique<CBC_C40Encoder>()); |
| encoders.push_back(std::make_unique<CBC_TextEncoder>()); |
| encoders.push_back(std::make_unique<CBC_X12Encoder>()); |
| encoders.push_back(std::make_unique<CBC_EdifactEncoder>()); |
| encoders.push_back(std::make_unique<CBC_Base256Encoder>()); |
| Encoding encodingMode = Encoding::ASCII; |
| while (context.hasMoreCharacters()) { |
| if (!encoders[EncoderIndex(encodingMode)]->Encode(&context)) |
| return WideString(); |
| |
| if (context.m_newEncoding != Encoding::UNKNOWN) { |
| encodingMode = context.m_newEncoding; |
| context.ResetEncoderSignal(); |
| } |
| } |
| size_t len = context.m_codewords.GetLength(); |
| if (!context.UpdateSymbolInfo()) |
| return WideString(); |
| |
| size_t capacity = context.m_symbolInfo->data_capacity(); |
| if (len < capacity) { |
| if (encodingMode != Encoding::ASCII && encodingMode != Encoding::BASE256) |
| context.writeCodeword(0x00fe); |
| } |
| WideString codewords = context.m_codewords; |
| if (codewords.GetLength() < capacity) |
| codewords += kPad; |
| |
| while (codewords.GetLength() < capacity) |
| codewords += Randomize253State(kPad, codewords.GetLength() + 1); |
| |
| DCHECK(!codewords.IsEmpty()); |
| return codewords; |
| } |
| |
| // static |
| CBC_HighLevelEncoder::Encoding CBC_HighLevelEncoder::LookAheadTest( |
| const WideString& msg, |
| size_t startpos, |
| CBC_HighLevelEncoder::Encoding currentMode) { |
| if (startpos >= msg.GetLength()) |
| return currentMode; |
| |
| std::array<float, kEncoderCount> charCounts; |
| if (currentMode == Encoding::ASCII) { |
| charCounts = {0, 1, 1, 1, 1, 1.25f}; |
| } else { |
| charCounts = {1, 2, 2, 2, 2, 2.25f}; |
| charCounts[EncoderIndex(currentMode)] = 0; |
| } |
| |
| size_t charsProcessed = 0; |
| while (true) { |
| if ((startpos + charsProcessed) == msg.GetLength()) { |
| std::array<int32_t, kEncoderCount> intCharCounts; |
| std::array<uint8_t, kEncoderCount> mins; |
| int32_t min = FindMinimums(charCounts, &intCharCounts, &mins); |
| if (intCharCounts[EncoderIndex(Encoding::ASCII)] == min) |
| return Encoding::ASCII; |
| const int32_t minCount = GetMinimumCount(mins); |
| if (minCount == 1) { |
| if (mins[EncoderIndex(Encoding::BASE256)] > 0) |
| return Encoding::BASE256; |
| if (mins[EncoderIndex(Encoding::EDIFACT)] > 0) |
| return Encoding::EDIFACT; |
| if (mins[EncoderIndex(Encoding::TEXT)] > 0) |
| return Encoding::TEXT; |
| if (mins[EncoderIndex(Encoding::X12)] > 0) |
| return Encoding::X12; |
| } |
| return Encoding::C40; |
| } |
| |
| wchar_t c = msg[startpos + charsProcessed]; |
| charsProcessed++; |
| { |
| auto& count = charCounts[EncoderIndex(Encoding::ASCII)]; |
| if (FXSYS_IsDecimalDigit(c)) |
| count += 0.5; |
| else if (IsExtendedASCII(c)) |
| count = ceilf(count) + 2; |
| else |
| count = ceilf(count) + 1; |
| } |
| |
| { |
| auto& count = charCounts[EncoderIndex(Encoding::C40)]; |
| if (IsNativeC40(c)) |
| count += 2.0f / 3.0f; |
| else if (IsExtendedASCII(c)) |
| count += 8.0f / 3.0f; |
| else |
| count += 4.0f / 3.0f; |
| } |
| |
| { |
| auto& count = charCounts[EncoderIndex(Encoding::TEXT)]; |
| if (IsNativeText(c)) |
| count += 2.0f / 3.0f; |
| else if (IsExtendedASCII(c)) |
| count += 8.0f / 3.0f; |
| else |
| count += 4.0f / 3.0f; |
| } |
| |
| { |
| auto& count = charCounts[EncoderIndex(Encoding::X12)]; |
| if (IsNativeX12(c)) |
| count += 2.0f / 3.0f; |
| else if (IsExtendedASCII(c)) |
| count += 13.0f / 3.0f; |
| else |
| count += 10.0f / 3.0f; |
| } |
| |
| { |
| auto& count = charCounts[EncoderIndex(Encoding::EDIFACT)]; |
| if (IsNativeEDIFACT(c)) |
| count += 3.0f / 4.0f; |
| else if (IsExtendedASCII(c)) |
| count += 17.0f / 4.0f; |
| else |
| count += 13.0f / 4.0f; |
| } |
| |
| charCounts[EncoderIndex(Encoding::BASE256)]++; |
| if (charsProcessed < 4) |
| continue; |
| |
| std::array<int32_t, kEncoderCount> intCharCounts; |
| std::array<uint8_t, kEncoderCount> mins; |
| FindMinimums(charCounts, &intCharCounts, &mins); |
| int32_t minCount = GetMinimumCount(mins); |
| int32_t ascii_count = intCharCounts[EncoderIndex(Encoding::ASCII)]; |
| int32_t c40_count = intCharCounts[EncoderIndex(Encoding::C40)]; |
| int32_t text_count = intCharCounts[EncoderIndex(Encoding::TEXT)]; |
| int32_t x12_count = intCharCounts[EncoderIndex(Encoding::X12)]; |
| int32_t editfact_count = intCharCounts[EncoderIndex(Encoding::EDIFACT)]; |
| int32_t base256_count = intCharCounts[EncoderIndex(Encoding::BASE256)]; |
| int32_t bet_min = std::min({base256_count, editfact_count, text_count}); |
| if (ascii_count < bet_min && ascii_count < c40_count && |
| ascii_count < x12_count) { |
| return Encoding::ASCII; |
| } |
| if (base256_count < ascii_count || |
| (mins[EncoderIndex(Encoding::C40)] + |
| mins[EncoderIndex(Encoding::TEXT)] + |
| mins[EncoderIndex(Encoding::X12)] + |
| mins[EncoderIndex(Encoding::EDIFACT)]) == 0) { |
| return Encoding::BASE256; |
| } |
| if (minCount == 1) { |
| if (mins[EncoderIndex(Encoding::EDIFACT)] > 0) |
| return Encoding::EDIFACT; |
| if (mins[EncoderIndex(Encoding::TEXT)] > 0) |
| return Encoding::TEXT; |
| if (mins[EncoderIndex(Encoding::X12)] > 0) |
| return Encoding::X12; |
| } |
| if (c40_count + 1 < ascii_count && c40_count + 1 < bet_min) { |
| if (c40_count < x12_count) |
| return Encoding::C40; |
| if (c40_count == x12_count) { |
| size_t p = startpos + charsProcessed + 1; |
| while (p < msg.GetLength()) { |
| wchar_t tc = msg[p]; |
| if (IsX12TermSep(tc)) |
| return Encoding::X12; |
| if (!IsNativeX12(tc)) |
| break; |
| p++; |
| } |
| return Encoding::C40; |
| } |
| } |
| } |
| } |
| |
| // static |
| bool CBC_HighLevelEncoder::IsExtendedASCII(wchar_t ch) { |
| return ch >= 128 && ch <= 255; |
| } |