blob: f96d01f20a5e90d2e0f8a8282abed8fa8f592e48 [file] [log] [blame]
// Copright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "xfa/fxfa/fm2js/cxfa_fmlexer.h"
#include <algorithm>
#include "core/fxcrt/fx_extension.h"
#include "third_party/icu/source/common/unicode/uchar.h"
namespace {
bool IsFormCalcCharacter(wchar_t c) {
return (c >= 0x09 && c <= 0x0D) || (c >= 0x20 && c <= 0xd7FF) ||
(c >= 0xE000 && c <= 0xFFFD);
}
bool IsIdentifierCharacter(wchar_t c) {
return u_isalnum(c) || c == 0x005F || // '_'
c == 0x0024; // '$'
}
bool IsInitialIdentifierCharacter(wchar_t c) {
return u_isalpha(c) || c == 0x005F || // '_'
c == 0x0024 || // '$'
c == 0x0021; // '!'
}
bool IsWhitespaceCharacter(wchar_t c) {
return c == 0x0009 || // Horizontal tab
c == 0x000B || // Vertical tab
c == 0x000C || // Form feed
c == 0x0020; // Space
}
const XFA_FMKeyword keyWords[] = {
{TOKdo, L"do"},
{TOKkseq, L"eq"},
{TOKksge, L"ge"},
{TOKksgt, L"gt"},
{TOKif, L"if"},
{TOKin, L"in"},
{TOKksle, L"le"},
{TOKkslt, L"lt"},
{TOKksne, L"ne"},
{TOKksor, L"or"},
{TOKnull, L"null"},
{TOKbreak, L"break"},
{TOKksand, L"and"},
{TOKend, L"end"},
{TOKeof, L"eof"},
{TOKfor, L"for"},
{TOKnan, L"nan"},
{TOKksnot, L"not"},
{TOKvar, L"var"},
{TOKthen, L"then"},
{TOKelse, L"else"},
{TOKexit, L"exit"},
{TOKdownto, L"downto"},
{TOKreturn, L"return"},
{TOKinfinity, L"infinity"},
{TOKendwhile, L"endwhile"},
{TOKforeach, L"foreach"},
{TOKendfunc, L"endfunc"},
{TOKelseif, L"elseif"},
{TOKwhile, L"while"},
{TOKendfor, L"endfor"},
{TOKthrow, L"throw"},
{TOKstep, L"step"},
{TOKupto, L"upto"},
{TOKcontinue, L"continue"},
{TOKfunc, L"func"},
{TOKendif, L"endif"},
};
#ifndef NDEBUG
const wchar_t* const tokenStrings[] = {
L"TOKand", L"TOKlparen", L"TOKrparen", L"TOKmul",
L"TOKplus", L"TOKcomma", L"TOKminus", L"TOKdot",
L"TOKdiv", L"TOKlt", L"TOKassign", L"TOKgt",
L"TOKlbracket", L"TOKrbracket", L"TOKor", L"TOKdotscream",
L"TOKdotstar", L"TOKdotdot", L"TOKle", L"TOKne",
L"TOKeq", L"TOKge", L"TOKdo", L"TOKkseq",
L"TOKksge", L"TOKksgt", L"TOKif", L"TOKin",
L"TOKksle", L"TOKkslt", L"TOKksne", L"TOKksor",
L"TOKnull", L"TOKbreak", L"TOKksand", L"TOKend",
L"TOKeof", L"TOKfor", L"TOKnan", L"TOKksnot",
L"TOKvar", L"TOKthen", L"TOKelse", L"TOKexit",
L"TOKdownto", L"TOKreturn", L"TOKinfinity", L"TOKendwhile",
L"TOKforeach", L"TOKendfunc", L"TOKelseif", L"TOKwhile",
L"TOKendfor", L"TOKthrow", L"TOKstep", L"TOKupto",
L"TOKcontinue", L"TOKfunc", L"TOKendif", L"TOKstar",
L"TOKidentifier", L"TOKunderscore", L"TOKdollar", L"TOKexclamation",
L"TOKcall", L"TOKstring", L"TOKnumber", L"TOKreserver",
};
#endif // NDEBUG
XFA_FM_TOKEN TokenizeIdentifier(const WideStringView& str) {
const XFA_FMKeyword* result = std::find_if(
std::begin(keyWords), std::end(keyWords),
[&str](const XFA_FMKeyword& iter) { return iter.m_keyword == str; });
if (result != std::end(keyWords) && result->m_keyword == str)
return result->m_type;
return TOKidentifier;
}
} // namespace
CXFA_FMToken::CXFA_FMToken(XFA_FM_TOKEN token) : m_type(token) {}
CXFA_FMToken::CXFA_FMToken() : CXFA_FMToken(TOKreserver) {}
CXFA_FMToken::CXFA_FMToken(const CXFA_FMToken&) = default;
CXFA_FMToken::~CXFA_FMToken() = default;
#ifndef NDEBUG
WideString CXFA_FMToken::ToDebugString() const {
WideString str(L"type = ");
str += tokenStrings[m_type];
str += L", string = ";
str += m_string;
return str;
}
#endif // NDEBUG
CXFA_FMLexer::CXFA_FMLexer(const WideStringView& wsFormCalc)
: m_cursor(wsFormCalc.unterminated_c_str()),
m_end(m_cursor + wsFormCalc.GetLength()),
m_lexer_error(false) {}
CXFA_FMLexer::~CXFA_FMLexer() = default;
CXFA_FMToken CXFA_FMLexer::NextToken() {
if (m_lexer_error)
return CXFA_FMToken();
while (!IsComplete() && *m_cursor) {
if (!IsFormCalcCharacter(*m_cursor)) {
RaiseError();
return CXFA_FMToken();
}
switch (*m_cursor) {
case '\n':
++m_cursor;
break;
case '\r':
++m_cursor;
break;
case ';':
AdvanceForComment();
break;
case '"':
return AdvanceForString();
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
return AdvanceForNumber();
case '=':
++m_cursor;
if (m_cursor >= m_end)
return CXFA_FMToken(TOKassign);
if (!IsFormCalcCharacter(*m_cursor)) {
RaiseError();
return CXFA_FMToken();
}
if (*m_cursor == '=') {
++m_cursor;
return CXFA_FMToken(TOKeq);
}
return CXFA_FMToken(TOKassign);
case '<':
++m_cursor;
if (m_cursor >= m_end)
return CXFA_FMToken(TOKlt);
if (!IsFormCalcCharacter(*m_cursor)) {
RaiseError();
return CXFA_FMToken();
}
if (*m_cursor == '=') {
++m_cursor;
return CXFA_FMToken(TOKle);
}
if (*m_cursor == '>') {
++m_cursor;
return CXFA_FMToken(TOKne);
}
return CXFA_FMToken(TOKlt);
case '>':
++m_cursor;
if (m_cursor >= m_end)
return CXFA_FMToken(TOKgt);
if (!IsFormCalcCharacter(*m_cursor)) {
RaiseError();
return CXFA_FMToken();
}
if (*m_cursor == '=') {
++m_cursor;
return CXFA_FMToken(TOKge);
}
return CXFA_FMToken(TOKgt);
case ',':
++m_cursor;
return CXFA_FMToken(TOKcomma);
case '(':
++m_cursor;
return CXFA_FMToken(TOKlparen);
case ')':
++m_cursor;
return CXFA_FMToken(TOKrparen);
case '[':
++m_cursor;
return CXFA_FMToken(TOKlbracket);
case ']':
++m_cursor;
return CXFA_FMToken(TOKrbracket);
case '&':
++m_cursor;
return CXFA_FMToken(TOKand);
case '|':
++m_cursor;
return CXFA_FMToken(TOKor);
case '+':
++m_cursor;
return CXFA_FMToken(TOKplus);
case '-':
++m_cursor;
return CXFA_FMToken(TOKminus);
case '*':
++m_cursor;
return CXFA_FMToken(TOKmul);
case '/': {
++m_cursor;
if (m_cursor >= m_end)
return CXFA_FMToken(TOKdiv);
if (!IsFormCalcCharacter(*m_cursor)) {
RaiseError();
return CXFA_FMToken();
}
if (*m_cursor != '/')
return CXFA_FMToken(TOKdiv);
AdvanceForComment();
break;
}
case '.':
++m_cursor;
if (m_cursor >= m_end)
return CXFA_FMToken(TOKdot);
if (!IsFormCalcCharacter(*m_cursor)) {
RaiseError();
return CXFA_FMToken();
}
if (*m_cursor == '.') {
++m_cursor;
return CXFA_FMToken(TOKdotdot);
}
if (*m_cursor == '*') {
++m_cursor;
return CXFA_FMToken(TOKdotstar);
}
if (*m_cursor == '#') {
++m_cursor;
return CXFA_FMToken(TOKdotscream);
}
if (FXSYS_IsDecimalDigit(*m_cursor)) {
--m_cursor;
return AdvanceForNumber();
}
return CXFA_FMToken(TOKdot);
default:
if (IsWhitespaceCharacter(*m_cursor)) {
++m_cursor;
break;
}
if (!IsInitialIdentifierCharacter(*m_cursor)) {
RaiseError();
return CXFA_FMToken();
}
return AdvanceForIdentifier();
}
}
return CXFA_FMToken(TOKeof);
}
CXFA_FMToken CXFA_FMLexer::AdvanceForNumber() {
// This will set end to the character after the end of the number.
int32_t used_length = 0;
if (m_cursor)
FXSYS_wcstof(m_cursor, m_end - m_cursor, &used_length);
const wchar_t* end = m_cursor + used_length;
if (used_length == 0 || !end || FXSYS_iswalpha(*end)) {
RaiseError();
return CXFA_FMToken();
}
CXFA_FMToken token(TOKnumber);
token.m_string =
WideStringView(m_cursor, static_cast<size_t>(end - m_cursor));
m_cursor = end;
return token;
}
CXFA_FMToken CXFA_FMLexer::AdvanceForString() {
CXFA_FMToken token(TOKstring);
const wchar_t* start = m_cursor;
++m_cursor;
while (!IsComplete() && *m_cursor) {
if (!IsFormCalcCharacter(*m_cursor))
break;
if (*m_cursor == '"') {
// Check for escaped "s, i.e. "".
++m_cursor;
// If the end of the input has been reached it was not escaped.
if (m_cursor >= m_end) {
token.m_string =
WideStringView(start, static_cast<size_t>(m_cursor - start));
return token;
}
// If the next character is not a " then the end of the string has been
// found.
if (*m_cursor != '"') {
if (!IsFormCalcCharacter(*m_cursor))
break;
token.m_string = WideStringView(start, (m_cursor - start));
return token;
}
}
++m_cursor;
}
// Didn't find the end of the string.
RaiseError();
return CXFA_FMToken();
}
CXFA_FMToken CXFA_FMLexer::AdvanceForIdentifier() {
const wchar_t* start = m_cursor;
++m_cursor;
while (!IsComplete() && *m_cursor) {
if (!IsFormCalcCharacter(*m_cursor)) {
RaiseError();
return CXFA_FMToken();
}
if (!IsIdentifierCharacter(*m_cursor))
break;
++m_cursor;
}
WideStringView str =
WideStringView(start, static_cast<size_t>(m_cursor - start));
CXFA_FMToken token(TokenizeIdentifier(str));
token.m_string = str;
return token;
}
void CXFA_FMLexer::AdvanceForComment() {
m_cursor++;
while (!IsComplete() && *m_cursor) {
if (!IsFormCalcCharacter(*m_cursor)) {
RaiseError();
return;
}
if (*m_cursor == L'\r') {
++m_cursor;
return;
}
if (*m_cursor == L'\n') {
++m_cursor;
return;
}
++m_cursor;
}
}