| // Copyright 2014 PDFium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| |
| #include "xfa/fxfa/fm2js/cxfa_fmlexer.h" |
| |
| #include <algorithm> |
| |
| #include "core/fxcrt/fx_extension.h" |
| |
| namespace { |
| |
| bool IsFormCalcCharacter(wchar_t c) { |
| return (c >= 0x09 && c <= 0x0D) || (c >= 0x20 && c <= 0xd7FF) || |
| (c >= 0xE000 && c <= 0xFFFD); |
| } |
| |
| bool IsIdentifierCharacter(wchar_t c) { |
| return FXSYS_iswalnum(c) || c == 0x005F || // '_' |
| c == 0x0024; // '$' |
| } |
| |
| bool IsInitialIdentifierCharacter(wchar_t c) { |
| return FXSYS_iswalpha(c) || c == 0x005F || // '_' |
| c == 0x0024 || // '$' |
| c == 0x0021; // '!' |
| } |
| |
| bool IsWhitespaceCharacter(wchar_t c) { |
| return c == 0x0009 || // Horizontal tab |
| c == 0x000B || // Vertical tab |
| c == 0x000C || // Form feed |
| c == 0x0020; // Space |
| } |
| |
| const XFA_FMKeyword keyWords[] = { |
| {TOKdo, "do"}, |
| {TOKkseq, "eq"}, |
| {TOKksge, "ge"}, |
| {TOKksgt, "gt"}, |
| {TOKif, "if"}, |
| {TOKin, "in"}, |
| {TOKksle, "le"}, |
| {TOKkslt, "lt"}, |
| {TOKksne, "ne"}, |
| {TOKksor, "or"}, |
| {TOKnull, "null"}, |
| {TOKbreak, "break"}, |
| {TOKksand, "and"}, |
| {TOKend, "end"}, |
| {TOKeof, "eof"}, |
| {TOKfor, "for"}, |
| {TOKnan, "nan"}, |
| {TOKksnot, "not"}, |
| {TOKvar, "var"}, |
| {TOKthen, "then"}, |
| {TOKelse, "else"}, |
| {TOKexit, "exit"}, |
| {TOKdownto, "downto"}, |
| {TOKreturn, "return"}, |
| {TOKinfinity, "infinity"}, |
| {TOKendwhile, "endwhile"}, |
| {TOKforeach, "foreach"}, |
| {TOKendfunc, "endfunc"}, |
| {TOKelseif, "elseif"}, |
| {TOKwhile, "while"}, |
| {TOKendfor, "endfor"}, |
| {TOKthrow, "throw"}, |
| {TOKstep, "step"}, |
| {TOKupto, "upto"}, |
| {TOKcontinue, "continue"}, |
| {TOKfunc, "func"}, |
| {TOKendif, "endif"}, |
| }; |
| |
| #ifndef NDEBUG |
| const char* const tokenStrings[] = { |
| "TOKand", "TOKlparen", "TOKrparen", "TOKmul", |
| "TOKplus", "TOKcomma", "TOKminus", "TOKdot", |
| "TOKdiv", "TOKlt", "TOKassign", "TOKgt", |
| "TOKlbracket", "TOKrbracket", "TOKor", "TOKdotscream", |
| "TOKdotstar", "TOKdotdot", "TOKle", "TOKne", |
| "TOKeq", "TOKge", "TOKdo", "TOKkseq", |
| "TOKksge", "TOKksgt", "TOKif", "TOKin", |
| "TOKksle", "TOKkslt", "TOKksne", "TOKksor", |
| "TOKnull", "TOKbreak", "TOKksand", "TOKend", |
| "TOKeof", "TOKfor", "TOKnan", "TOKksnot", |
| "TOKvar", "TOKthen", "TOKelse", "TOKexit", |
| "TOKdownto", "TOKreturn", "TOKinfinity", "TOKendwhile", |
| "TOKforeach", "TOKendfunc", "TOKelseif", "TOKwhile", |
| "TOKendfor", "TOKthrow", "TOKstep", "TOKupto", |
| "TOKcontinue", "TOKfunc", "TOKendif", "TOKstar", |
| "TOKidentifier", "TOKunderscore", "TOKdollar", "TOKexclamation", |
| "TOKcall", "TOKstring", "TOKnumber", "TOKreserver", |
| }; |
| #endif // NDEBUG |
| |
| XFA_FM_TOKEN TokenizeIdentifier(WideStringView str) { |
| const XFA_FMKeyword* result = |
| std::find_if(std::begin(keyWords), std::end(keyWords), |
| [str](const XFA_FMKeyword& iter) { |
| return str.EqualsASCII(iter.m_keyword); |
| }); |
| if (result != std::end(keyWords) && str.EqualsASCII(result->m_keyword)) |
| return result->m_type; |
| return TOKidentifier; |
| } |
| |
| } // namespace |
| |
| CXFA_FMToken::CXFA_FMToken(XFA_FM_TOKEN token) : m_type(token) {} |
| |
| CXFA_FMToken::CXFA_FMToken() : CXFA_FMToken(TOKreserver) {} |
| |
| CXFA_FMToken::CXFA_FMToken(const CXFA_FMToken&) = default; |
| |
| CXFA_FMToken::~CXFA_FMToken() = default; |
| |
| #ifndef NDEBUG |
| WideString CXFA_FMToken::ToDebugString() const { |
| WideString str = WideString::FromASCII("type = "); |
| str += WideString::FromASCII(tokenStrings[m_type]); |
| str += WideString::FromASCII(", string = "); |
| str += m_string; |
| return str; |
| } |
| #endif // NDEBUG |
| |
| CXFA_FMLexer::CXFA_FMLexer(WideStringView wsFormCalc) |
| : m_spInput(wsFormCalc.span()) {} |
| |
| CXFA_FMLexer::~CXFA_FMLexer() = default; |
| |
| CXFA_FMToken CXFA_FMLexer::NextToken() { |
| if (m_bLexerError) |
| return CXFA_FMToken(); |
| |
| while (!IsComplete() && m_spInput[m_nCursor]) { |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) { |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| |
| switch (m_spInput[m_nCursor]) { |
| case '\n': |
| ++m_nCursor; |
| break; |
| case '\r': |
| ++m_nCursor; |
| break; |
| case ';': |
| AdvanceForComment(); |
| break; |
| case '"': |
| return AdvanceForString(); |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| return AdvanceForNumber(); |
| case '=': |
| ++m_nCursor; |
| if (m_nCursor >= m_spInput.size()) |
| return CXFA_FMToken(TOKassign); |
| |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) { |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| if (m_spInput[m_nCursor] == '=') { |
| ++m_nCursor; |
| return CXFA_FMToken(TOKeq); |
| } |
| return CXFA_FMToken(TOKassign); |
| case '<': |
| ++m_nCursor; |
| if (m_nCursor >= m_spInput.size()) |
| return CXFA_FMToken(TOKlt); |
| |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) { |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| if (m_spInput[m_nCursor] == '=') { |
| ++m_nCursor; |
| return CXFA_FMToken(TOKle); |
| } |
| if (m_spInput[m_nCursor] == '>') { |
| ++m_nCursor; |
| return CXFA_FMToken(TOKne); |
| } |
| return CXFA_FMToken(TOKlt); |
| case '>': |
| ++m_nCursor; |
| if (m_nCursor >= m_spInput.size()) |
| return CXFA_FMToken(TOKgt); |
| |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) { |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| if (m_spInput[m_nCursor] == '=') { |
| ++m_nCursor; |
| return CXFA_FMToken(TOKge); |
| } |
| return CXFA_FMToken(TOKgt); |
| case ',': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKcomma); |
| case '(': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKlparen); |
| case ')': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKrparen); |
| case '[': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKlbracket); |
| case ']': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKrbracket); |
| case '&': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKand); |
| case '|': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKor); |
| case '+': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKplus); |
| case '-': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKminus); |
| case '*': |
| ++m_nCursor; |
| return CXFA_FMToken(TOKmul); |
| case '/': { |
| ++m_nCursor; |
| if (m_nCursor >= m_spInput.size()) |
| return CXFA_FMToken(TOKdiv); |
| |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) { |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| if (m_spInput[m_nCursor] != '/') |
| return CXFA_FMToken(TOKdiv); |
| |
| AdvanceForComment(); |
| break; |
| } |
| case '.': |
| ++m_nCursor; |
| if (m_nCursor >= m_spInput.size()) |
| return CXFA_FMToken(TOKdot); |
| |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) { |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| |
| if (m_spInput[m_nCursor] == '.') { |
| ++m_nCursor; |
| return CXFA_FMToken(TOKdotdot); |
| } |
| if (m_spInput[m_nCursor] == '*') { |
| ++m_nCursor; |
| return CXFA_FMToken(TOKdotstar); |
| } |
| if (m_spInput[m_nCursor] == '#') { |
| ++m_nCursor; |
| return CXFA_FMToken(TOKdotscream); |
| } |
| if (FXSYS_IsDecimalDigit(m_spInput[m_nCursor])) { |
| --m_nCursor; |
| return AdvanceForNumber(); |
| } |
| return CXFA_FMToken(TOKdot); |
| default: |
| if (IsWhitespaceCharacter(m_spInput[m_nCursor])) { |
| ++m_nCursor; |
| break; |
| } |
| if (!IsInitialIdentifierCharacter(m_spInput[m_nCursor])) { |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| return AdvanceForIdentifier(); |
| } |
| } |
| return CXFA_FMToken(TOKeof); |
| } |
| |
| CXFA_FMToken CXFA_FMLexer::AdvanceForNumber() { |
| // This will set end to the character after the end of the number. |
| int32_t used_length = 0; |
| if (m_nCursor < m_spInput.size()) { |
| FXSYS_wcstof(&m_spInput[m_nCursor], m_spInput.size() - m_nCursor, |
| &used_length); |
| } |
| size_t end = m_nCursor + used_length; |
| if (used_length == 0 || |
| (end < m_spInput.size() && FXSYS_iswalpha(m_spInput[end]))) { |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| CXFA_FMToken token(TOKnumber); |
| token.m_string = |
| WideStringView(m_spInput.subspan(m_nCursor, end - m_nCursor)); |
| m_nCursor = end; |
| return token; |
| } |
| |
| CXFA_FMToken CXFA_FMLexer::AdvanceForString() { |
| CXFA_FMToken token(TOKstring); |
| size_t start = m_nCursor; |
| ++m_nCursor; |
| while (!IsComplete() && m_spInput[m_nCursor]) { |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) |
| break; |
| |
| if (m_spInput[m_nCursor] == '"') { |
| // Check for escaped "s, i.e. "". |
| ++m_nCursor; |
| // If the end of the input has been reached it was not escaped. |
| if (m_nCursor >= m_spInput.size()) { |
| token.m_string = |
| WideStringView(m_spInput.subspan(start, m_nCursor - start)); |
| return token; |
| } |
| // If the next character is not a " then the end of the string has been |
| // found. |
| if (m_spInput[m_nCursor] != '"') { |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) |
| break; |
| |
| token.m_string = |
| WideStringView(m_spInput.subspan(start, m_nCursor - start)); |
| return token; |
| } |
| } |
| ++m_nCursor; |
| } |
| |
| // Didn't find the end of the string. |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| |
| CXFA_FMToken CXFA_FMLexer::AdvanceForIdentifier() { |
| size_t start = m_nCursor; |
| ++m_nCursor; |
| while (!IsComplete() && m_spInput[m_nCursor]) { |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) { |
| RaiseError(); |
| return CXFA_FMToken(); |
| } |
| if (!IsIdentifierCharacter(m_spInput[m_nCursor])) |
| break; |
| |
| ++m_nCursor; |
| } |
| |
| WideStringView str = |
| WideStringView(m_spInput.subspan(start, m_nCursor - start)); |
| CXFA_FMToken token(TokenizeIdentifier(str)); |
| token.m_string = str; |
| return token; |
| } |
| |
| void CXFA_FMLexer::AdvanceForComment() { |
| ++m_nCursor; |
| while (!IsComplete() && m_spInput[m_nCursor]) { |
| if (!IsFormCalcCharacter(m_spInput[m_nCursor])) { |
| RaiseError(); |
| return; |
| } |
| if (m_spInput[m_nCursor] == L'\r') { |
| ++m_nCursor; |
| return; |
| } |
| if (m_spInput[m_nCursor] == L'\n') { |
| ++m_nCursor; |
| return; |
| } |
| ++m_nCursor; |
| } |
| } |