You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
613 lines
13 KiB
613 lines
13 KiB
/* |
|
* ***************************************************************************** |
|
* |
|
* SPDX-License-Identifier: BSD-2-Clause |
|
* |
|
* Copyright (c) 2018-2021 Gavin D. Howard and contributors. |
|
* |
|
* Redistribution and use in source and binary forms, with or without |
|
* modification, are permitted provided that the following conditions are met: |
|
* |
|
* * Redistributions of source code must retain the above copyright notice, this |
|
* list of conditions and the following disclaimer. |
|
* |
|
* * Redistributions in binary form must reproduce the above copyright notice, |
|
* this list of conditions and the following disclaimer in the documentation |
|
* and/or other materials provided with the distribution. |
|
* |
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
* POSSIBILITY OF SUCH DAMAGE. |
|
* |
|
* ***************************************************************************** |
|
* |
|
* Definitions for bc's lexer. |
|
* |
|
*/ |
|
|
|
#ifndef BC_LEX_H |
|
#define BC_LEX_H |
|
|
|
#include <stdbool.h> |
|
#include <stddef.h> |
|
|
|
#include <status.h> |
|
#include <vector.h> |
|
#include <lang.h> |
|
|
|
// Two convenience macros for throwing errors in lex code. They take care of
// plumbing like passing in the current line the lexer is on: both forward the
// error @a e and the lexer's line field to bc_vm_handleError(), and
// bc_lex_verr() additionally forwards any extra (variadic) arguments.
#define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line))
#define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__))
|
|
|
// BC_LEX_NEG_CHAR returns the char that corresponds to negative for the
// current calculator.
//
// BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid
// char for numbers. In bc and dc, capital letters are part of numbers, to a
// point. (dc only goes up to hex, so its last valid char is 'F'.)
//
// When both calculators are enabled, the choice is made through BC_IS_BC,
// which is not defined in this header. Otherwise, the values are fixed
// constants for the one calculator that is enabled.
#if BC_ENABLED

#if DC_ENABLED
#define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_')
#define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F')
#else // DC_ENABLED
#define BC_LEX_NEG_CHAR ('-')
#define BC_LEX_LAST_NUM_CHAR ('Z')
#endif // DC_ENABLED

#else // BC_ENABLED

#define BC_LEX_NEG_CHAR ('_')
#define BC_LEX_LAST_NUM_CHAR ('F')

#endif // BC_ENABLED
|
|
|
/**
 * Returns true if c is a valid number character: a decimal digit, a capital
 * letter from 'A' through BC_LEX_LAST_NUM_CHAR, or a '.' when no decimal point
 * has been seen yet and non-integers are allowed.
 *
 * This is a macro, so @a c is evaluated more than once; do not pass an
 * expression with side effects. The value handed to isdigit() is converted to
 * unsigned char first because passing a negative char to the <ctype.h>
 * functions is undefined behavior.
 *
 * @param c         The char to check.
 * @param pt        If a decimal point has already been seen.
 * @param int_only  True if the number is expected to be an int only, false if
 *                  non-integers are allowed.
 * @return          True if @a c is a valid number character.
 */
#define BC_LEX_NUM_CHAR(c, pt, int_only)                                \
    (isdigit((unsigned char) (c)) != 0 ||                               \
     ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) ||                     \
     ((c) == '.' && !(pt) && !(int_only)))
|
|
|
/// An enum of lex token types. Which enumerators exist depends on the
/// BC_ENABLED, DC_ENABLED, and BC_ENABLE_EXTRA_MATH build settings, so the
/// numeric value of a given token can differ between builds.
typedef enum BcLexType
{
    /// End of file.
    BC_LEX_EOF,

    /// Marker for invalid tokens, used by bc and dc for const data.
    BC_LEX_INVALID,

#if BC_ENABLED

    /// Increment operator.
    BC_LEX_OP_INC,

    /// Decrement operator.
    BC_LEX_OP_DEC,

#endif // BC_ENABLED

    /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer
    /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be
    /// able to distinguish them.
    BC_LEX_NEG,

    /// Boolean not.
    BC_LEX_OP_BOOL_NOT,

#if BC_ENABLE_EXTRA_MATH

    /// Truncation operator.
    BC_LEX_OP_TRUNC,

#endif // BC_ENABLE_EXTRA_MATH

    /// Power operator.
    BC_LEX_OP_POWER,

    /// Multiplication operator.
    BC_LEX_OP_MULTIPLY,

    /// Division operator.
    BC_LEX_OP_DIVIDE,

    /// Modulus operator.
    BC_LEX_OP_MODULUS,

    /// Addition operator.
    BC_LEX_OP_PLUS,

    /// Subtraction operator.
    BC_LEX_OP_MINUS,

#if BC_ENABLE_EXTRA_MATH

    /// Places (truncate or extend) operator.
    BC_LEX_OP_PLACES,

    /// Left (decimal) shift operator.
    BC_LEX_OP_LSHIFT,

    /// Right (decimal) shift operator.
    BC_LEX_OP_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH

    /// Equal operator.
    BC_LEX_OP_REL_EQ,

    /// Less than or equal operator.
    BC_LEX_OP_REL_LE,

    /// Greater than or equal operator.
    BC_LEX_OP_REL_GE,

    /// Not equal operator.
    BC_LEX_OP_REL_NE,

    /// Less than operator.
    BC_LEX_OP_REL_LT,

    /// Greater than operator.
    BC_LEX_OP_REL_GT,

    /// Boolean or operator.
    BC_LEX_OP_BOOL_OR,

    /// Boolean and operator.
    BC_LEX_OP_BOOL_AND,

#if BC_ENABLED

    /// Power assignment operator.
    BC_LEX_OP_ASSIGN_POWER,

    /// Multiplication assignment operator.
    BC_LEX_OP_ASSIGN_MULTIPLY,

    /// Division assignment operator.
    BC_LEX_OP_ASSIGN_DIVIDE,

    /// Modulus assignment operator.
    BC_LEX_OP_ASSIGN_MODULUS,

    /// Addition assignment operator.
    BC_LEX_OP_ASSIGN_PLUS,

    /// Subtraction assignment operator.
    BC_LEX_OP_ASSIGN_MINUS,

#if BC_ENABLE_EXTRA_MATH

    /// Places (truncate or extend) assignment operator.
    BC_LEX_OP_ASSIGN_PLACES,

    /// Left (decimal) shift assignment operator.
    BC_LEX_OP_ASSIGN_LSHIFT,

    /// Right (decimal) shift assignment operator.
    BC_LEX_OP_ASSIGN_RSHIFT,

#endif // BC_ENABLE_EXTRA_MATH
#endif // BC_ENABLED

    /// Assignment operator.
    BC_LEX_OP_ASSIGN,

    /// Newline.
    BC_LEX_NLINE,

    /// Whitespace.
    BC_LEX_WHITESPACE,

    /// Left parenthesis.
    BC_LEX_LPAREN,

    /// Right parenthesis.
    BC_LEX_RPAREN,

    /// Left bracket.
    BC_LEX_LBRACKET,

    /// Comma.
    BC_LEX_COMMA,

    /// Right bracket.
    BC_LEX_RBRACKET,

    /// Left brace.
    BC_LEX_LBRACE,

    /// Semicolon.
    BC_LEX_SCOLON,

    /// Right brace.
    BC_LEX_RBRACE,

    /// String.
    BC_LEX_STR,

    /// Identifier/name.
    BC_LEX_NAME,

    /// Constant number.
    BC_LEX_NUMBER,

    // These keywords are in the order they are in for a reason. Don't change
    // the order unless you want a bunch of weird failures in the test suite.
    // In fact, almost all of these tokens are in a specific order for a reason.

#if BC_ENABLED

    /// bc auto keyword.
    BC_LEX_KW_AUTO,

    /// bc break keyword.
    BC_LEX_KW_BREAK,

    /// bc continue keyword.
    BC_LEX_KW_CONTINUE,

    /// bc define keyword.
    BC_LEX_KW_DEFINE,

    /// bc for keyword.
    BC_LEX_KW_FOR,

    /// bc if keyword.
    BC_LEX_KW_IF,

    /// bc limits keyword.
    BC_LEX_KW_LIMITS,

    /// bc return keyword.
    BC_LEX_KW_RETURN,

    /// bc while keyword.
    BC_LEX_KW_WHILE,

    /// bc halt keyword.
    BC_LEX_KW_HALT,

    /// bc last keyword.
    BC_LEX_KW_LAST,

#endif // BC_ENABLED

    /// bc ibase keyword.
    BC_LEX_KW_IBASE,

    /// bc obase keyword.
    BC_LEX_KW_OBASE,

    /// bc scale keyword.
    BC_LEX_KW_SCALE,

#if BC_ENABLE_EXTRA_MATH

    /// bc seed keyword.
    BC_LEX_KW_SEED,

#endif // BC_ENABLE_EXTRA_MATH

    /// bc length keyword.
    BC_LEX_KW_LENGTH,

    /// bc print keyword.
    BC_LEX_KW_PRINT,

    /// bc sqrt keyword.
    BC_LEX_KW_SQRT,

    /// bc abs keyword.
    BC_LEX_KW_ABS,

#if BC_ENABLE_EXTRA_MATH

    /// bc irand keyword.
    BC_LEX_KW_IRAND,

#endif // BC_ENABLE_EXTRA_MATH

    /// bc asciify keyword.
    BC_LEX_KW_ASCIIFY,

    /// bc modexp keyword.
    BC_LEX_KW_MODEXP,

    /// bc divmod keyword.
    BC_LEX_KW_DIVMOD,

    /// bc quit keyword.
    BC_LEX_KW_QUIT,

    /// bc read keyword.
    BC_LEX_KW_READ,

#if BC_ENABLE_EXTRA_MATH

    /// bc rand keyword.
    BC_LEX_KW_RAND,

#endif // BC_ENABLE_EXTRA_MATH

    /// bc maxibase keyword.
    BC_LEX_KW_MAXIBASE,

    /// bc maxobase keyword.
    BC_LEX_KW_MAXOBASE,

    /// bc maxscale keyword.
    BC_LEX_KW_MAXSCALE,

#if BC_ENABLE_EXTRA_MATH

    /// bc maxrand keyword.
    BC_LEX_KW_MAXRAND,

#endif // BC_ENABLE_EXTRA_MATH

    /// bc line_length keyword.
    BC_LEX_KW_LINE_LENGTH,

#if BC_ENABLED

    /// bc global_stacks keyword.
    BC_LEX_KW_GLOBAL_STACKS,

#endif // BC_ENABLED

    /// bc leading_zero keyword.
    BC_LEX_KW_LEADING_ZERO,

    /// bc stream keyword.
    BC_LEX_KW_STREAM,

    /// bc else keyword.
    BC_LEX_KW_ELSE,

#if DC_ENABLED

    /// A special token for dc to calculate equal without a register.
    BC_LEX_EQ_NO_REG,

    /// Colon (array) operator.
    BC_LEX_COLON,

    /// Execute command.
    BC_LEX_EXECUTE,

    /// Print stack command.
    BC_LEX_PRINT_STACK,

    /// Clear stack command.
    BC_LEX_CLEAR_STACK,

    /// Register stack level command.
    BC_LEX_REG_STACK_LEVEL,

    /// Main stack level command.
    BC_LEX_STACK_LEVEL,

    /// Duplicate command.
    BC_LEX_DUPLICATE,

    /// Swap (reverse) command.
    BC_LEX_SWAP,

    /// Pop (remove) command.
    BC_LEX_POP,

    /// Store ibase command.
    BC_LEX_STORE_IBASE,

    /// Store obase command.
    BC_LEX_STORE_OBASE,

    /// Store scale command.
    BC_LEX_STORE_SCALE,

#if BC_ENABLE_EXTRA_MATH

    /// Store seed command.
    BC_LEX_STORE_SEED,

#endif // BC_ENABLE_EXTRA_MATH

    /// Load variable onto stack command.
    BC_LEX_LOAD,

    /// Pop off of variable stack onto results stack command.
    BC_LEX_LOAD_POP,

    /// Push onto variable stack command.
    BC_LEX_STORE_PUSH,

    /// Print with pop command.
    BC_LEX_PRINT_POP,

    /// Parameterized quit command.
    BC_LEX_NQUIT,

    /// Execution stack depth command.
    BC_LEX_EXEC_STACK_LENGTH,

    /// Scale of number command. This is needed specifically for dc because bc
    /// parses the scale function in parts.
    BC_LEX_SCALE_FACTOR,

    /// Array length command. This is needed specifically for dc because bc
    /// just reuses its length keyword.
    BC_LEX_ARRAY_LENGTH,

#endif // DC_ENABLED

} BcLexType;
|
|
|
// Forward declaration so that BcLexNext can refer to the lexer before the
// struct itself is defined.
struct BcLex;

/**
 * A function pointer to call when another token is needed. Mostly called by the
 * parser.
 * @param l  The lexer.
 */
typedef void (*BcLexNext)(struct BcLex* l);
|
|
|
/// The lexer. |
|
typedef struct BcLex |
|
{ |
|
/// A pointer to the text to lex. |
|
const char* buf; |
|
|
|
/// The current index into buf. |
|
size_t i; |
|
|
|
/// The current line. |
|
size_t line; |
|
|
|
/// The length of buf. |
|
size_t len; |
|
|
|
/// The current token. |
|
BcLexType t; |
|
|
|
/// The previous token. |
|
BcLexType last; |
|
|
|
/// A string to store extra data for tokens. For example, the @a BC_LEX_STR |
|
/// token really needs to store the actual string, and numbers also need the |
|
/// string. |
|
BcVec str; |
|
|
|
/// If this is true, the lexer is processing stdin and can ask for more data |
|
/// if a string or comment are not properly terminated. |
|
bool is_stdin; |
|
|
|
/// If this is true, the lexer is processing expressions from the |
|
/// command-line and can ask for more data if a string or comment are not |
|
/// properly terminated. |
|
bool is_exprs; |
|
|
|
} BcLex; |
|
|
|
/** |
|
* Initializes a lexer. |
|
* @param l The lexer to initialize. |
|
*/ |
|
void |
|
bc_lex_init(BcLex* l); |
|
|
|
/** |
|
* Frees a lexer. This is not guarded by #ifndef NDEBUG because a separate |
|
* parser is created at runtime to parse read() expressions and dc strings, and |
|
* that parser needs a lexer. |
|
* @param l The lexer to free. |
|
*/ |
|
void |
|
bc_lex_free(BcLex* l); |
|
|
|
/** |
|
* Sets the filename that the lexer will be lexing. |
|
* @param l The lexer. |
|
* @param file The filename that the lexer will lex. |
|
*/ |
|
void |
|
bc_lex_file(BcLex* l, const char* file); |
|
|
|
/** |
|
* Sets the text the lexer will lex. |
|
* @param l The lexer. |
|
* @param text The text to lex. |
|
* @param is_stdin True if the text is from stdin, false otherwise. |
|
* @param is_exprs True if the text is from command-line expressions, false |
|
* otherwise. |
|
*/ |
|
void |
|
bc_lex_text(BcLex* l, const char* text, bool is_stdin, bool is_exprs); |
|
|
|
/** |
|
* Generic next function for the parser to call. It takes care of calling the |
|
* correct @a BcLexNext function and consuming whitespace. |
|
* @param l The lexer. |
|
*/ |
|
void |
|
bc_lex_next(BcLex* l); |
|
|
|
/** |
|
* Lexes a line comment (one beginning with '#' and going to a newline). |
|
* @param l The lexer. |
|
*/ |
|
void |
|
bc_lex_lineComment(BcLex* l); |
|
|
|
/** |
|
* Lexes a general comment (C-style comment). |
|
* @param l The lexer. |
|
*/ |
|
void |
|
bc_lex_comment(BcLex* l); |
|
|
|
/** |
|
* Lexes whitespace, finding as much as possible. |
|
* @param l The lexer. |
|
*/ |
|
void |
|
bc_lex_whitespace(BcLex* l); |
|
|
|
/** |
|
* Lexes a number that begins with char @a start. This takes care of parsing |
|
* numbers in scientific and engineering notations. |
|
* @param l The lexer. |
|
* @param start The starting char of the number. To detect a number and call |
|
* this function, the lexer had to eat the first char. It fixes |
|
* that by passing it in. |
|
*/ |
|
void |
|
bc_lex_number(BcLex* l, char start); |
|
|
|
/** |
|
* Lexes a name/identifier. |
|
* @param l The lexer. |
|
*/ |
|
void |
|
bc_lex_name(BcLex* l); |
|
|
|
/** |
|
* Lexes common whitespace characters. |
|
* @param l The lexer. |
|
* @param c The character to lex. |
|
*/ |
|
void |
|
bc_lex_commonTokens(BcLex* l, char c); |
|
|
|
/** |
|
* Throws a parse error because char @a c was invalid. |
|
* @param l The lexer. |
|
* @param c The problem character. |
|
*/ |
|
void |
|
bc_lex_invalidChar(BcLex* l, char c); |
|
|
|
/** |
|
* Reads a line from stdin and puts it into the lexer's buffer. |
|
* @param l The lexer. |
|
*/ |
|
bool |
|
bc_lex_readLine(BcLex* l); |
|
|
|
#endif // BC_LEX_H
|
|
|