An implementation of Unix dc and POSIX bc with GNU and BSD extensions. Finished, but well-maintained.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

441 lines
10 KiB

/*
* *****************************************************************************
*
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2018-2021 Gavin D. Howard and contributors.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* *****************************************************************************
*
* The parser for dc.
*
*/
#if DC_ENABLED
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <setjmp.h>
#include <dc.h>
#include <program.h>
#include <vm.h>
/**
* Parses a register. The lexer should have already lexed the true name of the
* register, per extended registers and such.
* @param p The parser.
* @param var True if the parser is for a variable, false otherwise.
*/
static void
dc_parse_register(BcParse* p, bool var)
{
bc_lex_next(&p->l);
if (p->l.t != BC_LEX_NAME) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
bc_parse_pushName(p, p->l.str.v, var);
}
/**
* Parses a dc string.
* @param p The parser.
*/
static inline void
dc_parse_string(BcParse* p)
{
bc_parse_addString(p);
bc_lex_next(&p->l);
}
/**
* Parses a token that requires a memory operation, like load or store.
* @param p The parser.
* @param inst The instruction to push for the memory operation.
* @param name Whether the load or store is to a variable or array, and not to
* a global.
* @param store True if the operation is a store, false otherwise.
*/
static void
dc_parse_mem(BcParse* p, uchar inst, bool name, bool store)
{
// Push the instruction.
bc_parse_push(p, inst);
// Parse the register if necessary.
if (name) dc_parse_register(p, inst != BC_INST_ARRAY_ELEM);
// Stores use the bc assign infrastructure, but they need to do a swap
// first.
if (store)
{
bc_parse_push(p, BC_INST_SWAP);
bc_parse_push(p, BC_INST_ASSIGN_NO_VAL);
}
bc_lex_next(&p->l);
}
/**
* Parses a conditional execution instruction.
* @param p The parser.
* @param inst The instruction for the condition.
*/
static void
dc_parse_cond(BcParse* p, uchar inst)
{
// Push the instruction for the condition and the conditional execution.
bc_parse_push(p, inst);
bc_parse_push(p, BC_INST_EXEC_COND);
// Parse the register.
dc_parse_register(p, true);
bc_lex_next(&p->l);
// If the next token is an else, parse the else.
if (p->l.t == BC_LEX_KW_ELSE)
{
dc_parse_register(p, true);
bc_lex_next(&p->l);
}
// Otherwise, push a marker for no else.
else bc_parse_pushIndex(p, SIZE_MAX);
}
/**
* Parses a token for dc.
* @param p The parser.
* @param t The token to parse.
* @param flags The flags that say what is allowed or not.
*/
static void
dc_parse_token(BcParse* p, BcLexType t, uint8_t flags)
{
uchar inst;
bool assign, get_token = false;
switch (t)
{
case BC_LEX_OP_REL_EQ:
case BC_LEX_OP_REL_LE:
case BC_LEX_OP_REL_GE:
case BC_LEX_OP_REL_NE:
case BC_LEX_OP_REL_LT:
case BC_LEX_OP_REL_GT:
{
inst = (uchar) (t - BC_LEX_OP_REL_EQ + BC_INST_REL_EQ);
dc_parse_cond(p, inst);
break;
}
case BC_LEX_SCOLON:
case BC_LEX_COLON:
{
dc_parse_mem(p, BC_INST_ARRAY_ELEM, true, t == BC_LEX_COLON);
break;
}
case BC_LEX_STR:
{
dc_parse_string(p);
break;
}
case BC_LEX_NEG:
{
// This tells us whether or not the neg is for a command or at the
// beginning of a number. If it's a command, push it. Otherwise,
// fallthrough and parse the number.
if (dc_lex_negCommand(&p->l))
{
bc_parse_push(p, BC_INST_NEG);
get_token = true;
break;
}
bc_lex_next(&p->l);
// Fallthrough.
BC_FALLTHROUGH
}
case BC_LEX_NUMBER:
{
bc_parse_number(p);
// Push the negative instruction if we fell through from above.
if (t == BC_LEX_NEG) bc_parse_push(p, BC_INST_NEG);
get_token = true;
break;
}
case BC_LEX_KW_READ:
{
// Make sure the read is not recursive.
if (BC_ERR(flags & BC_PARSE_NOREAD))
{
bc_parse_err(p, BC_ERR_EXEC_REC_READ);
}
else bc_parse_push(p, BC_INST_READ);
get_token = true;
break;
}
case BC_LEX_OP_ASSIGN:
case BC_LEX_STORE_PUSH:
{
assign = t == BC_LEX_OP_ASSIGN;
inst = assign ? BC_INST_VAR : BC_INST_PUSH_TO_VAR;
dc_parse_mem(p, inst, true, assign);
break;
}
case BC_LEX_LOAD:
case BC_LEX_LOAD_POP:
{
inst = t == BC_LEX_LOAD_POP ? BC_INST_PUSH_VAR : BC_INST_LOAD;
dc_parse_mem(p, inst, true, false);
break;
}
case BC_LEX_REG_STACK_LEVEL:
{
dc_parse_mem(p, BC_INST_REG_STACK_LEN, true, false);
break;
}
case BC_LEX_STORE_IBASE:
case BC_LEX_STORE_OBASE:
case BC_LEX_STORE_SCALE:
#if BC_ENABLE_EXTRA_MATH
case BC_LEX_STORE_SEED:
#endif // BC_ENABLE_EXTRA_MATH
{
inst = (uchar) (t - BC_LEX_STORE_IBASE + BC_INST_IBASE);
dc_parse_mem(p, inst, false, true);
break;
}
case BC_LEX_ARRAY_LENGTH:
{
// Need to push the array first, based on how length is implemented.
bc_parse_push(p, BC_INST_ARRAY);
dc_parse_register(p, false);
bc_parse_push(p, BC_INST_LENGTH);
get_token = true;
break;
}
case BC_LEX_EOF:
case BC_LEX_INVALID:
#if BC_ENABLED
case BC_LEX_OP_INC:
case BC_LEX_OP_DEC:
#endif // BC_ENABLED
case BC_LEX_OP_BOOL_NOT:
#if BC_ENABLE_EXTRA_MATH
case BC_LEX_OP_TRUNC:
#endif // BC_ENABLE_EXTRA_MATH
case BC_LEX_OP_POWER:
case BC_LEX_OP_MULTIPLY:
case BC_LEX_OP_DIVIDE:
case BC_LEX_OP_MODULUS:
case BC_LEX_OP_PLUS:
case BC_LEX_OP_MINUS:
#if BC_ENABLE_EXTRA_MATH
case BC_LEX_OP_PLACES:
case BC_LEX_OP_LSHIFT:
case BC_LEX_OP_RSHIFT:
#endif // BC_ENABLE_EXTRA_MATH
case BC_LEX_OP_BOOL_OR:
case BC_LEX_OP_BOOL_AND:
#if BC_ENABLED
case BC_LEX_OP_ASSIGN_POWER:
case BC_LEX_OP_ASSIGN_MULTIPLY:
case BC_LEX_OP_ASSIGN_DIVIDE:
case BC_LEX_OP_ASSIGN_MODULUS:
case BC_LEX_OP_ASSIGN_PLUS:
case BC_LEX_OP_ASSIGN_MINUS:
#if BC_ENABLE_EXTRA_MATH
case BC_LEX_OP_ASSIGN_PLACES:
case BC_LEX_OP_ASSIGN_LSHIFT:
case BC_LEX_OP_ASSIGN_RSHIFT:
#endif // BC_ENABLE_EXTRA_MATH
#endif // BC_ENABLED
case BC_LEX_NLINE:
case BC_LEX_WHITESPACE:
case BC_LEX_LPAREN:
case BC_LEX_RPAREN:
case BC_LEX_LBRACKET:
case BC_LEX_COMMA:
case BC_LEX_RBRACKET:
case BC_LEX_LBRACE:
case BC_LEX_NAME:
case BC_LEX_RBRACE:
#if BC_ENABLED
case BC_LEX_KW_AUTO:
case BC_LEX_KW_BREAK:
case BC_LEX_KW_CONTINUE:
case BC_LEX_KW_DEFINE:
case BC_LEX_KW_FOR:
case BC_LEX_KW_IF:
case BC_LEX_KW_LIMITS:
case BC_LEX_KW_RETURN:
case BC_LEX_KW_WHILE:
case BC_LEX_KW_HALT:
case BC_LEX_KW_LAST:
#endif // BC_ENABLED
case BC_LEX_KW_IBASE:
case BC_LEX_KW_OBASE:
case BC_LEX_KW_SCALE:
#if BC_ENABLE_EXTRA_MATH
case BC_LEX_KW_SEED:
#endif // BC_ENABLE_EXTRA_MATH
case BC_LEX_KW_LENGTH:
case BC_LEX_KW_PRINT:
case BC_LEX_KW_SQRT:
case BC_LEX_KW_ABS:
#if BC_ENABLE_EXTRA_MATH
case BC_LEX_KW_IRAND:
#endif // BC_ENABLE_EXTRA_MATH
case BC_LEX_KW_ASCIIFY:
case BC_LEX_KW_MODEXP:
case BC_LEX_KW_DIVMOD:
case BC_LEX_KW_QUIT:
#if BC_ENABLE_EXTRA_MATH
case BC_LEX_KW_RAND:
#endif // BC_ENABLE_EXTRA_MATH
case BC_LEX_KW_MAXIBASE:
case BC_LEX_KW_MAXOBASE:
case BC_LEX_KW_MAXSCALE:
#if BC_ENABLE_EXTRA_MATH
case BC_LEX_KW_MAXRAND:
#endif // BC_ENABLE_EXTRA_MATH
case BC_LEX_KW_LINE_LENGTH:
#if BC_ENABLED
case BC_LEX_KW_GLOBAL_STACKS:
#endif // BC_ENABLED
case BC_LEX_KW_LEADING_ZERO:
case BC_LEX_KW_STREAM:
case BC_LEX_KW_ELSE:
case BC_LEX_EQ_NO_REG:
case BC_LEX_EXECUTE:
case BC_LEX_PRINT_STACK:
case BC_LEX_CLEAR_STACK:
case BC_LEX_STACK_LEVEL:
case BC_LEX_DUPLICATE:
case BC_LEX_SWAP:
case BC_LEX_POP:
case BC_LEX_PRINT_POP:
case BC_LEX_NQUIT:
case BC_LEX_EXEC_STACK_LENGTH:
case BC_LEX_SCALE_FACTOR:
{
// All other tokens should be taken care of by the caller, or they
// actually *are* invalid.
bc_parse_err(p, BC_ERR_PARSE_TOKEN);
}
}
if (get_token) bc_lex_next(&p->l);
}
void
dc_parse_expr(BcParse* p, uint8_t flags)
{
BcInst inst;
BcLexType t;
bool need_expr, have_expr = false;
need_expr = ((flags & BC_PARSE_NOREAD) != 0);
// dc can just keep parsing forever basically, unlike bc, which has to have
// a whole bunch of complicated nonsense because its language was horribly
// designed.
// While we don't have EOF...
while ((t = p->l.t) != BC_LEX_EOF)
{
// Eat newline.
if (t == BC_LEX_NLINE)
{
bc_lex_next(&p->l);
continue;
}
// Get the instruction that corresponds to the token.
inst = dc_parse_insts[t];
// If the instruction is invalid, that means we have to do some harder
// parsing. So if not invalid, just push the instruction; otherwise,
// parse the token.
if (inst != BC_INST_INVALID)
{
bc_parse_push(p, inst);
bc_lex_next(&p->l);
}
else dc_parse_token(p, t, flags);
have_expr = true;
}
// If we don't have an expression and need one, barf. Otherwise, just push a
// BC_INST_POP_EXEC if we have EOF and BC_PARSE_NOCALL, which dc uses to
// indicate that it is executing a string.
if (BC_ERR(need_expr && !have_expr)) bc_err(BC_ERR_EXEC_READ_EXPR);
else if (p->l.t == BC_LEX_EOF && (flags & BC_PARSE_NOCALL))
{
bc_parse_push(p, BC_INST_POP_EXEC);
}
}
void
dc_parse_parse(BcParse* p)
{
assert(p != NULL);
BC_SETJMP_LOCKED(vm, exit);
// If we have EOF, someone called this function one too many times.
// Otherwise, parse.
if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF);
else dc_parse_expr(p, 0);
exit:
// Need to reset if there was an error.
if (BC_SIG_EXC(vm)) bc_parse_reset(p);
BC_LONGJMP_CONT(vm);
BC_SIG_MAYLOCK;
}
#endif // DC_ENABLED