


/*




* *****************************************************************************




*




* SPDXLicenseIdentifier: BSD2Clause




*




* Copyright (c) 20182021 Gavin D. Howard and contributors.




*




* Redistribution and use in source and binary forms, with or without




* modification, are permitted provided that the following conditions are met:




*




* * Redistributions of source code must retain the above copyright notice, this




* list of conditions and the following disclaimer.




*




* * Redistributions in binary form must reproduce the above copyright notice,




* this list of conditions and the following disclaimer in the documentation




* and/or other materials provided with the distribution.




*




* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"




* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE




* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE




* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE




* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR




* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF




* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS




* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN




* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)




* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE




* POSSIBILITY OF SUCH DAMAGE.




*




* *****************************************************************************




*




* The parser for bc.




*




*/








#if BC_ENABLED








#include <assert.h>




#include <stdbool.h>




#include <stdlib.h>




#include <string.h>








#include <setjmp.h>








#include <bc.h>




#include <num.h>




#include <vm.h>








// Before you embark on trying to understand this code, have you read the




// Development manual (manuals/development.md) and the comment in include/bc.h




// yet? No? Do that first. I'm serious.




//




// The reason is because this file holds the most sensitive and finicky code in




// the entire codebase. Even getting history to work on Windows was nothing




// compared to this. This is where dreams go to die, where dragons live, and




// from which Ken Thompson himself would flee.








static void




bc_parse_else(BcParse* p);








static void




bc_parse_stmt(BcParse* p);








static BcParseStatus




bc_parse_expr_err(BcParse* p, uint8_t flags, BcParseNext next);








static void




bc_parse_expr_status(BcParse* p, uint8_t flags, BcParseNext next);








/**




* Returns true if an instruction could only have come from a "leaf" expression.




* For more on what leaf expressions are, read the comment for BC_PARSE_LEAF().




* @param t The instruction to test.




* @return True if the instruction is a from a leaf expression.




*/




static bool




bc_parse_inst_isLeaf(BcInst t)




{




return (t >= BC_INST_NUM && t <= BC_INST_LEADING_ZERO) 




#if BC_ENABLE_EXTRA_MATH




t == BC_INST_TRUNC 




#endif // BC_ENABLE_EXTRA_MATH




t <= BC_INST_DEC;




}








/**




* Returns true if the *previous* token was a delimiter. A delimiter is anything




* that can legally end a statement. In bc's case, it could be a newline, a




* semicolon, and a brace in certain cases.




* @param p The parser.




* @return True if the token is a legal delimiter.




*/




static bool




bc_parse_isDelimiter(const BcParse* p)




{




BcLexType t = p>l.t;




bool good;








// If it's an obvious delimiter, say so.




if (BC_PARSE_DELIMITER(t)) return true;








good = false;








// If the current token is a keyword, then...beware. That means that we need




// to check for a "dangling" else, where there was no bracedelimited block




// on the previous if.




if (t == BC_LEX_KW_ELSE)




{




size_t i;




uint16_t *fptr = NULL, flags = BC_PARSE_FLAG_ELSE;








// As long as going up the stack is valid for a dangling else, keep on.




for (i = 0; i < p>flags.len && BC_PARSE_BLOCK_STMT(flags); ++i)




{




fptr = bc_vec_item_rev(&p>flags, i);




flags = *fptr;








// If we need a brace and don't have one, then we don't have a




// delimiter.




if ((flags & BC_PARSE_FLAG_BRACE) && p>l.last != BC_LEX_RBRACE)




{




return false;




}




}








// Oh, and we had also better have an if statement somewhere.




good = ((flags & BC_PARSE_FLAG_IF) != 0);




}




else if (t == BC_LEX_RBRACE)




{




size_t i;








// Since we have a brace, we need to just check if a brace was needed.




for (i = 0; !good && i < p>flags.len; ++i)




{




uint16_t* fptr = bc_vec_item_rev(&p>flags, i);




good = (((*fptr) & BC_PARSE_FLAG_BRACE) != 0);




}




}








return good;




}








/**




* Returns true if we are in top level of a function body. The POSIX grammar




* is defined such that anything is allowed after a function body, so we must




* use this function to detect that case when ending a function body.




* @param p The parser.




* @return True if we are in the top level of parsing a function body.




*/




static bool




bc_parse_TopFunc(const BcParse* p)




{




bool good = p>flags.len == 2;








uint16_t val = BC_PARSE_FLAG_BRACE  BC_PARSE_FLAG_FUNC_INNER;




val = BC_PARSE_FLAG_FUNC;








return good && BC_PARSE_TOP_FLAG(p) == val;




}








/**




* Sets a previously defined exit label. What are labels? See the bc Parsing




* section of the Development manual (manuals/development.md).




* @param p The parser.




*/




static void




bc_parse_setLabel(BcParse* p)




{




BcFunc* func = p>func;




BcInstPtr* ip = bc_vec_top(&p>exits);




size_t* label;








assert(func == bc_vec_item(&p>prog>fns, p>fidx));








// Set the preallocated label to the correct index.




label = bc_vec_item(&func>labels, ip>idx);




*label = func>code.len;








// Now, we don't need the exit label; it is done.




bc_vec_pop(&p>exits);




}








/**




* Creates a label and sets it to idx. If this is an exit label, then idx is




* actually invalid, but it doesn't matter because it will be fixed by




* bc_parse_setLabel() later.




* @param p The parser.




* @param idx The index of the label.




*/




static void




bc_parse_createLabel(BcParse* p, size_t idx)




{




bc_vec_push(&p>func>labels, &idx);




}








/**




* Creates a conditional label. Unlike an exit label, this label is set at




* creation time because it comes *before* the code that will target it.




* @param p The parser.




* @param idx The index of the label.




*/




static void




bc_parse_createCondLabel(BcParse* p, size_t idx)




{




bc_parse_createLabel(p, p>func>code.len);




bc_vec_push(&p>conds, &idx);




}








/*




* Creates an exit label to be filled in later by bc_parse_setLabel(). Also, why




* create a label to be filled in later? Because exit labels are meant to be




* targeted by code that comes *before* the label. Since we have to parse that




* code first, and don't know how long it will be, we need to just make sure to




* reserve a slot to be filled in later when we know.




*




* By the way, this uses BcInstPtr because it was convenient. The field idx




* holds the index, and the field func holds the loop boolean.




*




* @param p The parser.




* @param idx The index of the label's position.




* @param loop True if the exit label is for a loop or not.




*/




static void




bc_parse_createExitLabel(BcParse* p, size_t idx, bool loop)




{




BcInstPtr ip;








assert(p>func == bc_vec_item(&p>prog>fns, p>fidx));








ip.func = loop;




ip.idx = idx;




ip.len = 0;








bc_vec_push(&p>exits, &ip);




bc_parse_createLabel(p, SIZE_MAX);




}








/**




* Pops the correct operators off of the operator stack based on the current




* operator. This is because of the ShuntingYard algorithm. Lower prec means




* higher precedence.




* @param p The parser.




* @param type The operator.




* @param start The previous start of the operator stack. For more




* information, see the bc Parsing section of the Development




* manual (manuals/development.md).




* @param nexprs A pointer to the current number of expressions that have not




* been consumed yet. This is an IN and OUT parameter.




*/




static void




bc_parse_operator(BcParse* p, BcLexType type, size_t start, size_t* nexprs)




{




BcLexType t;




uchar l, r = BC_PARSE_OP_PREC(type);




uchar left = BC_PARSE_OP_LEFT(type);








// While we haven't hit the stop point yet.




while (p>ops.len > start)




{




// Get the top operator.




t = BC_PARSE_TOP_OP(p);








// If it's a right paren, we have reached the end of whatever expression




// this is no matter what.




if (t == BC_LEX_LPAREN) break;








// Break for precedence. Precedence operates differently on left and




// right associativity, by the way. A left associative operator that




// matches the current precedence should take priority, but a right




// associative operator should not.




l = BC_PARSE_OP_PREC(t);




if (l >= r && (l != r  !left)) break;








// Do the housekeeping. In particular, make sure to note that one




// expression was consumed. (Two were, but another was added.)




bc_parse_push(p, BC_PARSE_TOKEN_INST(t));




bc_vec_pop(&p>ops);




*nexprs = !BC_PARSE_OP_PREFIX(t);




}








bc_vec_push(&p>ops, &type);




}








/**




* Parses a right paren. In the ShuntingYard algorithm, it needs to be put on




* the operator stack. But before that, it needs to consume whatever operators




* there are until it hits a left paren.




* @param p The parser.




* @param nexprs A pointer to the current number of expressions that have not




* been consumed yet. This is an IN and OUT parameter.




*/




static void




bc_parse_rightParen(BcParse* p, size_t* nexprs)




{




BcLexType top;








// Consume operators until a left paren.




while ((top = BC_PARSE_TOP_OP(p)) != BC_LEX_LPAREN)




{




bc_parse_push(p, BC_PARSE_TOKEN_INST(top));




bc_vec_pop(&p>ops);




*nexprs = !BC_PARSE_OP_PREFIX(top);




}








// We need to pop the left paren as well.




bc_vec_pop(&p>ops);








// Oh, and we also want the next token.




bc_lex_next(&p>l);




}








/**




* Parses function arguments.




* @param p The parser.




* @param flags Flags restricting what kind of expressions the arguments can




* be.




*/




static void




bc_parse_args(BcParse* p, uint8_t flags)




{




bool comma = false;




size_t nargs;








bc_lex_next(&p>l);








// Print and comparison operators not allowed. Well, comparison operators




// only for POSIX. But we do allow arrays, and we *must* get a value.




flags &= ~(BC_PARSE_PRINT  BC_PARSE_REL);




flags = (BC_PARSE_ARRAY  BC_PARSE_NEEDVAL);








// Count the arguments and parse them.




for (nargs = 0; p>l.t != BC_LEX_RPAREN; ++nargs)




{




bc_parse_expr_status(p, flags, bc_parse_next_arg);








comma = (p>l.t == BC_LEX_COMMA);




if (comma) bc_lex_next(&p>l);




}








// An ending comma is FAIL.




if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








// Now do the call with the number of arguments.




bc_parse_push(p, BC_INST_CALL);




bc_parse_pushIndex(p, nargs);




}








/**




* Parses a function call.




* @param p The parser.




* @param flags Flags restricting what kind of expressions the arguments can




* be.




*/




static void




bc_parse_call(BcParse* p, const char* name, uint8_t flags)




{




size_t idx;








bc_parse_args(p, flags);








// We just assert this because bc_parse_args() should




// ensure that the next token is what it should be.




assert(p>l.t == BC_LEX_RPAREN);








// We cannot use bc_program_insertFunc() here




// because it will overwrite an existing function.




idx = bc_map_index(&p>prog>fn_map, name);








// The function does not exist yet. Create a space for it. If the user does




// not define it, it's a *runtime* error, not a parse error.




if (idx == BC_VEC_INVALID_IDX)




{




idx = bc_program_insertFunc(p>prog, name);








assert(idx != BC_VEC_INVALID_IDX);








// Make sure that this pointer was not invalidated.




p>func = bc_vec_item(&p>prog>fns, p>fidx);




}




// The function exists, so set the right function index.




else idx = ((BcId*) bc_vec_item(&p>prog>fn_map, idx))>idx;








bc_parse_pushIndex(p, idx);








// Make sure to get the next token.




bc_lex_next(&p>l);




}








/**




* Parses a name/identifierbased expression. It could be a variable, an array




* element, an array itself (for function arguments), a function call, etc.




*




*/




static void




bc_parse_name(BcParse* p, BcInst* type, bool* can_assign, uint8_t flags)




{




char* name;








BC_SIG_ASSERT_LOCKED;








// We want a copy of the name since the lexer might overwrite its copy.




name = bc_vm_strdup(p>l.str.v);








BC_SETJMP_LOCKED(vm, err);








// We need the next token to see if it's just a variable or something more.




bc_lex_next(&p>l);








// Array element or array.




if (p>l.t == BC_LEX_LBRACKET)




{




bc_lex_next(&p>l);








// Array only. This has to be a function parameter.




if (p>l.t == BC_LEX_RBRACKET)




{




// Error if arrays are not allowed.




if (BC_ERR(!(flags & BC_PARSE_ARRAY)))




{




bc_parse_err(p, BC_ERR_PARSE_EXPR);




}








*type = BC_INST_ARRAY;




*can_assign = false;




}




else




{




// If we are here, we have an array element. We need to set the




// expression parsing flags.




uint8_t flags2 = (flags & ~(BC_PARSE_PRINT  BC_PARSE_REL)) 




BC_PARSE_NEEDVAL;








bc_parse_expr_status(p, flags2, bc_parse_next_elem);








// The next token *must* be a right bracket.




if (BC_ERR(p>l.t != BC_LEX_RBRACKET))




{




bc_parse_err(p, BC_ERR_PARSE_TOKEN);




}








*type = BC_INST_ARRAY_ELEM;




*can_assign = true;




}








// Make sure to get the next token.




bc_lex_next(&p>l);








// Push the instruction and the name of the identifier.




bc_parse_push(p, *type);




bc_parse_pushName(p, name, false);




}




else if (p>l.t == BC_LEX_LPAREN)




{




// We are parsing a function call; error if not allowed.




if (BC_ERR(flags & BC_PARSE_NOCALL))




{




bc_parse_err(p, BC_ERR_PARSE_TOKEN);




}








*type = BC_INST_CALL;




*can_assign = false;








bc_parse_call(p, name, flags);




}




else




{




// Just a variable.




*type = BC_INST_VAR;




*can_assign = true;




bc_parse_push(p, BC_INST_VAR);




bc_parse_pushName(p, name, true);




}








err:




// Need to make sure to unallocate the name.




free(name);




BC_LONGJMP_CONT(vm);




BC_SIG_MAYLOCK;




}








/**




* Parses a builtin function that takes no arguments. This includes read(),




* rand(), maxibase(), maxobase(), maxscale(), and maxrand().




* @param p The parser.




* @param inst The instruction corresponding to the builtin.




*/




static void




bc_parse_noArgBuiltin(BcParse* p, BcInst inst)




{




// Must have a left paren.




bc_lex_next(&p>l);




if (BC_ERR(p>l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








// Must have a right paren.




bc_lex_next(&p>l);




if ((p>l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








bc_parse_push(p, inst);








bc_lex_next(&p>l);




}








/**




* Parses a builtin function that takes 1 argument. This includes length(),




* sqrt(), abs(), scale(), and irand().




* @param p The parser.




* @param type The lex token.




* @param flags The expression parsing flags for parsing the argument.




* @param prev An out parameter; the previous instruction pointer.




*/




static void




bc_parse_builtin(BcParse* p, BcLexType type, uint8_t flags, BcInst* prev)




{




// Must have a left paren.




bc_lex_next(&p>l);




if (BC_ERR(p>l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








bc_lex_next(&p>l);








// Change the flags as needed for parsing the argument.




flags &= ~(BC_PARSE_PRINT  BC_PARSE_REL);




flags = BC_PARSE_NEEDVAL;








// Since length can take arrays, we need to specially add that flag.




if (type == BC_LEX_KW_LENGTH) flags = BC_PARSE_ARRAY;








bc_parse_expr_status(p, flags, bc_parse_next_rel);








// Must have a right paren.




if (BC_ERR(p>l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








// Adjust previous based on the token and push it.




*prev = type  BC_LEX_KW_LENGTH + BC_INST_LENGTH;




bc_parse_push(p, *prev);








bc_lex_next(&p>l);




}








/**




* Parses a builtin function that takes 3 arguments. This includes modexp() and




* divmod().




*/




static void




bc_parse_builtin3(BcParse* p, BcLexType type, uint8_t flags, BcInst* prev)




{




assert(type == BC_LEX_KW_MODEXP  type == BC_LEX_KW_DIVMOD);








// Must have a left paren.




bc_lex_next(&p>l);




if (BC_ERR(p>l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








bc_lex_next(&p>l);








// Change the flags as needed for parsing the argument.




flags &= ~(BC_PARSE_PRINT  BC_PARSE_REL);




flags = BC_PARSE_NEEDVAL;








bc_parse_expr_status(p, flags, bc_parse_next_builtin);








// Must have a comma.




if (BC_ERR(p>l.t != BC_LEX_COMMA)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








bc_lex_next(&p>l);








bc_parse_expr_status(p, flags, bc_parse_next_builtin);








// Must have a comma.




if (BC_ERR(p>l.t != BC_LEX_COMMA)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








bc_lex_next(&p>l);








// If it is a divmod, parse an array name. Otherwise, just parse another




// expression.




if (type == BC_LEX_KW_DIVMOD)




{




// Must have a name.




if (BC_ERR(p>l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








// This is safe because the next token should not overwrite the name.




bc_lex_next(&p>l);








// Must have a left bracket.




if (BC_ERR(p>l.t != BC_LEX_LBRACKET))




{




bc_parse_err(p, BC_ERR_PARSE_TOKEN);




}








// This is safe because the next token should not overwrite the name.




bc_lex_next(&p>l);








// Must have a right bracket.




if (BC_ERR(p>l.t != BC_LEX_RBRACKET))




{




bc_parse_err(p, BC_ERR_PARSE_TOKEN);




}








// This is safe because the next token should not overwrite the name.




bc_lex_next(&p>l);




}




else bc_parse_expr_status(p, flags, bc_parse_next_rel);








// Must have a right paren.




if (BC_ERR(p>l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








// Adjust previous based on the token and push it.




*prev = type  BC_LEX_KW_MODEXP + BC_INST_MODEXP;




bc_parse_push(p, *prev);








// If we have divmod, we need to assign the modulus to the array element, so




// we need to push the instructions for doing so.




if (type == BC_LEX_KW_DIVMOD)




{




// The zeroth element.




bc_parse_push(p, BC_INST_ZERO);




bc_parse_push(p, BC_INST_ARRAY_ELEM);








// Push the array.




bc_parse_pushName(p, p>l.str.v, false);








// Swap them and assign. After this, the top item on the stack should




// be the quotient.




bc_parse_push(p, BC_INST_SWAP);




bc_parse_push(p, BC_INST_ASSIGN_NO_VAL);




}








bc_lex_next(&p>l);




}








/**




* Parses the scale keyword. This is special because scale can be a value or a




* builtin function.




* @param p The parser.




* @param type An out parameter; the instruction for the parse.




* @param can_assign An out parameter; whether the expression can be assigned




* to.




* @param flags The expression parsing flags for parsing a scale() arg.




*/




static void




bc_parse_scale(BcParse* p, BcInst* type, bool* can_assign, uint8_t flags)




{




bc_lex_next(&p>l);








// Without the left paren, it's just the keyword.




if (p>l.t != BC_LEX_LPAREN)




{




// Set, push, and return.




*type = BC_INST_SCALE;




*can_assign = true;




bc_parse_push(p, BC_INST_SCALE);




return;




}








// Handle the scale function.




*type = BC_INST_SCALE_FUNC;




*can_assign = false;








// Once again, adjust the flags.




flags &= ~(BC_PARSE_PRINT  BC_PARSE_REL);




flags = BC_PARSE_NEEDVAL;








bc_lex_next(&p>l);








bc_parse_expr_status(p, flags, bc_parse_next_rel);








// Must have a right paren.




if (BC_ERR(p>l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);








bc_parse_push(p, BC_INST_SCALE_FUNC);








bc_lex_next(&p>l);




}








/**




* Parses and increment or decrement operator. This is a bit complex.




* @param p The parser.




* @param prev An out parameter; the previous instruction pointer.




* @param can_assign An out parameter; whether the expression can be assigned




* to.




* @param nexs An in/out parameter; the number of expressions in the




* parse tree that are not used.




* @param flags The expression parsing flags for parsing a scale() arg.




*/




static void




bc_parse_incdec(BcParse* p, BcInst* prev, bool* can_assign, size_t* nexs,




uint8_t flags)




{




BcLexType type;




uchar inst;




BcInst etype = *prev;




BcLexType last = p>l.last;








assert(prev != NULL && can_assign != NULL);








// If we can't assign to the previous token, then we have an error.




if (BC_ERR(last == BC_LEX_OP_INC  last == BC_LEX_OP_DEC 




last == BC_LEX_RPAREN))




{




bc_parse_err(p, BC_ERR_PARSE_ASSIGN);




}








// Is the previous instruction for a variable?




if (BC_PARSE_INST_VAR(etype))




{




// If so, this is a postfix operator.




if (!*can_assign) bc_parse_err(p, BC_ERR_PARSE_ASSIGN);








// Only postfix uses BC_INST_INC and BC_INST_DEC.




*prev = inst = BC_INST_INC + (p>l.t != BC_LEX_OP_INC);




bc_parse_push(p, inst);




bc_lex_next(&p>l);




*can_assign = false;




}




else




{




// This is a prefix operator. In that case, we just convert it to




// an assignment instruction.




*prev = inst = BC_INST_ASSIGN_PLUS + (p>l.t != BC_LEX_OP_INC);








bc_lex_next(&p>l);




type = p>l.t;








// Because we parse the next part of the expression

