From 32d7292e47fc8121c056715d5f995da315c905ca Mon Sep 17 00:00:00 2001
From: multipleof4
Date: Fri, 26 Sep 2025 01:53:11 -0700
Subject: [PATCH] Feat: Implement robust AST parser for Hi language

Replace the single-purpose OutputCall rule with a general expression
hierarchy (Additive > Call > Member > Primary) and add identifiers,
member access, call expressions, block literals and decimal numbers.

Review fixes folded into this patch:
- number: use non-capturing groups (moo rejects capture groups) and
  group the integer alternation so "0.5" lexes as one number token.
- Call / Block: keep the trailing whitespace inside the optional group
  and unwrap that group, so `arguments` and `properties` are flat
  arrays and "f( )" / "{ }" have a single parse.
---
 grammar.ne | 101 ++++++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 27 deletions(-)

diff --git a/grammar.ne b/grammar.ne
index 9bd9c41..12e2906 100644
--- a/grammar.ne
+++ b/grammar.ne
@@ -5,75 +5,122 @@ const lexer = moo.compile({
   ws: /[ \t]+/,
   nl: { match: /\n/, lineBreaks: true },
   comment: /\/\/.*?$/,
-  number: /0|[1-9][0-9]*/,
+  number: /(?:0|[1-9][0-9]*)(?:\.[0-9]+)?/, // non-capturing groups only: moo rejects capture groups
   string: /"(?:\\["\\]|[^\n"\\])*"/,
   lbrace: '{',
   rbrace: '}',
-  identifier: /[a-zA-Z_][a-zA-Z0-9_]*/,
-  colon: ':',
-  eq: '=',
   lparen: '(',
   rparen: ')',
+  dot: '.',
+  plus: '+',
+  comma: ',',
+  colon: ':',
+  eq: '=',
+  identifier: /[a-zA-Z_][a-zA-Z0-9_]*/,
 });
 %}
 
 @lexer lexer
 
-# Main entry point: a program is a series of statements
 Program -> _ Statements _ {% ([,,stmts]) => ({ type: 'Program', body: stmts }) %}
 
-# Statements are separated by newlines
-Statements -> Statement (_NL Statement):* _ {%
+Statements -> Statement (_NL Statement):* _ {%
   (d) => {
     const stmts = [d[0]];
     for (const rest of d[1]) {
       stmts.push(rest[1]);
     }
-    return stmts.filter(s => s !== null); // Filter out empty lines
+    return stmts.filter(s => s !== null && s.type !== 'Comment'); // comments never reach the AST
   }
 %}
 
-# A statement can be a declaration, assignment, or expression
 Statement -> Declaration {% id %}
   | Assignment {% id %}
-  | OutputCall {% id %}
+  | ExpressionStatement {% id %}
   | Comment {% id %}
 
+ExpressionStatement -> Expression {% (d) => ({ type: 'ExpressionStatement', expression: d[0] }) %}
+
 Comment -> %comment {% d => ({ type: 'Comment', value: d[0].value }) %}
 
-Declaration -> %identifier _ %colon _ Expression {%
-  (d) => ({
+Declaration -> %identifier _ %colon _ Expression {%
+  (d) => ({
     type: 'VariableDeclaration',
     identifier: d[0].value,
     value: d[4]
-  })
+  })
 %}
 
-Assignment -> %identifier _ %eq _ Expression {%
-  (d) => ({
+Assignment -> %identifier _ %eq _ Expression {%
+  (d) => ({
     type: 'Assignment',
     identifier: d[0].value,
     value: d[4]
-  })
+  })
 %}
 
-OutputCall -> %identifier %lparen _ Expression _ %rparen {%
-  (d) => {
-    // For now, only allow '_' as the function name
-    if (d[0].value !== '_') {
-      throw new Error("Only the '_' function is supported for output.");
+# Expression parsing hierarchy to handle operator precedence
+Expression -> Additive {% id %}
+
+Additive -> Additive _ %plus _ Call {% (d) => ({ type: 'BinaryExpression', operator: '+', left: d[0], right: d[4] }) %}
+  | Call {% id %}
+
+Call -> Member ( %lparen _ (ArgumentList _):? %rparen ):? {%
+    (d) => {
+        if (d[1]) { // It is a function call: d[1] is [lparen, _, optional group, rparen]
+            return {
+                type: 'CallExpression',
+                callee: d[0],
+                arguments: d[1][2] ? d[1][2][0] : [] // unwrap the optional (ArgumentList _) group
+            };
+        }
+        return d[0]; // Not a call, just a Member/Primary expression
     }
-    return { type: 'OutputCall', arguments: [d[3]] }
-  }
 %}
 
-# Expressions (very simple for now)
-Expression -> Literal {% id %}
+Member -> Primary ( %dot %identifier ):* {%
+    (d) => {
+        let obj = d[0];
+        for (const part of d[1]) { // each part is [dot, identifier]; nest left to right
+            obj = {
+                type: 'MemberExpression',
+                object: obj,
+                property: { type: 'Identifier', name: part[1].value }
+            };
+        }
+        return obj;
+    }
+%}
+
+Primary -> Literal {% id %}
+  | Block {% id %}
+  | Identifier {% id %}
+  | %lparen _ Expression _ %rparen {% (d) => d[2] %}
+
+ArgumentList -> Expression ( _ %comma _ Expression):* {%
+    (d) => [d[0], ...d[1].map(m => m[3])] // m is [_, comma, _, Expression]
+%}
+
+Identifier -> %identifier {% d => ({ type: 'Identifier', name: d[0].value }) %}
 
 Literal -> %number {% d => ({ type: 'NumericLiteral', value: Number(d[0].value) }) %}
   | %string {% d => ({ type: 'StringLiteral', value: d[0].value }) %}
 
-# Whitespace and Newlines
+Block -> %lbrace _ (KeyValuePairs _):? %rbrace {%
+    (d) => ({ type: 'Block', properties: d[2] ? d[2][0] : [] }) // unwrap the optional (KeyValuePairs _) group
+%}
+
+KeyValuePairs -> KeyValuePair (_NL KeyValuePair):* {%
+    (d) => [d[0], ...d[1].map(m => m[1])] // m is [_NL, KeyValuePair]
+%}
+
+KeyValuePair -> %identifier _ %colon _ Expression {%
+    (d) => ({
+        type: 'Property',
+        key: d[0].value,
+        value: d[4]
+    })
+%}
+
 _ -> (%ws | %comment):* {% () => null %}
 _NL -> (_ %nl):+ {% () => null %}
-