Feat: Implement robust AST parser for Hi language

This commit is contained in:
2025-09-26 01:53:11 -07:00
parent 072eeb7e99
commit 32d7292e47

View File

@@ -5,40 +5,42 @@ const lexer = moo.compile({
ws: /[ \t]+/, ws: /[ \t]+/,
nl: { match: /\n/, lineBreaks: true }, nl: { match: /\n/, lineBreaks: true },
comment: /\/\/.*?$/, comment: /\/\/.*?$/,
number: /0|[1-9][0-9]*/, number: /0|[1-9][0-9]*(\.[0-9]+)?/,
string: /"(?:\\["\\]|[^\n"\\])*"/, string: /"(?:\\["\\]|[^\n"\\])*"/,
lbrace: '{', lbrace: '{',
rbrace: '}', rbrace: '}',
identifier: /[a-zA-Z_][a-zA-Z0-9_]*/,
colon: ':',
eq: '=',
lparen: '(', lparen: '(',
rparen: ')', rparen: ')',
dot: '.',
plus: '+',
comma: ',',
colon: ':',
eq: '=',
identifier: /[a-zA-Z_][a-zA-Z0-9_]*/,
}); });
%} %}
@lexer lexer @lexer lexer
# Main entry point: a program is a series of statements.
# Yields { type: 'Program', body: <array produced by Statements> }.
# The matched symbols are [leading _, Statements, trailing _], so the
# statement list is the element at index 1 (index 2 is the trailing `_`,
# which is always null).
Program -> _ Statements _ {%
    ([, stmts]) => ({ type: 'Program', body: stmts })
%}
# Statements are separated by newlines.
# Yields a flat array of statement nodes; null entries and Comment nodes are
# dropped from the result.
Statements -> Statement (_NL Statement):* _ {%
    ([first, followers]) => {
        // Each follower is [_NL, Statement]; keep only the statement node.
        const collected = [first, ...followers.map(([, stmt]) => stmt)];
        return collected.filter((stmt) => !(stmt === null || stmt.type === 'Comment'));
    }
%}
# A statement can be a declaration, assignment, expression, or comment.
# Every alternative passes its single child node through unchanged.
Statement -> Declaration         {% ([node]) => node %}
           | Assignment          {% ([node]) => node %}
           | ExpressionStatement {% ([node]) => node %}
           | Comment             {% ([node]) => node %}
# Wraps a bare expression so it can stand as a statement.
ExpressionStatement -> Expression {%
    ([expression]) => ({ type: 'ExpressionStatement', expression })
%}
# A comment token turned into a Comment node carrying the token's value.
Comment -> %comment {%
    ([token]) => ({ type: 'Comment', value: token.value })
%}
Declaration -> %identifier _ %colon _ Expression {% Declaration -> %identifier _ %colon _ Expression {%
@@ -57,23 +59,68 @@ Assignment -> %identifier _ %eq _ Expression {%
}) })
%} %}
OutputCall -> %identifier %lparen _ Expression _ %rparen {% # Expression parsing hierarchy to handle operator precedence
# Top of the expression hierarchy; currently just delegates to Additive.
Expression -> Additive {% ([additive]) => additive %}
# Addition. The rule is left-recursive, so `a + b + c` nests as `(a + b) + c`.
# Matched symbols for the first alternative: [Additive, _, plus, _, Call].
Additive -> Additive _ %plus _ Call {%
                ([left, , , , right]) => ({ type: 'BinaryExpression', operator: '+', left, right })
            %}
          | Call {% ([call]) => call %}
# Call: a Member optionally followed by one parenthesised argument list.
# Yields { type: 'CallExpression', callee, arguments } when the parentheses
# are present; otherwise the Member result is returned unchanged.
Call -> Member ( %lparen _ (ArgumentList):? _ %rparen ):? {%
    ([callee, call]) => {
        if (!call) {
            return callee; // Not a call, just a Member/Primary expression
        }
        // call is [lparen, _, args, _, rparen]. `(ArgumentList):?` produces
        // either null or a one-element wrapper [list], because a parenthesised
        // subexpression always yields an array. Unwrap it so `arguments` is a
        // flat list of expression nodes rather than a nested array.
        const [, , wrappedArgs] = call;
        return {
            type: 'CallExpression',
            callee,
            arguments: wrappedArgs ? wrappedArgs[0] : [],
        };
    }
%}
# Member: a Primary followed by zero or more `.name` accesses.
# The accesses are folded left to right, so `a.b.c` nests as `(a.b).c`.
# With no accesses the Primary result is returned as-is.
Member -> Primary ( %dot %identifier ):* {%
    ([base, accesses]) =>
        accesses.reduce(
            // Each access is [dot, identifier]; only the identifier token is used.
            (object, [, nameToken]) => ({
                type: 'MemberExpression',
                object,
                property: { type: 'Identifier', name: nameToken.value },
            }),
            base
        )
%}
# Primary: the atoms of the expression grammar. A parenthesised expression
# yields the inner expression node directly (no wrapper node is created).
Primary -> Literal    {% ([node]) => node %}
         | Block      {% ([node]) => node %}
         | Identifier {% ([node]) => node %}
         | %lparen _ Expression _ %rparen {% ([, , inner]) => inner %}
# One or more comma-separated expressions, returned as a flat array.
ArgumentList -> Expression ( _ %comma _ Expression):* {%
    ([first, rest]) => {
        const args = [first];
        // Each repetition is [_, comma, _, Expression].
        for (const [, , , expr] of rest) {
            args.push(expr);
        }
        return args;
    }
%}
# An identifier token turned into an Identifier node.
Identifier -> %identifier {%
    ([{ value }]) => ({ type: 'Identifier', name: value })
%}
# Literals. Numbers are converted with Number(); strings keep the token's
# `value` exactly as the lexer produced it.
Literal -> %number {% ([token]) => ({ type: 'NumericLiteral', value: Number(token.value) }) %}
         | %string {% ([token]) => ({ type: 'StringLiteral', value: token.value }) %}
# Block: a brace-delimited list of `key: expression` pairs.
# Yields { type: 'Block', properties: [...] }, with an empty array for `{}`.
# `(KeyValuePairs):?` produces null or a one-element wrapper [pairs] (a
# parenthesised subexpression always yields an array), so the wrapper is
# removed to keep `properties` a flat list of Property nodes.
# NOTE(review): `_` matches only spaces/tabs and comments, not newlines, so
# a newline directly after `{` or before `}` is not accepted - confirm intended.
Block -> %lbrace _ (KeyValuePairs):? _ %rbrace {%
    ([, , wrappedPairs]) => ({
        type: 'Block',
        properties: wrappedPairs ? wrappedPairs[0] : [],
    })
%}
# One or more key/value pairs separated by newlines (_NL), as a flat array.
KeyValuePairs -> KeyValuePair (_NL KeyValuePair):* {%
    // Each repetition is [_NL, KeyValuePair]; keep only the pair.
    ([head, tail]) => [head].concat(tail.map(([, pair]) => pair))
%}
# A single `key: expression` entry inside a Block.
# Matched symbols: [identifier, _, colon, _, Expression].
KeyValuePair -> %identifier _ %colon _ Expression {%
    ([keyToken, , , , value]) => ({ type: 'Property', key: keyToken.value, value })
%}
# Optional inline whitespace and comments; always yields null.
_ -> (%ws | %comment):* {% function () { return null; } %}
# One or more line breaks, each optionally preceded by inline whitespace or
# comments; always yields null.
_NL -> (_ %nl):+ {% function () { return null; } %}