mirror of
https://github.com/hi-language/transpiler.git
synced 2026-01-14 08:38:37 +00:00
Feat: Implement robust AST parser for Hi language
This commit is contained in:
83
grammar.ne
83
grammar.ne
@@ -5,40 +5,42 @@ const lexer = moo.compile({
|
|||||||
ws: /[ \t]+/,
|
ws: /[ \t]+/,
|
||||||
nl: { match: /\n/, lineBreaks: true },
|
nl: { match: /\n/, lineBreaks: true },
|
||||||
comment: /\/\/.*?$/,
|
comment: /\/\/.*?$/,
|
||||||
number: /0|[1-9][0-9]*/,
|
number: /0|[1-9][0-9]*(\.[0-9]+)?/,
|
||||||
string: /"(?:\\["\\]|[^\n"\\])*"/,
|
string: /"(?:\\["\\]|[^\n"\\])*"/,
|
||||||
lbrace: '{',
|
lbrace: '{',
|
||||||
rbrace: '}',
|
rbrace: '}',
|
||||||
identifier: /[a-zA-Z_][a-zA-Z0-9_]*/,
|
|
||||||
colon: ':',
|
|
||||||
eq: '=',
|
|
||||||
lparen: '(',
|
lparen: '(',
|
||||||
rparen: ')',
|
rparen: ')',
|
||||||
|
dot: '.',
|
||||||
|
plus: '+',
|
||||||
|
comma: ',',
|
||||||
|
colon: ':',
|
||||||
|
eq: '=',
|
||||||
|
identifier: /[a-zA-Z_][a-zA-Z0-9_]*/,
|
||||||
});
|
});
|
||||||
%}
|
%}
|
||||||
|
|
||||||
@lexer lexer
|
@lexer lexer
|
||||||
|
|
||||||
# Main entry point: a program is a series of statements
|
|
||||||
Program -> _ Statements _ {% ([,,stmts]) => ({ type: 'Program', body: stmts }) %}
|
Program -> _ Statements _ {% ([,,stmts]) => ({ type: 'Program', body: stmts }) %}
|
||||||
|
|
||||||
# Statements are separated by newlines
|
|
||||||
Statements -> Statement (_NL Statement):* _ {%
|
Statements -> Statement (_NL Statement):* _ {%
|
||||||
(d) => {
|
(d) => {
|
||||||
const stmts = [d[0]];
|
const stmts = [d[0]];
|
||||||
for (const rest of d[1]) {
|
for (const rest of d[1]) {
|
||||||
stmts.push(rest[1]);
|
stmts.push(rest[1]);
|
||||||
}
|
}
|
||||||
return stmts.filter(s => s !== null); // Filter out empty lines
|
return stmts.filter(s => s !== null && s.type !== 'Comment');
|
||||||
}
|
}
|
||||||
%}
|
%}
|
||||||
|
|
||||||
# A statement can be a declaration, assignment, or expression
|
|
||||||
Statement -> Declaration {% id %}
|
Statement -> Declaration {% id %}
|
||||||
| Assignment {% id %}
|
| Assignment {% id %}
|
||||||
| OutputCall {% id %}
|
| ExpressionStatement {% id %}
|
||||||
| Comment {% id %}
|
| Comment {% id %}
|
||||||
|
|
||||||
|
ExpressionStatement -> Expression {% (d) => ({ type: 'ExpressionStatement', expression: d[0] }) %}
|
||||||
|
|
||||||
Comment -> %comment {% d => ({ type: 'Comment', value: d[0].value }) %}
|
Comment -> %comment {% d => ({ type: 'Comment', value: d[0].value }) %}
|
||||||
|
|
||||||
Declaration -> %identifier _ %colon _ Expression {%
|
Declaration -> %identifier _ %colon _ Expression {%
|
||||||
@@ -57,23 +59,68 @@ Assignment -> %identifier _ %eq _ Expression {%
|
|||||||
})
|
})
|
||||||
%}
|
%}
|
||||||
|
|
||||||
OutputCall -> %identifier %lparen _ Expression _ %rparen {%
|
# Expression parsing hierarchy to handle operator precedence
|
||||||
|
Expression -> Additive {% id %}
|
||||||
|
|
||||||
|
Additive -> Additive _ %plus _ Call {% (d) => ({ type: 'BinaryExpression', operator: '+', left: d[0], right: d[4] }) %}
|
||||||
|
| Call {% id %}
|
||||||
|
|
||||||
|
Call -> Member ( %lparen _ (ArgumentList):? _ %rparen ):? {%
|
||||||
(d) => {
|
(d) => {
|
||||||
// For now, only allow '_' as the function name
|
if (d[1]) { // It is a function call
|
||||||
if (d[0].value !== '_') {
|
return {
|
||||||
throw new Error("Only the '_' function is supported for output.");
|
type: 'CallExpression',
|
||||||
|
callee: d[0],
|
||||||
|
arguments: d[1][2] || []
|
||||||
}
|
}
|
||||||
return { type: 'OutputCall', arguments: [d[3]] }
|
}
|
||||||
|
return d[0]; // Not a call, just a Member/Primary expression
|
||||||
}
|
}
|
||||||
%}
|
%}
|
||||||
|
|
||||||
# Expressions (very simple for now)
|
Member -> Primary ( %dot %identifier ):* {%
|
||||||
Expression -> Literal {% id %}
|
(d) => {
|
||||||
|
let obj = d[0];
|
||||||
|
for (const part of d[1]) {
|
||||||
|
obj = {
|
||||||
|
type: 'MemberExpression',
|
||||||
|
object: obj,
|
||||||
|
property: { type: 'Identifier', name: part[1].value }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return obj;
|
||||||
|
}
|
||||||
|
%}
|
||||||
|
|
||||||
|
Primary -> Literal {% id %}
|
||||||
|
| Block {% id %}
|
||||||
|
| Identifier {% id %}
|
||||||
|
| %lparen _ Expression _ %rparen {% (d) => d[2] %}
|
||||||
|
|
||||||
|
ArgumentList -> Expression ( _ %comma _ Expression):* {%
|
||||||
|
(d) => [d[0], ...d[1].map(m => m[3])]
|
||||||
|
%}
|
||||||
|
|
||||||
|
Identifier -> %identifier {% d => ({ type: 'Identifier', name: d[0].value }) %}
|
||||||
|
|
||||||
Literal -> %number {% d => ({ type: 'NumericLiteral', value: Number(d[0].value) }) %}
|
Literal -> %number {% d => ({ type: 'NumericLiteral', value: Number(d[0].value) }) %}
|
||||||
| %string {% d => ({ type: 'StringLiteral', value: d[0].value }) %}
|
| %string {% d => ({ type: 'StringLiteral', value: d[0].value }) %}
|
||||||
|
|
||||||
# Whitespace and Newlines
|
Block -> %lbrace _ (KeyValuePairs):? _ %rbrace {%
|
||||||
|
(d) => ({ type: 'Block', properties: d[2] || [] })
|
||||||
|
%}
|
||||||
|
|
||||||
|
KeyValuePairs -> KeyValuePair (_NL KeyValuePair):* {%
|
||||||
|
(d) => [d[0], ...d[1].map(m => m[1])]
|
||||||
|
%}
|
||||||
|
|
||||||
|
KeyValuePair -> %identifier _ %colon _ Expression {%
|
||||||
|
(d) => ({
|
||||||
|
type: 'Property',
|
||||||
|
key: d[0].value,
|
||||||
|
value: d[4]
|
||||||
|
})
|
||||||
|
%}
|
||||||
|
|
||||||
_ -> (%ws | %comment):* {% () => null %}
|
_ -> (%ws | %comment):* {% () => null %}
|
||||||
_NL -> (_ %nl):+ {% () => null %}
|
_NL -> (_ %nl):+ {% () => null %}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user