Gymbo
parser.h
Go to the documentation of this file.
1 
7 #pragma once
8 #include <vector>
9 
10 #include "tokenizer.h"
11 
15 char LETTER_EQ[] = "==";
16 
20 char LETTER_NEQ[] = "!=";
21 
25 char LETTER_LQ[] = "<";
26 
30 char LETTER_LEQ[] = "<=";
31 
35 char LETTER_GQ[] = ">";
36 
40 char LETTER_GEQ[] = ">=";
41 
45 char LETTER_PLUS[] = "+";
46 
50 char LETTER_MINUS[] = "-";
51 
55 char LETTER_MUL[] = "*";
56 
60 char LETTER_DIV[] = "/";
61 
65 char LETTER_AND[] = "&&";
66 
70 char LETTER_OR[] = "||";
71 
75 char LETTER_NOT[] = "!";
76 
80 char LETTER_LP[] = "(";
81 
85 char LETTER_RP[] = ")";
86 
90 char LETTER_SC[] = ";";
91 
95 char LETTER_ELSE[] = "else";
96 
100 char LETTER_LB[] = "{";
101 
105 char LETTER_RB[] = "}";
106 
107 namespace gymbo {
108 
113 typedef enum {
114  ND_ADD, // +
115  ND_SUB, // -
116  ND_MUL, // *
117  ND_DIV, // /
118  ND_AND, // &&
119  ND_OR, // ||
120  ND_NOT, // !
121  ND_EQ, // ==
122  ND_NE, // !=
123  ND_LT, // <
124  ND_LE, // <=
127  ND_NUM, // Integer
132 } NodeKind;
133 
137 struct Node {
144  std::vector<Node *> blocks;
145  float val;
146  int offset;
147 };
148 
149 Node *assign(Token *&token, char *user_input);
150 Node *expr(Token *&token, char *user_input);
151 Node *equality(Token *&token, char *user_input);
152 Node *relational(Token *&token, char *user_input);
153 Node *logical(Token *&token, char *user_input);
154 Node *add(Token *&token, char *user_input);
155 Node *mul(Token *&token, char *user_input);
156 Node *unary(Token *&token, char *user_input);
157 Node *primary(Token *&token, char *user_input);
158 Node *stmt(Token *&token, char *user_input);
159 
167  Node *node = new Node();
168  node->kind = kind;
169  return node;
170 }
171 
181 Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) {
182  Node *node = new_node(kind);
183  node->lhs = lhs;
184  node->rhs = rhs;
185  return node;
186 }
187 
195 Node *new_num(float val) {
196  Node *node = new_node(ND_NUM);
197  node->val = val;
198  return node;
199 }
200 
210 Node *expr(Token *&token, char *user_input) {
211  return assign(token, user_input);
212 }
213 
223 Node *assign(Token *&token, char *user_input) {
224  char LETTER_ASS[] = "=";
225  Node *node = logical(token, user_input);
226  if (consume(token, LETTER_ASS))
227  node = new_binary(ND_ASSIGN, node, assign(token, user_input));
228  return node;
229 }
230 
240 Node *logical(Token *&token, char *user_input) {
241  Node *node = equality(token, user_input);
242 
243  for (;;) {
244  if (consume(token, LETTER_AND))
245  node = new_binary(ND_AND, node, equality(token, user_input));
246  else if (consume(token, LETTER_OR))
247  node = new_binary(ND_OR, node, equality(token, user_input));
248  else
249  return node;
250  }
251 }
252 
262 Node *equality(Token *&token, char *user_input) {
263  Node *node = relational(token, user_input);
264 
265  for (;;) {
266  if (consume(token, LETTER_EQ))
267  node = new_binary(ND_EQ, node, relational(token, user_input));
268  else if (consume(token, LETTER_NEQ))
269  node = new_binary(ND_NE, node, relational(token, user_input));
270  else
271  return node;
272  }
273 }
274 
284 Node *relational(Token *&token, char *user_input) {
285  Node *node = add(token, user_input);
286 
287  for (;;) {
288  if (consume(token, LETTER_LQ))
289  node = new_binary(ND_LT, node, add(token, user_input));
290  else if (consume(token, LETTER_LEQ))
291  node = new_binary(ND_LE, node, add(token, user_input));
292  else if (consume(token, LETTER_GQ))
293  node = new_binary(ND_LT, add(token, user_input), node);
294  else if (consume(token, LETTER_GEQ))
295  node = new_binary(ND_LE, add(token, user_input), node);
296  else
297  return node;
298  }
299 }
300 
310 inline Node *add(Token *&token, char *user_input) {
311  Node *node = mul(token, user_input);
312 
313  for (;;) {
314  if (consume(token, LETTER_PLUS))
315  node = new_binary(ND_ADD, node, mul(token, user_input));
316  else if (consume(token, LETTER_MINUS))
317  node = new_binary(ND_SUB, node, mul(token, user_input));
318  else
319  return node;
320  }
321 }
322 
333 inline Node *mul(Token *&token, char *user_input) {
334  Node *node = unary(token, user_input);
335 
336  for (;;) {
337  if (consume(token, LETTER_MUL))
338  node = new_binary(ND_MUL, node, unary(token, user_input));
339  else if (consume(token, LETTER_DIV))
340  node = new_binary(ND_DIV, node, unary(token, user_input));
341  else
342  return node;
343  }
344 }
345 
356 inline Node *unary(Token *&token, char *user_input) {
357  if (consume(token, LETTER_PLUS)) return unary(token, user_input);
358  if (consume(token, LETTER_MINUS))
359  return new_binary(ND_SUB, new_num(0), unary(token, user_input));
360  return primary(token, user_input);
361 }
362 
372 inline Node *primary(Token *&token, char *user_input) {
373  if (consume(token, LETTER_LP)) {
374  Node *node = expr(token, user_input);
375  expect(token, user_input, LETTER_RP);
376  return node;
377  }
378 
379  Token *tok = consume_ident(token);
380  if (tok) {
381  Node *node = (Node *)std::calloc(1, sizeof(Node));
382  node->kind = ND_LVAR;
383  node->offset = tok->var_id;
384  return node;
385  }
386 
387  return new_num(expect_number(token, user_input));
388 }
389 
397 inline Node *stmt(Token *&token, char *user_input) {
398  Node *node;
399 
400  if (consume(token, LETTER_LB)) {
401  node = new Node();
402  node->kind = ND_BLOCK;
403  while (true) {
404  node->blocks.emplace_back(stmt(token, user_input));
405  if (consume(token, LETTER_RB)) {
406  break;
407  }
408  }
409  } else if (consume_tok(token, TOKEN_RETURN)) {
410  node = new Node();
411  node->kind = ND_RETURN;
412  node->lhs = expr(token, user_input);
413  expect(token, user_input, LETTER_SC);
414  } else if (consume_tok(token, TOKEN_IF)) {
415  node = new Node();
416  node->kind = ND_IF;
417  expect(token, user_input, LETTER_LP);
418  node->cond = expr(token, user_input);
419  expect(token, user_input, LETTER_RP);
420  node->then = stmt(token, user_input);
421  if (consume_tok(token, TOKEN_ELSE)) {
422  node->els = stmt(token, user_input);
423  }
424  } else {
425  node = expr(token, user_input);
426  expect(token, user_input, LETTER_SC);
427  }
428 
429  return node;
430 }
431 
439 inline void generate_ast(Token *&token, char *user_input,
440  std::vector<Node *> &code) {
441  while (!at_eof(token)) code.emplace_back(stmt(token, user_input));
442  code.emplace_back(nullptr);
443 }
444 } // namespace gymbo
Definition: compiler.h:11
Node * stmt(Token *&token, char *user_input)
Parse and construct an AST node representing a statement.
Definition: parser.h:397
bool consume_tok(Token *&token, TokenKind tok)
Consumes the current token if it matches tok.
Definition: tokenizer.h:120
NodeKind
Enumeration representing different node kinds in the Abstract Syntax Tree (AST).
Definition: parser.h:113
@ ND_BLOCK
Definition: parser.h:131
@ ND_MUL
Definition: parser.h:116
@ ND_DIV
Definition: parser.h:117
@ ND_NUM
Definition: parser.h:127
@ ND_SUB
Definition: parser.h:115
@ ND_ASSIGN
Definition: parser.h:125
@ ND_LVAR
Definition: parser.h:126
@ ND_RETURN
Definition: parser.h:128
@ ND_EQ
Definition: parser.h:121
@ ND_NE
Definition: parser.h:122
@ ND_ADD
Definition: parser.h:114
@ ND_LT
Definition: parser.h:123
@ ND_OR
Definition: parser.h:119
@ ND_LE
Definition: parser.h:124
@ ND_IF
Definition: parser.h:129
@ ND_AND
Definition: parser.h:118
@ ND_FOR
Definition: parser.h:130
@ ND_NOT
Definition: parser.h:120
Node * new_binary(NodeKind kind, Node *lhs, Node *rhs)
Create a new binary AST node with the given kind, left-hand side, and right-hand side.
Definition: parser.h:181
Node * new_node(NodeKind kind)
Create a new AST node with the given kind.
Definition: parser.h:166
bool consume(Token *&token, char *op)
Consumes the current token if it matches op.
Definition: tokenizer.h:105
Node * assign(Token *&token, char *user_input)
Parses an assignment statement from a C-like language program.
Definition: parser.h:223
bool at_eof(Token *token)
Checks if the current token is at the end of the program.
Definition: tokenizer.h:183
Node * mul(Token *&token, char *user_input)
Parse and construct an AST node representing multiplication or division.
Definition: parser.h:333
void generate_ast(Token *&token, char *user_input, std::vector< Node * > &code)
Parses a C-like language program into an AST.
Definition: parser.h:439
Node * relational(Token *&token, char *user_input)
Parses a relational expression from a C-like language program.
Definition: parser.h:284
Node * add(Token *&token, char *user_input)
Parse and construct an AST node representing addition or subtraction.
Definition: parser.h:310
float expect_number(Token *&token, char *user_input)
Ensures that the current token is a number.
Definition: tokenizer.h:166
@ TOKEN_ELSE
Token representing the 'else' keyword.
Definition: tokenizer.h:79
@ TOKEN_RETURN
Token representing the 'return' keyword.
Definition: tokenizer.h:77
@ TOKEN_IF
Token representing the 'if' keyword.
Definition: tokenizer.h:78
Node * equality(Token *&token, char *user_input)
Parses an equality expression from a C-like language program.
Definition: parser.h:262
Node * expr(Token *&token, char *user_input)
Parses an expression from a C-like language program.
Definition: parser.h:210
Node * primary(Token *&token, char *user_input)
Parse and construct an AST node representing primary expressions.
Definition: parser.h:372
Node * unary(Token *&token, char *user_input)
Parse and construct an AST node representing unary operations.
Definition: parser.h:356
Node * new_num(float val)
Create a new AST node representing a numeric value with the given value.
Definition: parser.h:195
void expect(Token *&token, char *user_input, char *op)
Ensures that the current token matches op.
Definition: tokenizer.h:150
Token * consume_ident(Token *&token)
Consumes the current token if it is an identifier.
Definition: tokenizer.h:135
Node * logical(Token *&token, char *user_input)
Parses a logical expression from a C-like language program.
Definition: parser.h:240
char LETTER_NEQ[]
Array representing the inequality operator "!=".
Definition: parser.h:20
char LETTER_OR[]
Array representing the logical OR operator "||".
Definition: parser.h:70
char LETTER_LEQ[]
Array representing the less than or equal to operator "<=".
Definition: parser.h:30
char LETTER_RB[]
Array representing the right brace "}".
Definition: parser.h:105
char LETTER_DIV[]
Array representing the division operator "/".
Definition: parser.h:60
char LETTER_NOT[]
Array representing the logical NOT operator "!".
Definition: parser.h:75
char LETTER_LQ[]
Array representing the less than operator "<".
Definition: parser.h:25
char LETTER_EQ[]
Array representing the equality operator "==".
Definition: parser.h:15
char LETTER_LB[]
Array representing the left brace "{".
Definition: parser.h:100
char LETTER_MINUS[]
Array representing the subtraction operator "-".
Definition: parser.h:50
char LETTER_RP[]
Array representing the right parenthesis ")".
Definition: parser.h:85
char LETTER_ELSE[]
Array representing the keyword "else".
Definition: parser.h:95
char LETTER_GQ[]
Array representing the greater than operator ">".
Definition: parser.h:35
char LETTER_MUL[]
Array representing the multiplication operator "*".
Definition: parser.h:55
char LETTER_LP[]
Array representing the left parenthesis "(".
Definition: parser.h:80
char LETTER_PLUS[]
Array representing the addition operator "+".
Definition: parser.h:45
char LETTER_GEQ[]
Array representing the greater than or equal to operator ">=".
Definition: parser.h:40
char LETTER_SC[]
Array representing the semicolon ";".
Definition: parser.h:90
char LETTER_AND[]
Array representing the logical AND operator "&&".
Definition: parser.h:65
Structure representing a node in the Abstract Syntax Tree (AST).
Definition: parser.h:137
Node * lhs
Left-hand side.
Definition: parser.h:139
std::vector< Node * > blocks
Vector of child blocks.
Definition: parser.h:144
int offset
Offset.
Definition: parser.h:146
NodeKind kind
Node kind.
Definition: parser.h:138
Node * cond
Condition.
Definition: parser.h:141
float val
Used if kind is ND_NUM.
Definition: parser.h:145
Node * rhs
Right-hand side.
Definition: parser.h:140
Node * els
'Else' branch
Definition: parser.h:143
Node * then
'Then' branch
Definition: parser.h:142
Structure representing a token.
Definition: tokenizer.h:89
int var_id
Variable ID.
Definition: tokenizer.h:95
Implementation of tokenizer.