From b6e5858f06ca86d6e6dd0ebd2e1f18ee07f82493 Mon Sep 17 00:00:00 2001
From: Euryn
Date: Sat, 6 Jan 2024 01:03:57 +0100
Subject: [PATCH] finished documentation for interpreter

---
 docs/technical-documentation/index.md |    5 +-
 .../interpreter/01-lexer.md           |  266 ++---
 .../interpreter/02-parser.md          | 1030 ++++++++++++++---
 3 files changed, 1001 insertions(+), 300 deletions(-)

diff --git a/docs/technical-documentation/index.md b/docs/technical-documentation/index.md
index 62959be..66e3b30 100644
--- a/docs/technical-documentation/index.md
+++ b/docs/technical-documentation/index.md
@@ -3,8 +3,9 @@
This section discusses the implementation of the project.
It is written entirely in [TypeScript](https://www.typescriptlang.org/) with [React](https://react.dev/) and bundled together using [vite](https://vitejs.dev/).

-In my persective, the project consists of three major parts.
-The interpreter and the renderer belong to the business logic, while the frontend belongs to the application logic.
+In my perspective, the project consists of three major parts: the interpreter, the renderer, and the frontend.
+
+The interpreter and the renderer belong to the business logic, and the frontend belongs to the application logic.

## Interpreter

diff --git a/docs/technical-documentation/interpreter/01-lexer.md b/docs/technical-documentation/interpreter/01-lexer.md
index 7e46035..55d1705 100644
--- a/docs/technical-documentation/interpreter/01-lexer.md
+++ b/docs/technical-documentation/interpreter/01-lexer.md
@@ -1,12 +1,10 @@
# Lexer

-The lexer tokenizes the user input.
-Its implementation can be found at the [lexer](https://github.com/Eurydia/project-nassi-shneiderman-diagram-builder-online/blob/1bf484c9082dc5ea0fcfc6cf37121d273f7831b5/src/interpreter/lexer.ts) module.
+The primary purpose of this module is to convert a string into a sequence of tokens.

-The interface of the module is as follows:
+Here is the interface of the module:

```ts
-// src/interpreter/lexer.ts
export enum TokenKind {}

export type Token;

export type Lexer;

export const lexerInit: (content: string) => Lexer;
export const lexerGetAllTokens: (lexer: Lexer) => Token[];

-const lexerSafeGetNextTokenThenAdvance: (lexer: Lexer) => Token;
-const lexerTrimLeft: (lexer: Lexer) => void;
+const lexerGetNextTokenThenAdvance: (lexer: Lexer) => Token;
```

## `TokenKind` enum

```ts
export enum TokenKind {
 END = 0,
 SYMBOL,
 KEYWORD,
 LEFT_PAREN,
 RIGHT_PAREN,
 LEFT_CURLY,
 RIGHT_CURLY,
 SEMICOLON,
+
+ WHITE_SPACE,
}
```

-The `TokenKind` enum represents the type of a token.
+The `TokenKind` enumeration categorizes the different types of tokens that a lexer recognizes.
+Each token is assigned a `TokenKind` to denote its type.

-The purpose of each member is discussed later in parts of this section.
+- `END`: represents the end of the input. When a lexer encounters this token, it knows there are no more tokens to process.
+- `SYMBOL`: represents any piece of text.
+- `KEYWORD`: represents keywords, like `if`, `for`, `while`, etc.
+- `LEFT_PAREN` and `RIGHT_PAREN`: represent left and right parentheses (`(...)`), respectively.
+- `LEFT_CURLY` and `RIGHT_CURLY`: represent left and right curly braces (`{...}`), respectively.
+- `SEMICOLON`: represents the semicolon (`;`) used at the end of statements.
+- `WHITE_SPACE`: represents any whitespace characters, like spaces, tabs, or newlines.

## `Token` type

```ts
export type Token = {
 kind: TokenKind;
 text: string;
};
```

-The `Token` type represents a token.
+The `Token` type represents a token in a string.
+A token is a meaningful sequence of characters, such as a keyword or a delimiter.
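+
+For example (an illustrative sketch, not actual output from the module), tokenizing the string `if (x)` would produce a sequence of tokens along these lines:
+
+```ts
+const tokens: Token[] = [
+ { kind: TokenKind.KEYWORD, text: "if" },
+ { kind: TokenKind.WHITE_SPACE, text: " " },
+ { kind: TokenKind.LEFT_PAREN, text: "(" },
+ { kind: TokenKind.SYMBOL, text: "x" },
+ { kind: TokenKind.RIGHT_PAREN, text: ")" },
+];
+```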
+
-The `kind` property represents which kind of token it is.
+A token is an object with two properties:

-The `text` property represents the piece of text that made up the token.
-Since the lexer removes whitespace characters, this property will be non-empty.
-The only exception is `TokenKind.END` tokens which has an empty `text` property.
+- `kind`: represents a `TokenKind`, which is an enumeration of the different types of tokens that can be recognized.
+- `text`: represents the actual text of the token.

## `KEYWORDS` array

```ts
const KEYWORDS: string[] = [
 "for",
 "if",
 "else",
 "while",
 "do",
];
```

-The `KEYWORD` array stores reserved keywords.
+The `KEYWORDS` array defines a list of keywords for the lexer.

-I added it reduce code duplication in [lexerSafeGetNextTokenThenAdvance](#lexersafegetnexttokenthenadvance-function) function.
-In the future, if I want to introduce additional keywords, I can add them to this array.
+During tokenization, a lexer checks if each word in the string is in the `KEYWORDS` array.
+If it is, the lexer creates a `Token` with a `kind` of `TokenKind.KEYWORD`.

## `LITERAL_TOKENS` record

```ts
const LITERAL_TOKENS: Record<string, TokenKind> = {
 "{": TokenKind.LEFT_CURLY,
 "}": TokenKind.RIGHT_CURLY,
 "(": TokenKind.LEFT_PAREN,
 ")": TokenKind.RIGHT_PAREN,
 ";": TokenKind.SEMICOLON,
};
```

-The `LITERAL_TOKENS` record maps each literal token to their appropriate [kind](#tokenkind-enum).
+The `LITERAL_TOKENS` object defines a mapping of literal characters to their corresponding token kinds.

-Similar to the [KEYWORDS](#keywords-array) array, I added it to reduce code duplication in [lexerSafeGetNextTokenThenAdvance](#lexersafegetnexttokenthenadvance-function) function.
+During tokenization, a lexer checks if each character in the string is a key in the `LITERAL_TOKENS` object.
+If it is, the lexer creates a `Token` with a `kind` that corresponds to the value of that key in the `LITERAL_TOKENS` object.

## `Lexer` type

```ts
export type Lexer = {
 content: string;
 contentLength: number;
 cursorPos: number;
};
```

-The `Lexer` type represents a lexer.
-
-Lexers are initialized with [lexerInit](#lexerinit-function) function.
-After a lexer is initialized, use [lexerGetAllTokens](#lexergetalltokens-function) to obtain a list of [token](#token-type).
+The `Lexer` type defines the structure of a lexer object.

-The `content` property stores content of a particular lexer.
-Though the property is not explicitly immutable, a lexer does not change manipulate it directly.
+A lexer is an object with three properties:

-The `contentLength` property stores the length of `content` during initialization of a lexer.
-I added this property so I do not have to keep using [length](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/length) property.
-
-The `cursorPos` property stores the current position of the cursor.
+- `content`: represents the string which is being tokenized.
+- `contentLength`: represents the length of the `content` string. This is useful for when a lexer needs to check if it has reached the end of the `content` string.
+- `cursorPos`: represents the current position of the cursor in the `content` string. This is used to keep track of where a lexer is in the `content` string during tokenization.

## `lexerInit` function

```ts
export const lexerInit = (
 content: string,
): Lexer => {
 return {
  content: content.normalize(),
  contentLength: content.normalize().length,
  cursorPos: 0,
 };
};
```

-The `lexerInit` function initializes a [lexer](#lexer-type) object from user input.
-
-Calling [normalize](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize) on user input is quite important due to the way accented characters behave.
-
-## `lexerGetAllTokens` function
-
-```ts
-export const lexerGetAllTokens = (
- l: Lexer,
-): Token[] => {
- const tokens: Token[] = [];
- let token: Token;
- while (
-  (token = lexerSafeGetNextTokenThenAdvance(l))
-   .kind !== TokenKind.END
- ) {
-  tokens.push(token);
- }
- return tokens;
-};
-```
-
-The `lexerGetAllTokens` function takes a [lexer](#lexer-type) and return a list of [tokens](#token-type) left in it.
-
-I added this function to simplify the interface.
-The real work is done by [lexerSafeGetNextTokenThenAdvance](#lexersafegetnexttokenthenadvance-function) function.
-
-The `tokens` array collects the tokens.
-The function returns this array back to the caller.
-
-The "magic" part about this function lies in following snippet, so let me clarify what happens.
-
-```ts
-let token: Token;
-while (
- (token = lexerSafeGetNextTokenThenAdvance(l))
-  .kind !== TokenKind.END
-) {}
-```
-
-Outside of the while loop, it declares a variable of type [token](#token-type).
-
-```ts
-let token: Token;
-```
-
-Then, in a condition, it invokes [lexerSafeGetNextTokenThenAdvance](#lexersafegetnexttokenthenadvance-function) which returns a token.
-It assign the returned token to the declared variable.
+The `lexerInit` function initializes a new `Lexer` object with a given string.
+It is used to prepare a `Lexer` object for the tokenization process.

-```ts
-token = lexerSafeGetNextTokenThenAdvance(l);
-```
+The function takes a string argument `content`.
+It returns a `Lexer` object with the `content` property set to the normalized version of the `content` string, the `contentLength` property set to the length of the normalized `content` string, and the `cursorPos` property set to 0.

-In JavaScript, the [assignment](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Assignment) operation has a return value which is the right-hand side of the equal sign.
+Calling [normalize](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize) on user input is quite important due to the way [accented characters](https://stackoverflow.com/questions/63013552/whats-the-point-of-string-normalize) behave.

-It can access the `kind` property of the returned token.
+## `lexerGetNextTokenThenAdvance` function

```ts
-(token = lexerSafeGetNextTokenThenAdvance(l))
- .kind;
-```
-
-The loop terminates once the returned token is a `TokenKind.END` token.
-
-```ts
-(token = lexerSafeGetNextTokenThenAdvance(l))
- .kind !== TokenKind.END;
-```
-
-## `lexerSafeGetNextTokenThenAdvance` function
-
-```ts
-export const lexerSafeGetNextTokenThenAdvance = (
+export const lexerGetNextTokenThenAdvance = (
 l: Lexer,
): Token => {
- lexerTrimLeft(l);
-
 const token = {
  kind: TokenKind.END,
  text: "",
@@ -215,7 +154,15 @@ export const lexerSafeGetNextTokenThenAdvance = (
  return token;
 }

+ if (/\s/.test(l.content[l.cursorPos])) {
+  token["kind"] = TokenKind.WHITE_SPACE;
+  token["text"] = l.content[l.cursorPos];
+  l.cursorPos++;
+  return token;
+ }
+
 token["text"] = l.content[l.cursorPos];
+ l.cursorPos++;

 if (token["text"] in LITERAL_TOKENS) {
  token["kind"] = LITERAL_TOKENS[token["text"]];
@@ -241,56 +188,61 @@ export const lexerSafeGetNextTokenThenAdvance = (
};
```

-The `lexerSafeGetNextTokenThenAdvance` function returns a [token](#token-type).
+The `lexerGetNextTokenThenAdvance` function tokenizes the next token from a `Lexer` object and advances the cursor position.
+It is a key part of the tokenization process.
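+
+For instance (an illustrative sketch, assuming the module is imported), repeatedly calling the function consumes the input one token at a time:
+
+```ts
+const l = lexerInit("ab;");
+
+lexerGetNextTokenThenAdvance(l); // { kind: TokenKind.SYMBOL, text: "ab" }
+lexerGetNextTokenThenAdvance(l); // { kind: TokenKind.SEMICOLON, text: ";" }
+lexerGetNextTokenThenAdvance(l); // { kind: TokenKind.END, text: "" }
+```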
-
-This function does a majority of the work, but the idea is that it tokenize the user input and return a token.
-If it has completely tokenize a lexer, it returns a `TokenKind.END` token.
+The function takes a `Lexer` object as an argument and returns a `Token` object.

-First, it calls [lexerTrimLeft](#lexertrimleft-function) to skip all leading whitespace characters.
-This will move the cursor to a non-whitespace character or the end of user input.
+The breakdown of the function is as follows:

-```ts
-lexerTrimLeft(l);
-```
-
-Then, it prepares a placeholder token with `TokenKind.END` and empty string.
+First, it creates a `Token` object with a `kind` of `TokenKind.END` and an empty `text` string.
+If the cursor position in the `Lexer` object is at or beyond the end of the content string, it returns this `Token` object as is.

```ts
+// ...
const token = {
 kind: TokenKind.END,
 text: "",
};
+
+if (l.cursorPos >= l.contentLength) {
+ return token;
+}
+// ...
```

-If the cursor would be out of bound, it returns the placeholder token.
+If the character at the cursor position is a whitespace character, it sets the `kind` of the `Token` object to `TokenKind.WHITE_SPACE`, sets the `text` of the `Token` object to the whitespace character, increments the cursor position, and returns the `Token` object.

```ts
-if (l.cursorPos >= l.contentLength) {
+// ...
+if (/\s/.test(l.content[l.cursorPos])) {
+ token["kind"] = TokenKind.WHITE_SPACE;
+ token["text"] = l.content[l.cursorPos];
+ l.cursorPos++;
 return token;
}
+// ...
```

-Otherwise, there is at least one character left to be tokenize.
-It consumes the current character and advances the cursor.
+Otherwise, it sets the `text` of the `Token` object to the character at the cursor position and increments the cursor position.
+If the character is a key in the `LITERAL_TOKENS` object, it sets the `kind` of the `Token` object to the corresponding value in the `LITERAL_TOKENS` object and returns the `Token` object.

```ts
+// ...
token["text"] = l.content[l.cursorPos];
l.cursorPos++;
-```
-
-It checks whether the character is one of the [LITERAL_TOKENS](#literal_tokens-record) or not.
-If the token should be interpreted as a literal token, it sets the `kind` property to the appropriate value, and returns the token.

-```ts
if (token["text"] in LITERAL_TOKENS) {
 token["kind"] = LITERAL_TOKENS[token["text"]];
 return token;
}
+// ...
```

-If it is not a literal token, it collects the rest of the characters until it reaches a whitespace character or a literal token.
+If the character is not a key in the `LITERAL_TOKENS` object, it continues to add the following characters to the `text` of the `Token` object and increment the cursor position until it reaches a character that is a key in the `LITERAL_TOKENS` object or a whitespace character, or until it reaches the end of the content string.

```ts
+// ...
while (
 l.cursorPos < l.contentLength &&
 !(l.content[l.cursorPos] in LITERAL_TOKENS) &&
 !/\s/.test(l.content[l.cursorPos])
) {
 token["text"] += l.content[l.cursorPos];
 l.cursorPos++;
}
+// ...
```

-The first condition prevents over-indexing.
-
-```ts
-l.cursorPos < l.contentLength;
-```
-
-The second condition terminates the loop if it encounters a literal token.
-
-```ts
-!(l.content[l.cursorPos] in LITERAL_TOKENS);
-```
-
-The third condition terminates the loop if it encounters a whitespace character.
-
-```ts
-!/\s/.test(l.content[l.cursorPos]);
-```
-
-After the loop has terminated, it checks whether the collected characters form a keyword or not.
-If they form a keyword, it sets the `kind` property on the token to `TokenKind.KEYWORD` and returns the token.
+If the `text` of the `Token` object is a keyword, it sets the `kind` of the `Token` object to `TokenKind.KEYWORD` and returns the `Token` object.
+Otherwise, it sets the `kind` of the `Token` object to `TokenKind.SYMBOL` and returns the `Token` object.

```ts
+// ...
if (KEYWORDS.includes(token["text"])) {
 token["kind"] = TokenKind.KEYWORD;
 return token;
}
-```
-
-If they do not form a keyword, it sets the `kind` property on the token to `TokenKind.SYMBOL` and returns the token.

-```ts
token["kind"] = TokenKind.SYMBOL;
return token;
+// ...
```

-## `lexerTrimLeft` function
+## `lexerGetAllTokens` function

```ts
-const lexerTrimLeft = (l: Lexer): void => {
+export const lexerGetAllTokens = (
+ l: Lexer,
+): Token[] => {
+ const tokens: Token[] = [];
+ let token: Token;
 while (
-  l.cursorPos < l.contentLength &&
-  /\s/.test(l.content[l.cursorPos])
+  (token = lexerGetNextTokenThenAdvance(l))
+   .kind !== TokenKind.END
 ) {
-  l.cursorPos++;
+  tokens.push(token);
 }
+ return tokens;
};
```

-The `lexerTrimLeft` function is a helper to [lexerSafeGetNextTokenThenAdvance](#lexersafegetnexttokenthenadvance-function) function.
+The `lexerGetAllTokens` function generates a list of all tokens from a `Lexer` object.
+It is used to tokenize an entire string at once.
+
+The function takes a `Lexer` object as an argument and returns an array of `Token` objects.
+
+The breakdown of the function is as follows:
+
+First, it creates an empty array `tokens` to store the `Token` objects.
+
+```ts
+// ...
+const tokens: Token[] = [];
+// ...
+```
+
+Then, it enters a while loop where it calls the `lexerGetNextTokenThenAdvance` function with the `Lexer` object as the argument to get the next `Token` object and advance the cursor position.
+If the `kind` of the `Token` object is `TokenKind.END`, it breaks out of the while loop.
+Otherwise, it pushes the `Token` object to the `tokens` array and continues the loop.
+
+After the while loop, it returns the `tokens` array, which contains all the `Token` objects that represent the tokens in the `Lexer` object's content string.

-It consumes whitespace characters by moving the cursor forward until it reaches the first a non-whitespace character.
+```ts
+// ...
+let token: Token;
+while (
+ (token = lexerGetNextTokenThenAdvance(l))
+  .kind !== TokenKind.END
+) {
+ tokens.push(token);
+}
+return tokens;
+// ...
+```
diff --git a/docs/technical-documentation/interpreter/02-parser.md b/docs/technical-documentation/interpreter/02-parser.md
index 712905c..95cdda7 100644
--- a/docs/technical-documentation/interpreter/02-parser.md
+++ b/docs/technical-documentation/interpreter/02-parser.md
@@ -1,39 +1,42 @@
# Parser

-The parser builds an [abstract syntax trees](https://en.wikipedia.org/wiki/Abstract_syntax_tree) from tokens generated by the lexer.
-Its implemention can be found at the [parser](https://github.com/Eurydia/project-nassi-shneiderman-diagram-builder-online/blob/1bf484c9082dc5ea0fcfc6cf37121d273f7831b5/src/interpreter/parser.ts) module.
+The primary purpose of this module is to convert a list of tokens into an abstract syntax tree.
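+
+For example (an illustrative sketch that uses functions described later in this chapter, together with the lexer from the previous chapter), the input `while (x) { x; }` would be converted into a tree along these lines:
+
+```ts
+const nodes = parserGetAllNodes(
+ parserInit(
+  lexerGetAllTokens(lexerInit("while (x) { x; }")),
+ ),
+);
+// nodes is roughly:
+// [
+//  {
+//   kind: NodeKind.LOOP_FIRST,
+//   condition: [/* tokens for `x` */],
+//   body: [{ kind: NodeKind.PROCESS, body: [/* tokens for `x` */] }],
+//  },
+// ]
+```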

-The interface of the module is as follows:
+Here is the interface of the module:

```ts
-// src/interpreter/parser.ts
-export enum ASTNodeKind {}
+export enum NodeKind {}

-export type ASTNodeEnd;
-export type ASTNodeProcess;
-export type ASTNodeLoopFirst;
-export type ASTNodeLoopLast;
-export type ASTNodeIfElse;
-export type ASTNode;
+export type NodeEnd;
+export type NodeProcess;
+export type NodeLoopFirst;
+export type NodeLoopLast;
+export type NodeIfElse;
+export type Node;

export type Parser;

export const parserInit: (tokens: Token[]) => Parser;
-export const parserGetAllNodes: (p: Parser) => ASTNode[];
+const parserCollectTokens: (
+ p: Parser,
+ startToken: TokenKind,
+ stopToken: TokenKind
+) => Token[];
+const parserSkipWhiteSpace: (p: Parser) => void;

-const parserGetNextNodeThenAdvance = (p: Parser) => ASTNode
-const parserSafeGetNextTokenThenAdvance: (p: Parser) => Token;
+const parserBuildLoopFirstNode: (p: Parser) => NodeLoopFirst;
+const parserBuildLoopLastNode: (p: Parser) => NodeLoopLast;
+const parserBuildIfElseNode: (p: Parser) => NodeIfElse;

-const parserBuildLoopFirstNode = (p: Parser) => ASTNodeLoopFirst;
-const parserBuildLoopLastNode = (p: Parser) => ASTNodeLoopLast;
-const parserBuildIfElseNode = (p: Parser) => ASTNodeIfElse;
+const parserGetNextNodeThenAdvance: (p: Parser) => Node;
+export const parserGetAllNodes: (p: Parser) => Node[];
```

-## `ASTNodeKind` enum
+## `NodeKind` enum

```ts
-export enum ASTNodeKind {
- END = 0,
+export enum NodeKind {
+ END,
 PROCESS,
 LOOP_FIRST,
 LOOP_LAST,
@@ -41,263 +44,992 @@ export enum ASTNodeKind {
 IF_ELSE,
}
```

-The `ASTNodeKind` enum represents the type of an AST node.
+The `NodeKind` enumeration represents the different kinds of nodes.
+
+It has five members: `END`, `PROCESS`, `LOOP_FIRST`, `LOOP_LAST`, and `IF_ELSE`.
+Each member represents a different kind of node that can appear in the AST.
+
+## `NodeEnd` type
+
+```ts
+type NodeEnd = {
+ kind: NodeKind.END;
+};
+```
+
+The `NodeEnd` type represents an end node.

-It represents node that the parser can build and recongize.
+It is an object with a single property `kind`, which is always `NodeKind.END`.

-### `ASTNodeKind.END`
+## `NodeProcess` type

-`ASTNodeKind.END` represents the end of an abstract syntax tree.
+```ts
+type NodeProcess = {
+ kind: NodeKind.PROCESS;
+ body: Token[];
+};
+```

-This node kind tells the parser to terminate.
+The `NodeProcess` type represents a process node.

-### `ASTNodeKind.PROCESS`
+It is an object with two properties:

-`ASTNodeKind.PROCESS` represents a process block.
+- `kind` is always `NodeKind.PROCESS`.
+- `body` is an array of `Token` objects, which represent the tokens that make up the body of the process.

-### `ASTNodeKind.LOOP_FIRST`
+## `NodeLoopFirst` type

-`ASTNodeKind.LOOP_FIRST` represents a test-first loop block, which includes both for loops and while loops.
+```ts
+type NodeLoopFirst = {
+ kind: NodeKind.LOOP_FIRST;
+ condition: Token[];
+ body: Node[];
+};
+```

-### `ASTNodeKind.LOOP_LAST`
+The `NodeLoopFirst` type represents a loop-first node, such as a `for` or `while` loop.

-`ASTNodeKind.LOOP_FIRST` represents a test-last loop block or do-while loops.
+It is an object with three properties:

-### `ASTNodeKind.IF_ELSE`
+- `kind` is always `NodeKind.LOOP_FIRST`.
+- `condition` is an array of `Token` objects, which represent the tokens that make up the condition of the loop.
+- `body` is an array of `Node` objects, which represent the nodes that make up the body of the loop.
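+
+For illustration (a hand-written sketch, not actual parser output), the statement `for (i < 3) { i; }` would correspond to a node shaped like this:
+
+```ts
+const node: NodeLoopFirst = {
+ kind: NodeKind.LOOP_FIRST,
+ condition: [/* tokens for `i < 3` */],
+ body: [/* a NodeKind.PROCESS node for `i;` */],
+};
+```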

-`ASTNodeKind.IF_ELSE` represents a branching block, which includes both if blocks and if-else blocks.
+For example, when I encounter a node of type `NodeLoopFirst` in a later stage of the pipeline, I can use its `condition` tokens and `body` nodes to reconstruct the test-first loop from the original code.

-## `ASTNode` type
+## `NodeLoopLast` type

```ts
-export type ASTNode =
- | ASTNodeEnd
- | ASTNodeProcess
- | ASTNodeLoopFirst
- | ASTNodeLoopLast
- | ASTNodeIfElse;
+type NodeLoopLast = {
+ kind: NodeKind.LOOP_LAST;
+ condition: Token[];
+ body: Node[];
+};
```

-The `ASTNode` type represents a node in an abstract syntax tree.
+The `NodeLoopLast` type represents a loop-last node, such as a `do-while` loop.

-### `ASTNodeEnd` type
+It is an object with three properties:
+
+- `kind` is always `NodeKind.LOOP_LAST`.
+- `condition` is an array of `Token` objects, which represent the tokens that make up the condition of the loop.
+- `body` is an array of `Node` objects, which represent the nodes that make up the body of the loop.
+
+## `NodeIfElse` type

```ts
-export type ASTNodeEnd = {
- kind: ASTNodeKind.END;
+type NodeIfElse = {
+ kind: NodeKind.IF_ELSE;
+ condition: Token[];
+ bodyIf: Node[];
+ bodyElse: Node[];
};
```

-The `ASTNodeEnd` type represents the end node.
+The `NodeIfElse` type represents an if-else node.

-The sole purpose of this node type is to terminate the parser.
+It is an object with four properties:

-### `ASTNodeProcess` type
+- `kind` is always `NodeKind.IF_ELSE`.
+- `condition` is an array of `Token` objects, which represent the tokens that make up the condition of the `if` statement.
+- `bodyIf` and `bodyElse` properties are arrays of `Node` objects, which represent the nodes that make up the `if` and `else` parts of the if-else statement, respectively.
+
+## `Node` type

```ts
-export type ASTNodeProcess = {
- kind: ASTNodeKind.PROCESS;
- body: Token[];
-};
+export type Node =
+ | NodeEnd
+ | NodeProcess
+ | NodeLoopFirst
+ | NodeLoopLast
+ | NodeIfElse;
```

-The `ASTNodeProcess` type represents a process block.
+The `Node` type represents a node in an abstract syntax tree.

-The content of the process is stored in the `body` property.
+It is a union type that can be one of five types: `NodeEnd`, `NodeProcess`, `NodeLoopFirst`, `NodeLoopLast`, or `NodeIfElse`.
+Each of these types represents a different kind of node that can appear in the tree.

-### `ASTNodeLoopFirst` type
+## `Parser` type

```ts
-export type ASTNodeLoopFirst = {
- kind: ASTNodeKind.LOOP_FIRST;
- condition: Token[];
- body: ASTNode[];
+export type Parser = {
+ tokens: Token[];
+ tokenLength: number;
+ cursorPos: number;
};
```

-The `ASTNodeLoopFirst` type represents test-first loop blocks.
+The `Parser` type represents a parser.
+
+A parser is an object with three properties:
+
+- `tokens` is an array of `Token` objects, which represent the tokens to be parsed.
+- `tokenLength` represents the total number of tokens.
+- `cursorPos` represents the current position of the parser in the array of tokens.

-### `ASTNodeLoopLast` type
+## `parserInit` function

```ts
-export type ASTNodeLoopLast = {
- kind: ASTNodeKind.LOOP_LAST;
- condition: Token[];
- body: ASTNode[];
+export const parserInit = (
+ tokens: Token[],
+): Parser => {
+ return {
+  tokens: tokens,
+  tokenLength: tokens.length,
+  cursorPos: 0,
+ };
};
```

-The `ASTNodeLoopLast` type represents test-last loop blocks.
+The `parserInit` function initializes a new `Parser` object with a given array of `Token` objects.
-### `ASTNodeIfElse` type +The function takes an array of `Token` objects as its argument. +It returns a new `Parser` object with the `tokens` property set to the given array, the `tokenLength` property set to the length of the array, and the `cursorPos` property set to 0. + +## `parserCollectTokens` function ```ts -export type ASTNodeIfElse = { - kind: ASTNodeKind.IF_ELSE; - condition: Token[]; - bodyIf: ASTNode[]; - bodyElse: ASTNode[]; +const parserCollectTokens = ( + p: Parser, + startToken: TokenKind, + stopToken: TokenKind, +): Token[] => { + if (p.cursorPos >= p.tokenLength) { + return []; + } + if (p.tokens[p.cursorPos].kind !== startToken) { + return []; + } + p.cursorPos++; + const tokens: Token[] = []; + let depth = -1; + let token: Token; + while (p.cursorPos < p.tokenLength) { + token = p.tokens[p.cursorPos]; + p.cursorPos++; + if (token.kind === startToken) { + depth--; + } + if (token.kind === stopToken) { + depth++; + } + if (depth === 0) { + break; + } + tokens.push(token); + } + return tokens; }; ``` -The `ASTNodeIfElse` type represents if blocks and if-else blocks. +The `parserCollectTokens` function collects a sequence of tokens from a `Parser` object between a specified start token and stop token. +It is particularly useful for parsing constructs that are enclosed by specific tokens and keeps the code DRY. -## `Parser` type +The function takes three arguments: a `Parser` object, a start token, and a stop token. +It returns an array of `Token` objects that were found between the start token and the stop token. + +The breakdown of the function is as follows: + +First, it checks if the parser's cursor position is beyond the length of the tokens array or if the current token is not the start token. +If either of these conditions is true, it returns an empty array. ```ts -export type Parser = { - tokens: Token[]; - tokenLength: number; - cursorPos: number; +// ... +if (p.cursorPos >= p.tokenLength) { + return []; +} +if (p.tokens[p.cursorPos].kind !== startToken) { + return []; +} +//... +``` + +Next, it increments the cursor position and initializes an empty array `tokens` to collect the tokens and a variable `depth` to keep track of nested tokens of the same kind as the start and stop tokens. + +```ts +// ... +p.cursorPos++; +const tokens: Token[] = []; +let depth = -1; +let token: Token; +// ... +``` + +Then, it enters a loop where it continues to collect tokens until it encounters the stop token at the same nesting level as the start token or until it has processed all the tokens. + +```ts +// ... +while (p.cursorPos < p.tokenLength) { + token = p.tokens[p.cursorPos]; + p.cursorPos++; + if (token.kind === startToken) { + depth--; + } + if (token.kind === stopToken) { + depth++; + } + if (depth === 0) { + break; + } + tokens.push(token); +} +return tokens; +// ... +``` + +## `parserSkipWhiteSpace` function + +```ts +const parserSkipWhiteSpace = ( + p: Parser, +): void => { + while ( + p.cursorPos < p.tokenLength && + p.tokens[p.cursorPos].kind === + TokenKind.WHITE_SPACE + ) { + p.cursorPos++; + } }; ``` -The `Parser` type represents a parser. +The `parserSkipWhiteSpace` function advances the cursor position of a `Parser` object past any white space tokens. + +The function takes a `Parser` object as its argument. +It does not return anything, but it modifies the `cursorPos` property of the `Parser` object. -It has three properties: `tokens`, `tokenLength`, and `cursorPos`. 
+The function enters a loop where it checks if the cursor position is less than the length of the tokens array and if the current token is a white space token. If both conditions are true, it increments the cursor position. +The loop continues until it encounters a non-white space token or has processed all the tokens. -Parsers should be initialized using the `parserInit` function. +## `parserBuildLoopFirstNode` function + +```ts +const parserBuildLoopFirstNode = ( + p: Parser, +): NodeLoopFirst => { + const node: NodeLoopFirst = { + kind: NodeKind.LOOP_FIRST, + body: [], + condition: [], + }; + parserSkipWhiteSpace(p); + if (p.cursorPos >= p.tokenLength) { + return node; + } + if ( + p.tokens[p.cursorPos].kind !== + TokenKind.LEFT_PAREN + ) { + return node; + } + node.condition = parserCollectTokens( + p, + TokenKind.LEFT_PAREN, + TokenKind.RIGHT_PAREN, + ); + parserSkipWhiteSpace(p); + if (p.cursorPos >= p.tokenLength) { + return node; + } + if ( + p.tokens[p.cursorPos].kind !== + TokenKind.LEFT_CURLY + ) { + return node; + } + node.body = parserGetAllNodes( + parserInit( + parserCollectTokens( + p, + TokenKind.LEFT_CURLY, + TokenKind.RIGHT_CURLY, + ), + ), + ); + return node; +}; +``` -To generate an AST, the parser uses the `parserGetAllNodes` function which returns a list of `ASTNode`. +The `parserBuildLoopFirstNode` function builds a `NodeLoopFirst` object from a `Parser` object. -### `Parser.tokens` property +The function takes a `Parser` object as its argument. +It returns a `NodeLoopFirst` object. -`Parser.tokens` stores the tokens generated by the lexer. +Here is the breakdown of the function: -This list is not explicitly immutable, but the parser does not change the list in any way. +First, it creates a new `NodeLoopFirst` object `node` with the `kind` property set to `NodeKind.LOOP_FIRST` and the `condition` and `body` properties set to empty arrays. -### `Parser.tokenLength` property +```ts +// ... +const node: NodeLoopFirst = { + kind: NodeKind.LOOP_FIRST, + body: [], + condition: [], +}; +// ... +``` -`Parser.tokenLength` stores the length of the `Parser.tokens` list. +Then, it uses the `parserSkipWhiteSpace` function to skip any white space tokens in the `Parser` object. +If the cursor position is beyond the length of the tokens array or if the current token is not a left parenthesis token, it returns the `node` object as is. -I just added this property to avoid calling `Parser.tokens.length` multiple times. +```ts +// ... +parserSkipWhiteSpace(p); +if (p.cursorPos >= p.tokenLength) { + return node; +} +if ( + p.tokens[p.cursorPos].kind !== + TokenKind.LEFT_PAREN +) { + return node; +} +// ... +``` -### `Parser.cursorPos` property +Otherwise, it uses the `parserCollectTokens` function to collect the tokens between the left parenthesis token and the corresponding right parenthesis token, and assigns the result to the `condition` property of the `node` object. -`Parser.cursorPos` stores the current position of the cursor. +```ts +// ... +node.condition = parserCollectTokens( + p, + TokenKind.LEFT_PAREN, + TokenKind.RIGHT_PAREN, +); +// ... +``` -## `parserInit` function +Next, it again uses the `parserSkipWhiteSpace` function to skip any white space tokens. +If the cursor position is beyond the length of the tokens array or if the current token is not a left curly brace token, it returns the `node` object as is. ```ts -export const parserInit: ( - tokens: Token[], -) => Parser; +// ... 
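+// skip leading whitespace; the loop's condition must
+// start with a left parenthesis token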
+parserSkipWhiteSpace(p); +if (p.cursorPos >= p.tokenLength) { + return node; +} +if ( + p.tokens[p.cursorPos].kind !== + TokenKind.LEFT_CURLY +) { + return node; +} +// ... ``` -The `parserInit` function initializes a parser. +Otherwise, it uses the `parserCollectTokens` function to collect the tokens between the left curly brace token and the corresponding right curly brace token, initializes a new `Parser` object with these tokens using the `parserInit` function, obtains all nodes from this new `Parser` object using the `parserGetAllNodes` function, assigns the result to the `body` property of the `node` object, and returns the `node` object. -It takes a list of tokens as input and returns a parser object. +```ts +// ... +node.body = parserGetAllNodes( + parserInit( + parserCollectTokens( + p, + TokenKind.LEFT_CURLY, + TokenKind.RIGHT_CURLY, + ), + ), +); +return node; +// ... +``` -## `parserGetAllNodes` function +## `parserBuildLoopLastNode` function ```ts -export const parserGetAllNodes: ( +const parserBuildLoopLastNode = ( p: Parser, -) => ASTNode[]; +): NodeLoopLast => { + const node: NodeLoopLast = { + kind: NodeKind.LOOP_LAST, + body: [], + condition: [], + }; + parserSkipWhiteSpace(p); + if (p.cursorPos >= p.tokenLength) { + return node; + } + if ( + p.tokens[p.cursorPos].kind !== + TokenKind.LEFT_CURLY + ) { + return node; + } + node.body = parserGetAllNodes( + parserInit( + parserCollectTokens( + p, + TokenKind.LEFT_CURLY, + TokenKind.RIGHT_CURLY, + ), + ), + ); + parserSkipWhiteSpace(p); + if (p.cursorPos >= p.tokenLength) { + return node; + } + if ( + p.tokens[p.cursorPos].kind !== + TokenKind.KEYWORD || + p.tokens[p.cursorPos].text !== "while" + ) { + return node; + } + p.cursorPos++; + parserSkipWhiteSpace(p); + if (p.cursorPos >= p.tokenLength) { + return node; + } + if ( + p.tokens[p.cursorPos].kind !== + TokenKind.LEFT_PAREN + ) { + return node; + } + node.condition = parserCollectTokens( + p, + TokenKind.LEFT_PAREN, + TokenKind.RIGHT_PAREN, + ); + parserSkipWhiteSpace(p); + if (p.cursorPos >= p.tokenLength) { + return node; + } + if ( + p.tokens[p.cursorPos].kind === + TokenKind.SEMICOLON + ) { + p.cursorPos++; + } + return node; +}; ``` -The `parserGetAllNodes` function generates a list of `ASTNode`. +The `parserBuildLoopLastNode` function builds a `NodeLoopLast` object from a `Parser` object. -Internally, it repeatedly calls the `parserGetNextNodeThenAdvance` function. -It terminates when it reaches an `ASTNodeKind.END` node, in which case it returns the list of collected AST nodes. +The function takes a `Parser` object as its argument. +It returns a `NodeLoopLast` object. -Note that this function does not return the `ASTNodeKind.END` node. +Here is the breakdown of the function: -## `parserGetNextNodeThenAdvance` function +First, it creates a new `NodeLoopLast` object `node` with the `kind` property set to `NodeKind.LOOP_LAST` and the `condition` and `body` properties set to empty arrays. ```ts -const parserGetNextNodeThenAdvance: ( - p: Parser, -) => ASTNode; +// ... +const node: NodeLoopLast = { + kind: NodeKind.LOOP_LAST, + body: [], + condition: [], +}; +// ... +``` + +Then, it uses the `parserSkipWhiteSpace` function to skip any white space tokens in the `Parser` object. +If the cursor position is beyond the length of the tokens array or if the current token is not a left curly brace token, it returns the `node` object as is. + +```ts +// ... 
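+// a test-last loop starts with its body, so the next
+// token must be a left curly brace rather than a parenthesis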
+parserSkipWhiteSpace(p);
+if (p.cursorPos >= p.tokenLength) {
+ return node;
+}
+if (
+ p.tokens[p.cursorPos].kind !==
+ TokenKind.LEFT_CURLY
+) {
+ return node;
+}
+// ...
+```
+
+Otherwise, it uses the `parserCollectTokens` function to collect the tokens between the left curly brace token and the corresponding right curly brace token, initializes a new `Parser` object with these tokens using the `parserInit` function, obtains all nodes from this new `Parser` object using the `parserGetAllNodes` function, and assigns the result to the `body` property of the `node` object.
+
+```ts
+// ...
+node.body = parserGetAllNodes(
+ parserInit(
+  parserCollectTokens(
+   p,
+   TokenKind.LEFT_CURLY,
+   TokenKind.RIGHT_CURLY,
+  ),
+ ),
+);
+// ...
+```
+
+Next, it again uses the `parserSkipWhiteSpace` function to skip any white space tokens. If the cursor position is beyond the length of the tokens array or if the current token is not a `while` keyword token, it returns the `node` object as is.
+
+```ts
+// ...
+parserSkipWhiteSpace(p);
+if (p.cursorPos >= p.tokenLength) {
+ return node;
+}
+if (
+ p.tokens[p.cursorPos].kind !==
+ TokenKind.KEYWORD ||
+ p.tokens[p.cursorPos].text !== "while"
+) {
+ return node;
+}
+// ...
+```
+
+Otherwise, it increments the cursor position to consume the `while` keyword, uses the `parserCollectTokens` function to collect the tokens between the left parenthesis token and the corresponding right parenthesis token, and assigns the result to the `condition` property of the `node` object.
+
+```ts
+// ...
+p.cursorPos++;
+parserSkipWhiteSpace(p);
+if (p.cursorPos >= p.tokenLength) {
+ return node;
+}
+if (
+ p.tokens[p.cursorPos].kind !==
+ TokenKind.LEFT_PAREN
+) {
+ return node;
+}
+node.condition = parserCollectTokens(
+ p,
+ TokenKind.LEFT_PAREN,
+ TokenKind.RIGHT_PAREN,
+);
+// ...
+```
+
+Finally, it again uses the `parserSkipWhiteSpace` function to skip any white space tokens.
+If the cursor position is beyond the length of the tokens array, it returns the `node` object as is.
+If the current token is a semicolon token, it increments the cursor position to consume the semicolon before returning the `node` object.
+
+```ts
+// ...
+parserSkipWhiteSpace(p);
+if (p.cursorPos >= p.tokenLength) {
+ return node;
+}
+if (
+ p.tokens[p.cursorPos].kind ===
+ TokenKind.SEMICOLON
+) {
+ p.cursorPos++;
+}
+return node;
+// ...
+```
+
+## `parserBuildIfElseNode` function
+
+```ts
+const parserBuildIfElseNode = (
+ p: Parser,
+): NodeIfElse => {
+ const node: NodeIfElse = {
+  kind: NodeKind.IF_ELSE,
+  condition: [],
+  bodyIf: [],
+  bodyElse: [],
+ };
+ parserSkipWhiteSpace(p);
+ if (p.cursorPos >= p.tokenLength) {
+  return node;
+ }
+ if (
+  p.tokens[p.cursorPos].kind !==
+  TokenKind.LEFT_PAREN
+ ) {
+  return node;
+ }
+ node.condition = parserCollectTokens(
+  p,
+  TokenKind.LEFT_PAREN,
+  TokenKind.RIGHT_PAREN,
+ );
+ parserSkipWhiteSpace(p);
+ if (p.cursorPos >= p.tokenLength) {
+  return node;
+ }
+ if (
+  p.tokens[p.cursorPos].kind !==
+  TokenKind.LEFT_CURLY
+ ) {
+  return node;
+ }
+ node.bodyIf = parserGetAllNodes(
+  parserInit(
+   parserCollectTokens(
+    p,
+    TokenKind.LEFT_CURLY,
+    TokenKind.RIGHT_CURLY,
+   ),
+  ),
+ );
+ parserSkipWhiteSpace(p);
+ if (p.cursorPos >= p.tokenLength) {
+  return node;
+ }
+ if (
+  p.tokens[p.cursorPos].kind !==
+  TokenKind.KEYWORD ||
+  p.tokens[p.cursorPos].text !== "else"
+ ) {
+  return node;
+ }
+ p.cursorPos++;
+ parserSkipWhiteSpace(p);
+ if (p.cursorPos >= p.tokenLength) {
+  return node;
+ }
+ if (
+  p.tokens[p.cursorPos].kind !==
+  TokenKind.LEFT_CURLY
+ ) {
+  return node;
+ }
+ node.bodyElse = parserGetAllNodes(
+  parserInit(
+   parserCollectTokens(
+    p,
+    TokenKind.LEFT_CURLY,
+    TokenKind.RIGHT_CURLY,
+   ),
+  ),
+ );
+ return node;
+};
+```
+
+The `parserBuildIfElseNode` function builds a `NodeIfElse` object from a `Parser` object.
+It takes a `Parser` object as its argument, and returns a `NodeIfElse` object.
+
+Here is the breakdown of the function:
+
+First, it creates a new `NodeIfElse` object `node` with the `kind` property set to `NodeKind.IF_ELSE` and the `condition`, `bodyIf`, and `bodyElse` properties set to empty arrays.
+
+```ts
+// ...
+const node: NodeIfElse = {
+ kind: NodeKind.IF_ELSE,
+ condition: [],
+ bodyIf: [],
+ bodyElse: [],
+};
+// ...
+```
+
+Then, it uses the `parserSkipWhiteSpace` function to skip any white space tokens in the `Parser` object.
+If the cursor position is beyond the length of the tokens array or if the current token is not a left parenthesis token, it returns the `node` object as is.
+
+```ts
+// ...
+parserSkipWhiteSpace(p);
+if (p.cursorPos >= p.tokenLength) {
+ return node;
+}
+if (
+ p.tokens[p.cursorPos].kind !==
+ TokenKind.LEFT_PAREN
+) {
+ return node;
+}
+// ...
+```
+
+Otherwise, it uses the `parserCollectTokens` function to collect the tokens between the left parenthesis token and the corresponding right parenthesis token, and assigns the result to the `condition` property of the `node` object.
+
+```ts
+// ...
+node.condition = parserCollectTokens(
+ p,
+ TokenKind.LEFT_PAREN,
+ TokenKind.RIGHT_PAREN,
+);
+// ...
+``` -Then, it collects the "body" nodes by repeatedly calling `parserGetNextNodeThenAdvance` until it reaches a `TokenKind.RIGHT_BRACE` token. +Next, it again uses the `parserSkipWhiteSpace` function to skip any white space tokens. +If the cursor position is beyond the length of the tokens array or if the current token is not a left curly brace token, it returns the `node` object as is. -Once it reaches a `TokenKind.RIGHT_BRACE` token, it calls `parserGetNextNodeThenAdvance` to advance the cursor. -This effectly recursively builds the body of the loop. +```ts +// ... +parserSkipWhiteSpace(p); +if (p.cursorPos >= p.tokenLength) { + return node; +} +if ( + p.tokens[p.cursorPos].kind !== + TokenKind.LEFT_CURLY +) { + return node; +} +// ... +``` -Then, it returns an `ASTNodeKind.LOOP_FIRST` node with collected "condition" tokens and "body" nodes. +Otherwise, it uses the `parserCollectTokens` function to collect the tokens between the left curly brace token and the corresponding right curly brace token, initializes a new `Parser` object with these tokens using the `parserInit` function, obtains all nodes from this new `Parser` object using the `parserGetAllNodes` function, and assigns the result to the `bodyIf` property of the `node` object. -## `parserBuildLoopLastNode` function +```ts +// ... +node.bodyIf = parserGetAllNodes( + parserInit( + parserCollectTokens( + p, + TokenKind.LEFT_CURLY, + TokenKind.RIGHT_CURLY, + ), + ), +); +// ... +``` + +Then, it again uses the `parserSkipWhiteSpace` function to skip any white space tokens. +If the cursor position is beyond the length of the tokens array or if the current token is not an `else` keyword token, it returns the `node` object as is. ```ts -const parserBuildLoopLastNode: ( +// ... +parserSkipWhiteSpace(p); +if (p.cursorPos >= p.tokenLength) { + return node; +} +if ( + p.tokens[p.cursorPos].kind !== + TokenKind.KEYWORD || + p.tokens[p.cursorPos].text !== "else" +) { + return node; +} +// ... +``` + +Otherwise, it increments the cursor position to consume the `else` keyword, uses the `parserCollectTokens` function to collect the tokens between the left curly brace token and the corresponding right curly brace token, initializes a new `Parser` object with these tokens using the `parserInit` function, obtains all nodes from this new `Parser` object using the `parserGetAllNodes` function, and assigns the result to the `bodyElse` property of the `node` object. + +```ts +// ... +p.cursorPos++; +parserSkipWhiteSpace(p); +if (p.cursorPos >= p.tokenLength) { + return node; +} +if ( + p.tokens[p.cursorPos].kind !== + TokenKind.LEFT_CURLY +) { + return node; +} +node.bodyElse = parserGetAllNodes( + parserInit( + parserCollectTokens( + p, + TokenKind.LEFT_CURLY, + TokenKind.RIGHT_CURLY, + ), + ), +); +// ... +``` + +Finally, it returns the `node` object. + +```ts +// ... +return node; +// ... +``` + +By using the `parserBuildIfElseNode` function, I can build a `NodeIfElse` object from a `Parser` object, which represents an if-else node in an AST. This is particularly useful for parsing if-else constructs in the code. 
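+
+For example (an illustrative sketch, not actual parser output), `if (x) { a; } else { b; }` would correspond to a node shaped like this:
+
+```ts
+const node: NodeIfElse = {
+ kind: NodeKind.IF_ELSE,
+ condition: [/* tokens for `x` */],
+ bodyIf: [/* a NodeKind.PROCESS node for `a;` */],
+ bodyElse: [/* a NodeKind.PROCESS node for `b;` */],
+};
+```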
+
+## `parserGetNextNodeThenAdvance` function
+
+```ts
+const parserGetNextNodeThenAdvance = (
+ p: Parser,
+): Node => {
+ parserSkipWhiteSpace(p);
+ if (p.cursorPos >= p.tokenLength) {
+  return {
+   kind: NodeKind.END,
+  };
+ }
+ let token = p.tokens[p.cursorPos];
+ p.cursorPos++;
+ if (token.kind === TokenKind.KEYWORD) {
+  switch (token.text) {
+   case "for":
+   case "while":
+    return parserBuildLoopFirstNode(p);
+   case "do":
+    return parserBuildLoopLastNode(p);
+   case "if":
+    return parserBuildIfElseNode(p);
+   default:
+    break;
+  }
+ }
+ const node: NodeProcess = {
+  kind: NodeKind.PROCESS,
+  body: [],
+ };
+ if (token.kind === TokenKind.SEMICOLON) {
+  return node;
+ }
+ node.body.push(token);
+ while (p.cursorPos < p.tokenLength) {
+  token = p.tokens[p.cursorPos];
+  p.cursorPos++;
+  if (
+   token.kind === TokenKind.END ||
+   token.kind === TokenKind.SEMICOLON
+  ) {
+   break;
+  }
+  node.body.push(token);
+ }
+ return node;
+};
+```
+
+The `parserGetNextNodeThenAdvance` function parses the next node from a `Parser` object and advances the cursor position.
+The function takes a `Parser` object as its argument, and returns a `Node` object.
+
+Here is the breakdown of the function:
+
+First, it uses the `parserSkipWhiteSpace` function to skip any white space tokens in the `Parser` object.
+If the cursor position is beyond the length of the tokens array, it returns a `NodeEnd` object.
+
+```ts
+// ...
+parserSkipWhiteSpace(p);
+if (p.cursorPos >= p.tokenLength) {
+ return {
+  kind: NodeKind.END,
+ };
+}
+// ...
+```
+
+Then, it obtains the current token and increments the cursor position.
+If the current token is a keyword token, it checks the text of the token and calls the corresponding function to build a loop or if-else node, and returns the result.
+
+```ts
+// ...
+let token = p.tokens[p.cursorPos];
+p.cursorPos++;
+if (token.kind === TokenKind.KEYWORD) {
+ switch (token.text) {
+  case "for":
+  case "while":
+   return parserBuildLoopFirstNode(p);
+  case "do":
+   return parserBuildLoopLastNode(p);
+  case "if":
+   return parserBuildIfElseNode(p);
+  default:
+   break;
+ }
+}
+// ...
+```
+
+Otherwise, it creates a new `NodeProcess` object `node` with the `kind` property set to `NodeKind.PROCESS` and the `body` property set to an empty array.
+If the current token is a semicolon token, it returns the `node` object as is.
+
+```ts
+// ...
+const node: NodeProcess = {
+ kind: NodeKind.PROCESS,
+ body: [],
+};
+if (token.kind === TokenKind.SEMICOLON) {
+ return node;
+}
+// ...
+```
+
+Otherwise, it adds the current token to the `body` property of the `node` object.
+
+```ts
+// ...
+node.body.push(token);
+// ...
+```
+
+Then, it enters a loop where it obtains the current token, increments the cursor position, and checks if the current token is an end or semicolon token.
+If it is, it breaks the loop. Otherwise, it adds the current token to the `body` property of the `node` object.
+
+```ts
+// ...
+while (p.cursorPos < p.tokenLength) {
+ token = p.tokens[p.cursorPos];
+ p.cursorPos++;
+ if (
+  token.kind === TokenKind.END ||
+  token.kind === TokenKind.SEMICOLON
+ ) {
+  break;
+ }
+ node.body.push(token);
+}
+// ...
+```
+
+Finally, it returns the `node` object.
+
+```ts
+// ...
+return node;
+// ...
+```
+
+## `parserGetAllNodes` function
+
+```ts
+export const parserGetAllNodes = (
+ p: Parser,
+): Node[] => {
+ const nodes: Node[] = [];
+ let node: Node;
+ while (
+  (node = parserGetNextNodeThenAdvance(p))
+   .kind !== NodeKind.END
+ ) {
+  nodes.push(node);
+ }
+ return nodes;
+};
+```
+
+The `parserGetAllNodes` function parses all nodes from a `Parser` object.
+It takes a `Parser` object as its argument and returns an array of `Node` objects.
+
+Here is the breakdown of the function:
+
+First, it creates an empty array `nodes` to store the nodes.
+
+```ts
+// ...
+const nodes: Node[] = [];
+// ...
+```
+
+Then, it enters a loop where it uses the `parserGetNextNodeThenAdvance` function to get the next node from the `Parser` object and advance the cursor position, and checks if the `kind` property of the node is `NodeKind.END`.
+If it is, it breaks the loop. Otherwise, it adds the node to the `nodes` array.
+
+```ts
+// ...
+let node: Node;
+while (
+ (node = parserGetNextNodeThenAdvance(p))
+  .kind !== NodeKind.END
+) {
+ nodes.push(node);
+}
+// ...
+```
+
+Finally, it returns the `nodes` array.
+
+```ts
+// ...
+return nodes;
+// ...
+```
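+
+As a closing sketch (illustrative, assuming both modules are imported), parsing two simple statements yields two process nodes:
+
+```ts
+const p = parserInit(
+ lexerGetAllTokens(lexerInit("a; b;")),
+);
+
+const nodes = parserGetAllNodes(p);
+// nodes[0] is { kind: NodeKind.PROCESS, body: [/* token for `a` */] }
+// nodes[1] is { kind: NodeKind.PROCESS, body: [/* token for `b` */] }
+```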