From 802b286d46efc751994da86fbd46e1473306665a Mon Sep 17 00:00:00 2001 From: Mauricio Massa Date: Sat, 23 Nov 2024 12:36:28 -0500 Subject: [PATCH] add symbol table & line tracking (#12) --- ast/ast.go | 12 ++++++ lexer/lexer.go | 96 +++++++++++++++++++++++------------------- parser/parser.go | 63 +++++++++++++++++++++++---- parser/symbol_table.go | 48 +++++++++++++++++++++ run_tests.sh | 1 + 5 files changed, 168 insertions(+), 52 deletions(-) create mode 100644 parser/symbol_table.go diff --git a/ast/ast.go b/ast/ast.go index 72bf608..a6cbdaf 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -740,3 +740,15 @@ func (fs *ForEachStatement) String() string { return out.String() } + +type NodeStatement struct { + Token token.Token + IPAddress string + Port string +} + +func (ns *NodeStatement) expressionNode() {} +func (ns *NodeStatement) TokenLiteral() string { return ns.Token.Literal } +func (ns *NodeStatement) String() string { + return fmt.Sprintf("node %s %s", ns.IPAddress, ns.Port) +} diff --git a/lexer/lexer.go b/lexer/lexer.go index e91d6cc..0de34c0 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -95,8 +95,8 @@ func (l *Lexer) readChar() { } } -func newToken(tokenType token.TokenType, ch byte) token.Token { - return token.Token{Type: tokenType, Literal: string(ch)} +func newToken(tokenType token.TokenType, ch byte, line int) token.Token { + return token.Token{Type: tokenType, Literal: string(ch), Line: line} } func (l *Lexer) NextToken() token.Token { @@ -113,43 +113,44 @@ func (l *Lexer) NextToken() token.Token { switch l.ch { case '\n': l.line++ + return l.NextToken() case '=': if l.peekChar() == '=' { ch := l.ch l.readChar() literal := string(ch) + string(l.ch) - tok = token.Token{Type: token.EQ, Literal: literal} + tok = token.Token{Type: token.EQ, Literal: literal, Line: l.line} if config.DebugMode { fmt.Printf("DEBUG: Lexer produced EQ token in case '=': %v\n", tok) } } else { - tok = newToken(token.ASSIGN, l.ch) + tok = newToken(token.ASSIGN, l.ch, l.line) } case '{': - tok = newToken(token.LBRACE, l.ch) + tok = newToken(token.LBRACE, l.ch, l.line) l.braceDepth++ if config.DebugMode { fmt.Printf("DEBUG: Lexer identified opening brace '{', depth now %d\n", l.braceDepth) } case '}': - tok = newToken(token.RBRACE, l.ch) + tok = newToken(token.RBRACE, l.ch, l.line) l.braceDepth-- if config.DebugMode { fmt.Printf("DEBUG: Lexer identified closing brace '}', depth now %d\n", l.braceDepth) } case '(': - tok = newToken(token.LPAREN, l.ch) + tok = newToken(token.LPAREN, l.ch, l.line) case ')': - tok = newToken(token.RPAREN, l.ch) + tok = newToken(token.RPAREN, l.ch, l.line) case '[': - tok = newToken(token.LBRACKET, l.ch) + tok = newToken(token.LBRACKET, l.ch, l.line) case ']': - tok = newToken(token.RBRACKET, l.ch) + tok = newToken(token.RBRACKET, l.ch, l.line) case ',': - tok = newToken(token.COMMA, l.ch) + tok = newToken(token.COMMA, l.ch, l.line) case '%': - tok = newToken(token.PERCENT, l.ch) + tok = newToken(token.PERCENT, l.ch, l.line) case '$': tok.Type = token.IDENT tok.Literal = l.readVariable() @@ -158,85 +159,85 @@ func (l *Lexer) NextToken() token.Token { tok.Type = token.STRING tok.Literal = l.readString() case '+': - tok = newToken(token.PLUS, l.ch) + tok = newToken(token.PLUS, l.ch, l.line) case ';': - tok = newToken(token.SEMICOLON, l.ch) + tok = newToken(token.SEMICOLON, l.ch, l.line) case '<': - tok = newToken(token.LT, l.ch) + tok = newToken(token.LT, l.ch, l.line) case '>': - tok = newToken(token.GT, l.ch) + tok = newToken(token.GT, l.ch, l.line) case '*': - tok = newToken(token.ASTERISK, l.ch) + tok = newToken(token.ASTERISK, l.ch, l.line) case '/': - tok = newToken(token.SLASH, l.ch) + tok = newToken(token.SLASH, l.ch, l.line) case '-': if l.isPartOfHeaderName() { return l.readHeaderName() } - tok = newToken(token.MINUS, l.ch) + tok = newToken(token.MINUS, l.ch, l.line) case '&': if l.peekChar() == '&' { ch := l.ch l.readChar() literal := string(ch) + string(l.ch) - tok = token.Token{Type: token.AND, Literal: literal} + tok = token.Token{Type: token.AND, Literal: literal, Line: l.line} } else { - tok = newToken(token.AND, l.ch) + tok = newToken(token.AND, l.ch, l.line) } case '|': if l.peekChar() == '|' { ch := l.ch l.readChar() literal := string(ch) + string(l.ch) - tok = token.Token{Type: token.OR, Literal: literal} + tok = token.Token{Type: token.OR, Literal: literal, Line: l.line} } else { - tok = newToken(token.ILLEGAL, l.ch) + tok = newToken(token.ILLEGAL, l.ch, l.line) } case '!': if l.peekChar() == '=' { ch := l.ch l.readChar() literal := string(ch) + string(l.ch) - tok = token.Token{Type: token.NOT_EQ, Literal: literal} + tok = token.Token{Type: token.NOT_EQ, Literal: literal, Line: l.line} } else { - tok = newToken(token.BANG, l.ch) + tok = newToken(token.BANG, l.ch, l.line) } case ':': if l.peekChar() == ':' { ch := l.ch l.readChar() literal := string(ch) + string(l.ch) - tok = token.Token{Type: token.DOUBLE_COLON, Literal: literal} + tok = token.Token{Type: token.DOUBLE_COLON, Literal: literal, Line: l.line} } else { - tok = newToken(token.COLON, l.ch) + tok = newToken(token.COLON, l.ch, l.line) } case 'H': peekedWord := l.peekWord() if tokenType, isHTTPKeyword := HttpKeywords[peekedWord]; isHTTPKeyword { - l.readIdentifier() // consume the word - return token.Token{Type: tokenType, Literal: peekedWord} + identifier, line := l.readIdentifier() + return token.Token{Type: tokenType, Literal: identifier, Line: line} } - identifier := l.readIdentifier() - return token.Token{Type: token.IDENT, Literal: identifier} + identifier, line := l.readIdentifier() + return token.Token{Type: token.IDENT, Literal: identifier, Line: line} case 'L': peekedWord := l.peekWord() if tokenType, isLBKeyword := LbKeywords[peekedWord]; isLBKeyword { - l.readIdentifier() // consume the word - return token.Token{Type: tokenType, Literal: peekedWord} + l.readIdentifier() + return token.Token{Type: tokenType, Literal: peekedWord, Line: l.line} } - identifier := l.readIdentifier() - return token.Token{Type: token.IDENT, Literal: identifier} + identifier, line := l.readIdentifier() + return token.Token{Type: token.IDENT, Literal: identifier, Line: line} case 'S': peekedWord := l.peekWord() if tokenType, isSSLKeyword := SSLKeywords[peekedWord]; isSSLKeyword { - l.readIdentifier() // consume the word - return token.Token{Type: tokenType, Literal: peekedWord} + identifier, line := l.readIdentifier() + return token.Token{Type: tokenType, Literal: identifier, Line: line} } - identifier := l.readIdentifier() - return token.Token{Type: token.IDENT, Literal: identifier} + identifier, line := l.readIdentifier() + return token.Token{Type: token.IDENT, Literal: identifier, Line: line} case 0: if l.braceDepth > 0 { l.reportError(fmt.Sprintf("Unexpected EOF: unclosed brace, depth: %d", l.braceDepth)) @@ -254,7 +255,7 @@ func (l *Lexer) NextToken() token.Token { // Check for identifier if IsLetter(l.ch) { - tok.Literal = l.readIdentifier() + tok.Literal, tok.Line = l.readIdentifier() switch tok.Literal { case "IP::client_addr": tok.Type = token.IP_CLIENT_ADDR @@ -279,7 +280,7 @@ func (l *Lexer) NextToken() token.Token { // Everything else is an illegal token l.reportError("NextToken: Illegal token found = '%c'\n", l.ch) fmt.Printf("NextToken: Illegal token found = '%c'\n", l.ch) - tok = newToken(token.ILLEGAL, l.ch) + tok = newToken(token.ILLEGAL, l.ch, l.line) } l.readChar() @@ -290,12 +291,16 @@ func (l *Lexer) NextToken() token.Token { return tok } -func (l *Lexer) readIdentifier() string { +func (l *Lexer) readIdentifier() (string, int) { position := l.position + startLine := l.line for IsLetter(l.ch) || IsDigit(l.ch) || l.ch == '_' || l.ch == ':' || l.ch == '.' || l.ch == '-' { + if l.ch == '\n' { + l.line++ + } l.readChar() } - return l.input[position:l.position] + return l.input[position:l.position], startLine } func IsLetter(ch byte) bool { @@ -417,6 +422,7 @@ func (l *Lexer) readNumberOrIpAddress() token.Token { return token.Token{ Type: token.NUMBER, Literal: l.input[startPosition:l.position], + Line: l.line, } } @@ -436,6 +442,7 @@ func (l *Lexer) readIpAddress(startPosition int) token.Token { return token.Token{ Type: token.IP_ADDRESS, Literal: l.input[startPosition:l.position], + Line: l.line, } } @@ -443,6 +450,7 @@ func (l *Lexer) readIpAddress(startPosition int) token.Token { return token.Token{ Type: token.NUMBER, Literal: l.input[startPosition:l.position], + Line: l.line, } } @@ -456,7 +464,7 @@ func (l *Lexer) readHeaderName() token.Token { for l.position < len(l.input) && (IsLetter(l.ch) || IsDigit(l.ch) || l.ch == '-') { l.readChar() } - return token.Token{Type: token.IDENT, Literal: l.input[position:l.position]} + return token.Token{Type: token.IDENT, Literal: l.input[position:l.position], Line: l.line} } func (l *Lexer) reportError(format string, args ...interface{}) { diff --git a/parser/parser.go b/parser/parser.go index cf39c5b..ad0c3fa 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -79,6 +79,8 @@ type Parser struct { braceCount int declaredVariables map[string]bool + symbolTable *SymbolTable + currentLine int } func New(l *lexer.Lexer) *Parser { @@ -86,13 +88,15 @@ func New(l *lexer.Lexer) *Parser { l: l, errors: []string{}, declaredVariables: make(map[string]bool), + symbolTable: NewSymbolTable(), } + // read two tokens so curToken and peekToken are both set p.nextToken() p.nextToken() // Initialize prevToken to an "empty" token or a special "start of file" token - p.prevToken = token.Token{Type: token.ILLEGAL, Literal: ""} + p.prevToken = token.Token{Type: token.ILLEGAL, Literal: "", Line: p.curToken.Line} // Check for lexer errors if lexerErrors := l.Errors(); len(lexerErrors) > 0 { @@ -202,6 +206,7 @@ func (p *Parser) nextToken() { p.prevToken = p.curToken p.curToken = p.peekToken p.peekToken = p.l.NextToken() + p.currentLine = p.curToken.Line if p.curToken.Type == token.LBRACE { p.braceCount++ @@ -380,12 +385,19 @@ func (p *Parser) parseSetStatement() *ast.SetStatement { func (p *Parser) parseExpressionStatement() *ast.ExpressionStatement { if config.DebugMode { - fmt.Printf("DEBUG: parseExpressionStatement Start, current token: %s\n", p.curToken.Type) + fmt.Printf("DEBUG: parseExpressionStatement Start, current token: %s, Line: %d\n", p.curToken.Type, p.currentLine) } stmt := &ast.ExpressionStatement{Token: p.curToken} - if p.curTokenIs(token.IDENT) && p.curToken.Literal == "pool" { - stmt.Expression = p.parsePoolStatement() + if p.curTokenIs(token.IDENT) { + switch p.curToken.Literal { + case "pool": + stmt.Expression = p.parsePoolStatement() + case "node": + stmt.Expression = p.parseNodeStatement() + default: + stmt.Expression = p.parseExpression(LOWEST) + } } else { stmt.Expression = p.parseExpression(LOWEST) } @@ -459,7 +471,7 @@ func (p *Parser) parseExpression(precedence int) ast.Expression { } if identifier != p.curToken.Literal { leftExp = &ast.Identifier{ - Token: token.Token{Type: token.IDENT, Literal: identifier}, + Token: token.Token{Type: token.IDENT, Literal: identifier, Line: p.curToken.Line}, Value: identifier, } } @@ -682,6 +694,9 @@ func (p *Parser) parseBlockStatement() *ast.BlockStatement { block := &ast.BlockStatement{Token: p.curToken} block.Statements = []ast.Statement{} + p.symbolTable.EnterScope() + defer p.symbolTable.ExitScope() + p.braceCount++ p.nextToken() // consume opening brace @@ -1618,9 +1633,11 @@ func (p *Parser) parseMapArgument() ast.Expression { func (p *Parser) parsePoolStatement() ast.Expression { if config.DebugMode { - fmt.Printf("DEBUG: parsePoolStatement Start\n") + fmt.Printf("DEBUG: parsePoolStatement Start - Current token: %s, Line: %d\n", p.curToken.Type, p.currentLine) } + p.symbolTable.Declare(p, POOL) + poolStmt := &ast.CallExpression{ Token: p.curToken, Function: &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}, @@ -1732,7 +1749,7 @@ func (p *Parser) parseStringLiteralContents(s *ast.StringLiteral) ast.Expression end := strings.Index(value, "}") if end != -1 { varName := value[2:end] - parts = append(parts, &ast.Identifier{Token: token.Token{Type: token.IDENT, Literal: varName}, Value: varName}) + parts = append(parts, &ast.Identifier{Token: token.Token{Type: token.IDENT, Literal: varName, Line: p.curToken.Line}, Value: varName}) value = value[end+1:] } else { // Unclosed variable, treat as literal @@ -2150,6 +2167,36 @@ func (p *Parser) isValidCustomIdentifier(s string) bool { func (p *Parser) reportError(format string, args ...interface{}) { msg := fmt.Sprintf(format, args...) - formattedMsg := " " + msg + formattedMsg := fmt.Sprintf(" %s Line: %d", msg, p.curToken.Line) p.errors = append(p.errors, formattedMsg) } + +func (p *Parser) parseNodeStatement() ast.Expression { + if config.DebugMode { + fmt.Printf("DEBUG: parseNodeStatement Start - Current token: %s, Line: %d\n", p.curToken.Type, p.curToken.Line) + } + + p.symbolTable.Declare(p, NODE) + + nodeStmt := &ast.NodeStatement{ + Token: p.curToken, + } + + // Expect the next token to be an IP address + if !p.expectPeek(token.IP_ADDRESS) { + return nil + } + nodeStmt.IPAddress = p.curToken.Literal + + // Expect the next token to be a port number + if !p.expectPeek(token.NUMBER) { + return nil + } + nodeStmt.Port = p.curToken.Literal + + if config.DebugMode { + fmt.Printf("DEBUG: parseNodeStatement End\n") + } + + return nodeStmt +} diff --git a/parser/symbol_table.go b/parser/symbol_table.go new file mode 100644 index 0000000..9a98b55 --- /dev/null +++ b/parser/symbol_table.go @@ -0,0 +1,48 @@ +package parser + +type SymbolType int + +const ( + NODE SymbolType = iota + POOL +) + +type SymbolTable struct { + scopes []map[SymbolType]SymbolInfo +} + +type SymbolInfo struct { + declared bool + line int +} + +func NewSymbolTable() *SymbolTable { + return &SymbolTable{ + scopes: []map[SymbolType]SymbolInfo{make(map[SymbolType]SymbolInfo)}, + } +} + +func (st *SymbolTable) EnterScope() { + st.scopes = append(st.scopes, make(map[SymbolType]SymbolInfo)) +} + +func (st *SymbolTable) ExitScope() { + if len(st.scopes) > 1 { + st.scopes = st.scopes[:len(st.scopes)-1] + } +} + +func (st *SymbolTable) Declare(p *Parser, symType SymbolType) { + currentScope := st.scopes[len(st.scopes)-1] + + if symType == NODE && currentScope[POOL].declared { + p.reportError("Invalid combination: 'node' and 'pool' in the same block.") + return + } + if symType == POOL && currentScope[NODE].declared { + p.reportError("Invalid combination: 'pool' and 'node' in the same block.") + return + } + + currentScope[symType] = SymbolInfo{declared: true} +} diff --git a/run_tests.sh b/run_tests.sh index 47df22c..2d30aa3 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -19,6 +19,7 @@ exclude_files=( "class_match.irule" "complex3.irule" "cookie.irule" + "routes01.irule" ) # Initialize counters and arrays to store results