From 3e8dd1e956ac406da88bfd400eda36c252515a59 Mon Sep 17 00:00:00 2001 From: Akos Date: Sun, 6 Oct 2024 10:06:39 +0200 Subject: [PATCH] Extend tokenizer with identifiers/keywords and digits --- lexer/lexer.go | 25 +++++++++++++++++++++++++ lexer/lexer_test.go | 14 ++++++-------- token/token.go | 12 ++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 157a406..1342ac9 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -5,6 +5,8 @@ import "moodeng/token" func (l *Lexer) NextToken() token.Token { var tok token.Token + l.skipWhitespace() + switch l.ch { case '=': tok = newToken(token.ASSIGN, l.ch) @@ -28,6 +30,11 @@ func (l *Lexer) NextToken() token.Token { default: if isLetter(l.ch) { tok.Literal = l.readIdentifier() + tok.Type = token.LookupIdent(tok.Literal) + return tok + } else if isDigit(l.ch) { + tok.Type = token.INT + tok.Literal = l.readNumber() return tok } else { tok = newToken(token.ILLEGAL, l.ch) @@ -76,3 +83,21 @@ func (l *Lexer) readIdentifier() string { func isLetter(ch byte) bool { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' } + +func (l *Lexer) skipWhitespace() { + for l.ch == ' ' || l.ch == '\t' || l.ch == '\r' || l.ch == '\n' { + l.readChar() + } +} + +func (l *Lexer) readNumber() string { + position := l.position + for isDigit(l.ch) { + l.readChar() + } + return l.input[position:l.position] +} + +func isDigit(ch byte) bool { + return '0' <= ch && ch <= '9' +} diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 88e02da..209ce1e 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -7,16 +7,14 @@ import ( ) func TestNextToken(t *testing.T) { - input := ` - let five = 5; - let ten = 10; + input := `let five = 5; +let ten = 10; - let add = fn(x, y) { - x + y; - }; +let add = fn(x, y) { +x + y; +}; - let result = add(five, ten); - ` +let result = add(five, ten);` tests := []struct { expectedType token.TokenType diff --git a/token/token.go b/token/token.go index 6f415bf..218eee2 100644 --- a/token/token.go +++ b/token/token.go @@ -7,6 +7,18 @@ type Token struct { Literal string } +var keywords = map[string]TokenType{ + "fn": FUNCTION, + "let": LET, +} + +func LookupIdent(ident string) TokenType { + if tok, ok := keywords[ident]; ok { + return tok + } + return IDENT +} + const ( ILLEGAL = "ILLEGAL" EOF = "EOF"