commit 44d9b2c7ec9f433841fd7788073a8344f0f8ec0b
Author: tijani
Date:   Thu May 12 20:51:16 2022 +0000

    Monkey can now lex its basic types and keywords.

    git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac

diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..e3ed429
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module monkey
+
+go 1.18
diff --git a/lexer/lexer.go b/lexer/lexer.go
new file mode 100644
index 0000000..7edab73
--- /dev/null
+++ b/lexer/lexer.go
@@ -0,0 +1,102 @@
+package lexer
+
+import "monkey/token"
+
+type Lexer struct {
+    input         string
+    position      int // current position in input (points at current_char)
+    read_position int // current reading position in input (after current_char)
+    current_char  byte
+}
+
+func new(input string) *Lexer {
+    l := &Lexer{input: input}
+    l.read_char() // prime position, read_position, and current_char
+    return l
+}
+
+func new_token(TokenType token.TokenType, ch byte) token.Token {
+    return token.Token{Type: TokenType, Literal: string(ch)}
+}
+
+func (lexer *Lexer) read_char() {
+    if lexer.read_position >= len(lexer.input) {
+        lexer.current_char = 0 // 0 byte signals end of input
+    } else {
+        lexer.current_char = lexer.input[lexer.read_position]
+    }
+    lexer.position = lexer.read_position
+    lexer.read_position += 1
+}
+
+func (lexer *Lexer) read_identifier() string {
+    position := lexer.position
+    for is_letter(lexer.current_char) {
+        lexer.read_char()
+    }
+    return lexer.input[position:lexer.position]
+}
+
+func is_letter(ch byte) bool {
+    return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
+}
+
+func (lexer *Lexer) skip_whitespace() {
+    for lexer.current_char == ' ' || lexer.current_char == '\t' || lexer.current_char == '\n' || lexer.current_char == '\r' {
+        lexer.read_char()
+    }
+}
+
+func (lexer *Lexer) read_number() string {
+    position := lexer.position
+    for is_digit(lexer.current_char) {
+        lexer.read_char()
+    }
+    return lexer.input[position:lexer.position]
+}
+
+func is_digit(ch byte) bool {
+    return '0' <= ch && ch <= '9'
+}
+
+func (lexer *Lexer) next_token() token.Token {
+    var tok token.Token
+    lexer.skip_whitespace()
+
+    switch lexer.current_char {
+    case '=':
+        tok = new_token(token.ASSIGN, lexer.current_char)
+    case ';':
+        tok = new_token(token.SEMICOLON, lexer.current_char)
+    case '(':
+        tok = new_token(token.LPAREN, lexer.current_char)
+    case ')':
+        tok = new_token(token.RPAREN, lexer.current_char)
+    case '{':
+        tok = new_token(token.LBRACE, lexer.current_char)
+    case '}':
+        tok = new_token(token.RBRACE, lexer.current_char)
+    case ',':
+        tok = new_token(token.COMMA, lexer.current_char)
+    case '+':
+        tok = new_token(token.PLUS, lexer.current_char)
+    case 0:
+        tok.Literal = ""
+        tok.Type = token.EOF
+
+    default:
+        if is_letter(lexer.current_char) {
+            tok.Literal = lexer.read_identifier()
+            tok.Type = token.LookupIdentifier(tok.Literal)
+            return tok // read_identifier already advanced past the token
+        } else if is_digit(lexer.current_char) {
+            tok.Type = token.INT
+            tok.Literal = lexer.read_number()
+            return tok // read_number already advanced past the token
+        } else {
+            tok = new_token(token.ILLEGAL, lexer.current_char)
+        }
+    }
+    lexer.read_char()
+    return tok
+}
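(Not part of this commit: a minimal sketch of how the lexer reads end-to-end, written as a godoc-style example in a hypothetical lexer/example_test.go. It has to live in package lexer because new and next_token are unexported; only names from the diff above are used.)

package lexer

import (
    "fmt"

    "monkey/token"
)

// ExampleLexer drains the token stream until next_token reports token.EOF.
func ExampleLexer() {
    l := new("let five = 5;")
    for tok := l.next_token(); tok.Type != token.EOF; tok = l.next_token() {
        fmt.Printf("%s %q\n", tok.Type, tok.Literal)
    }
    // Output:
    // LET "let"
    // IDENT "five"
    // = "="
    // INT "5"
    // ; ";"
}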
{token.ASSIGN, "="}, + {token.INT, "5"}, + {token.SEMICOLON, ";"}, + + {token.LET, "let"}, + {token.IDENT, "ten"}, + {token.ASSIGN, "="}, + {token.INT, "10"}, + {token.SEMICOLON, ";"}, + + {token.LET, "let"}, + {token.IDENT, "add"}, + {token.ASSIGN, "="}, + {token.FUNCTION, "fn"}, + {token.LPAREN, "("}, + {token.IDENT, "x"}, + {token.COMMA, ","}, + {token.IDENT, "y"}, + {token.RPAREN, ")"}, + {token.LBRACE, "{"}, + {token.IDENT, "x"}, + {token.PLUS, "+"}, + {token.IDENT, "y"}, + {token.SEMICOLON, ";"}, + {token.RBRACE, "}"}, + {token.SEMICOLON, ";"}, + + {token.LET, "let"}, + {token.IDENT, "result"}, + {token.ASSIGN, "="}, + {token.IDENT, "add"}, + {token.LPAREN, "("}, + {token.IDENT, "five"}, + {token.COMMA, ","}, + {token.IDENT, "ten"}, + {token.RPAREN, ")"}, + {token.SEMICOLON, ";"}, + {token.EOF, ""}, + } + + l := new(input) + for i, tt := range tests { + tok := l.next_token() + if tok.Type != tt.expectedType { + t.Fatalf("test[%d] - tokentype wrong. expected=%q, got=%q", i, tt.expectedType, tok.Type) + } + if tok.Literal != tt.expectedLiteral { + t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal) + } + } +} diff --git a/token/tokens.go b/token/tokens.go new file mode 100644 index 0000000..e9c9921 --- /dev/null +++ b/token/tokens.go @@ -0,0 +1,45 @@ +package token + +type TokenType string + +type Token struct { + Type TokenType + Literal string +} + +const ( + ILLEGAL = "ILLEGAL" + EOF = "EOF" + + // Identifiers + Literals + IDENT = "IDENT" + INT = "INT" + + // Operators + ASSIGN = "=" + PLUS = "+" + + // Delimiters + COMMA = "," + SEMICOLON = ";" + LPAREN = "(" + RPAREN = ")" + LBRACE = "{" + RBRACE = "}" + + // Keywords + FUNCTION = "FUNCTION" + LET = "LET" +) + +var keywords = map[string]TokenType{ + "fn": FUNCTION, + "let": LET, +} + +func LookupIdentifier(ident string) TokenType { + if tok, ok := keywords[ident]; ok { + return tok + } + return IDENT +}