monna/lexer/lexer.go

package lexer

import "monkey/token"

// Lexer steps through the input one byte at a time and hands out tokens on demand.
type Lexer struct {
	input         string
	position      int  // current position in input (index of current_char)
	read_position int  // current reading position in input (just after current_char)
	current_char  byte // byte currently under examination
}

// New returns a Lexer primed on the first character of input.
func New(input string) *Lexer {
	l := &Lexer{input: input}
	l.read_char()
	return l
}

// NextToken skips any whitespace, then returns the next token in the input
// and advances the lexer past it.
func (l_lexer *Lexer) NextToken() token.Token {
	var tok token.Token

	l_lexer.skip_whitespace()

	switch l_lexer.current_char {
	case '=':
		if l_lexer.peek_char() == '=' {
			ch := l_lexer.current_char
			l_lexer.read_char()
			literal := string(ch) + string(l_lexer.current_char)
			tok = token.Token{Type: token.EQ, Literal: literal}
		} else {
			tok = new_token(token.ASSIGN, l_lexer.current_char)
		}
	case '!':
		if l_lexer.peek_char() == '=' {
			ch := l_lexer.current_char
			l_lexer.read_char()
			literal := string(ch) + string(l_lexer.current_char)
			tok = token.Token{Type: token.NOT_EQ, Literal: literal}
		} else {
			tok = new_token(token.BANG, l_lexer.current_char)
		}
	case ';':
		tok = new_token(token.SEMICOLON, l_lexer.current_char)
	case '(':
		tok = new_token(token.LPAREN, l_lexer.current_char)
	case ')':
		tok = new_token(token.RPAREN, l_lexer.current_char)
	case '{':
		tok = new_token(token.LBRACE, l_lexer.current_char)
	case '}':
		tok = new_token(token.RBRACE, l_lexer.current_char)
	case ',':
		tok = new_token(token.COMMA, l_lexer.current_char)
	case '+':
		tok = new_token(token.PLUS, l_lexer.current_char)
	case '-':
		tok = new_token(token.MINUS, l_lexer.current_char)
	case '/':
		tok = new_token(token.SLASH, l_lexer.current_char)
	case '*':
		tok = new_token(token.ASTERISK, l_lexer.current_char)
	case '<':
		tok = new_token(token.LT, l_lexer.current_char)
	case '>':
		tok = new_token(token.GT, l_lexer.current_char)
	case '"':
		tok.Type = token.STRING
		tok.Literal = l_lexer.read_string()
	case '[':
		tok = new_token(token.LBRACKET, l_lexer.current_char)
	case ']':
		tok = new_token(token.RBRACKET, l_lexer.current_char)
	case 0:
		tok.Literal = ""
		tok.Type = token.EOF
	default:
		if is_letter(l_lexer.current_char) {
			tok.Literal = l_lexer.read_identifier()
			tok.Type = token.LookupIdentifier(tok.Literal)
			return tok // read_identifier already advanced past the identifier
		} else if is_digit(l_lexer.current_char) {
			tok.Type = token.INT
			tok.Literal = l_lexer.read_number()
			return tok // read_number already advanced past the number
		} else {
			tok = new_token(token.ILLEGAL, l_lexer.current_char)
		}
	}

	l_lexer.read_char()
	return tok
}
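
// Usage sketch (an assumption for illustration, not part of the original file): a
// caller such as the parser, a REPL, or a test drives the lexer by calling NextToken
// repeatedly until it sees token.EOF. The helper name collect_tokens is hypothetical.
func collect_tokens(input string) []token.Token {
	l := New(input)
	var tokens []token.Token
	for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
		tokens = append(tokens, tok)
	}
	return tokens
}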

// new_token builds a single-character token of the given type.
func new_token(TokenType token.TokenType, ch byte) token.Token {
	return token.Token{Type: TokenType, Literal: string(ch)}
}

// read_char advances to the next byte of input; current_char becomes 0, our
// sentinel for "end of input", once the input is exhausted.
func (l_lexer *Lexer) read_char() {
	if l_lexer.read_position >= len(l_lexer.input) {
		l_lexer.current_char = 0
	} else {
		l_lexer.current_char = l_lexer.input[l_lexer.read_position]
	}
	l_lexer.position = l_lexer.read_position
	l_lexer.read_position += 1
}

// peek_char returns the next byte without consuming it (0 at end of input).
func (l_lexer *Lexer) peek_char() byte {
	if l_lexer.read_position >= len(l_lexer.input) {
		return 0
	} else {
		return l_lexer.input[l_lexer.read_position]
	}
}

// read_identifier consumes a run of letters and returns it as a string.
func (l_lexer *Lexer) read_identifier() string {
	position := l_lexer.position
	for is_letter(l_lexer.current_char) {
		l_lexer.read_char()
	}
	return l_lexer.input[position:l_lexer.position]
}

// is_letter reports whether ch may appear in an identifier: ASCII letters and '_'.
func is_letter(ch byte) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

// skip_whitespace advances past spaces, tabs, and line breaks.
func (l_lexer *Lexer) skip_whitespace() {
	for l_lexer.current_char == ' ' || l_lexer.current_char == '\t' || l_lexer.current_char == '\n' || l_lexer.current_char == '\r' {
		l_lexer.read_char()
	}
}

// read_number consumes a run of digits and returns it as a string.
func (l_lexer *Lexer) read_number() string {
	position := l_lexer.position
	for is_digit(l_lexer.current_char) {
		l_lexer.read_char()
	}
	return l_lexer.input[position:l_lexer.position]
}

// is_digit reports whether ch is an ASCII digit.
func is_digit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}

/*
read_string reads characters, starting after the opening '"', until it encounters a
closing '"' or the end of input, and returns everything in between.
TODO: one more thing the lexer could do with strings is report an error when it
reaches the end of input without proper termination. Support for character escaping
would be really neat, too.
*/
func (l_lexer *Lexer) read_string() string {
	position := l_lexer.position + 1
	for {
		l_lexer.read_char()
		if l_lexer.current_char == '"' || l_lexer.current_char == 0 {
			break
		}
	}
	return l_lexer.input[position:l_lexer.position]
}
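
// Sketch only (an assumption, not part of the original lexer): one way the character
// escaping mentioned in the TODO above could be added. Because escaped characters
// change the literal, the result is built in a byte slice instead of slicing the
// input. The name read_string_with_escapes is hypothetical.
func (l_lexer *Lexer) read_string_with_escapes() string {
	var out []byte
	for {
		l_lexer.read_char()
		if l_lexer.current_char == '"' || l_lexer.current_char == 0 {
			break
		}
		if l_lexer.current_char == '\\' {
			switch l_lexer.peek_char() {
			case 'n':
				out = append(out, '\n')
			case 't':
				out = append(out, '\t')
			case '"':
				out = append(out, '"')
			case '\\':
				out = append(out, '\\')
			case 0:
				// unterminated escape at end of input; drop it
			default:
				out = append(out, l_lexer.peek_char())
			}
			l_lexer.read_char() // consume the escaped character
			continue
		}
		out = append(out, l_lexer.current_char)
	}
	return string(out)
}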