monna/lexer/lexer.go

package lexer

import "monna/token"

// Lexer holds the scanning state for one input string.
//
// input is the text being tokenized. current_char is the byte currently
// under examination and position is its index in input; read_position is
// the index of the byte that the next read_char call will load.
// current_char is set to 0 once reading runs past the end of input.
type Lexer struct {
	input         string
	position      int // current position in input (the current_char)
	read_position int // current reading position in input (after current_char)
	current_char  byte
}

// New returns a Lexer over input with its first byte already loaded into
// current_char, so NextToken can be called immediately.
func New(input string) *Lexer {
	lex := new(Lexer)
	lex.input = input
	// Prime the cursor: load input[0] (or 0 for empty input).
	lex.read_char()
	return lex
}

// NextToken skips any leading whitespace and returns the token that starts
// at the current character, leaving the lexer positioned just after it.
//
// Once the end of input is reached it returns a token.EOF token with an
// empty literal. A character that starts no known token yields a
// token.ILLEGAL token whose literal is that character.
func (l_lexer *Lexer) NextToken() token.Token {
	l_lexer.skip_whitespace()
	first := l_lexer.current_char

	// Identifiers/keywords and integers are scanned by helpers that stop on
	// the first character after the lexeme, so they return right away
	// without the trailing read_char used for the other tokens.
	if is_letter(first) {
		word := l_lexer.read_identifier()
		return token.Token{Type: token.LookupIdentifier(word), Literal: word}
	}
	if is_digit(first) {
		return token.Token{Type: token.INT, Literal: l_lexer.read_number()}
	}

	var result token.Token
	switch first {
	case 0:
		result = token.Token{Type: token.EOF, Literal: ""}
	case '"':
		// read_string stops on the closing quote (or on end of input); the
		// read_char at the bottom then steps past it.
		result = token.Token{Type: token.STRING, Literal: l_lexer.read_string()}
	case '=':
		if l_lexer.peek_char() != '=' {
			result = new_token(token.ASSIGN, first)
		} else {
			// Two-character operator "==": consume the second '='.
			l_lexer.read_char()
			result = token.Token{Type: token.EQ, Literal: string(first) + string(l_lexer.current_char)}
		}
	case '!':
		if l_lexer.peek_char() != '=' {
			result = new_token(token.BANG, first)
		} else {
			// Two-character operator "!=": consume the '='.
			l_lexer.read_char()
			result = token.Token{Type: token.NOT_EQ, Literal: string(first) + string(l_lexer.current_char)}
		}
	default:
		// Everything left is a single-character token; anything not
		// listed below is reported as ILLEGAL.
		var kind token.TokenType
		switch first {
		case ';':
			kind = token.SEMICOLON
		case '(':
			kind = token.LPAREN
		case ')':
			kind = token.RPAREN
		case '{':
			kind = token.LBRACE
		case '}':
			kind = token.RBRACE
		case ',':
			kind = token.COMMA
		case '+':
			kind = token.PLUS
		case '-':
			kind = token.MINUS
		case '/':
			kind = token.SLASH
		case '*':
			kind = token.ASTERISK
		case '<':
			kind = token.LT
		case '>':
			kind = token.GT
		default:
			kind = token.ILLEGAL
		}
		result = new_token(kind, first)
	}

	// Step past the last character of the token just produced.
	l_lexer.read_char()
	return result
}

// new_token builds a token of the given type whose literal is the single
// character ch converted to a string.
func new_token(TokenType token.TokenType, ch byte) token.Token {
	var tok token.Token
	tok.Type = TokenType
	tok.Literal = string(ch)
	return tok
}

// read_char advances the lexer by one byte: it loads the byte at
// read_position into current_char (or 0 when read_position is at or past
// the end of input), sets position to that index, and moves read_position
// one further.
func (l_lexer *Lexer) read_char() {
	next := l_lexer.read_position

	// 0 doubles as the end-of-input marker.
	l_lexer.current_char = 0
	if next < len(l_lexer.input) {
		l_lexer.current_char = l_lexer.input[next]
	}

	l_lexer.position = next
	l_lexer.read_position = next + 1
}

// peek_char returns the byte at read_position without advancing the lexer,
// or 0 when read_position is at or past the end of input.
func (l_lexer *Lexer) peek_char() byte {
	if idx := l_lexer.read_position; idx < len(l_lexer.input) {
		return l_lexer.input[idx]
	}
	return 0
}

// read_identifier consumes the run of is_letter characters starting at the
// current position and returns it. On return the lexer sits on the first
// character that is not part of the run.
func (l_lexer *Lexer) read_identifier() string {
	start := l_lexer.position
	for {
		if !is_letter(l_lexer.current_char) {
			return l_lexer.input[start:l_lexer.position]
		}
		l_lexer.read_char()
	}
}

// is_letter reports whether ch is an ASCII letter (a-z or A-Z) or an
// underscore.
func is_letter(ch byte) bool {
	switch {
	case ch >= 'a' && ch <= 'z':
		return true
	case ch >= 'A' && ch <= 'Z':
		return true
	default:
		return ch == '_'
	}
}

// skip_whitespace advances past any run of spaces, tabs, newlines and
// carriage returns, stopping on the first character that is none of these.
func (l_lexer *Lexer) skip_whitespace() {
	for {
		switch l_lexer.current_char {
		case ' ', '\t', '\n', '\r':
			l_lexer.read_char()
		default:
			return
		}
	}
}

// read_number consumes the run of is_digit characters starting at the
// current position and returns it. On return the lexer sits on the first
// non-digit character.
func (l_lexer *Lexer) read_number() string {
	start := l_lexer.position
	end := start
	// After every consumed digit, end tracks the lexer's new position.
	for ; is_digit(l_lexer.current_char); end = l_lexer.position {
		l_lexer.read_char()
	}
	return l_lexer.input[start:end]
}

// is_digit reports whether ch is an ASCII decimal digit ('0' through '9').
func is_digit(ch byte) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}

// read_string returns the text between the character after the current one
// and the next '"' (or the end of input / a 0 byte, whichever comes first).
// It always consumes at least one character and leaves the lexer on the
// terminating character itself.
func (l_lexer *Lexer) read_string() string {
	start := l_lexer.position + 1

	// Step off the current character, then keep going until a terminator.
	l_lexer.read_char()
	for l_lexer.current_char != '"' && l_lexer.current_char != 0 {
		l_lexer.read_char()
	}
	return l_lexer.input[start:l_lexer.position]
}
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`package lexer`

Update name change. 2024-08-28 19:31:35 -05:00			`import "monna/token"`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00
			`type Lexer struct {`
			`input string`
			`position int // current position in input (the current_char)`
			`read_position int // current reading position in input (after current_char)`
			`current_char byte`
			`}`

Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`func New(input string) *Lexer {`
			`l := &Lexer{input: input}`
			`l.read_char()`
			`return l`
Formatting git-svn-id: https://svn.tlawal.org/svn/monkey@15 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-26 11:33:09 -05:00			`}`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00
			`func (l_lexer *Lexer) NextToken() token.Token {`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`var tok token.Token`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`l_lexer.skip_whitespace()`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`switch l_lexer.current_char {`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case '=':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`if l_lexer.peek_char() == '=' {`
			`ch := l_lexer.current_char`
			`l_lexer.read_char()`
			`literal := string(ch) + string(l_lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`tok = token.Token{Type: token.EQ, Literal: literal}`
			`} else {`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.ASSIGN, l_lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`}`
			`case '!':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`if l_lexer.peek_char() == '=' {`
			`ch := l_lexer.current_char`
			`l_lexer.read_char()`
			`literal := string(ch) + string(l_lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`tok = token.Token{Type: token.NOT_EQ, Literal: literal}`
			`} else {`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.BANG, l_lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`}`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case ';':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.SEMICOLON, l_lexer.current_char)`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case '(':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.LPAREN, l_lexer.current_char)`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case ')':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.RPAREN, l_lexer.current_char)`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case '{':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.LBRACE, l_lexer.current_char)`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case '}':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.RBRACE, l_lexer.current_char)`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case ',':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.COMMA, l_lexer.current_char)`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case '+':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.PLUS, l_lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`case '-':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.MINUS, l_lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`case '/':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.SLASH, l_lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`case '*':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.ASTERISK, l_lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`case '<':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.LT, l_lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`case '>':`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.GT, l_lexer.current_char)`
Now supports strings. git-svn-id: https://svn.tlawal.org/svn/monkey@63 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2023-05-09 14:59:18 -05:00			`case '"':`
			`tok.Literal = l_lexer.read_string()`
			`tok.Type = token.STRING`

Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case 0:`
			`tok.Literal = ""`
			`tok.Type = token.EOF`

			`default:`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`if is_letter(l_lexer.current_char) {`
			`tok.Literal = l_lexer.read_identifier()`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`tok.Type = token.LookupIdentifier(tok.Literal)`
			`return tok`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`} else if is_digit(l_lexer.current_char) {`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`tok.Type = token.INT`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok.Literal = l_lexer.read_number()`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`return tok`
			`} else {`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`tok = new_token(token.ILLEGAL, l_lexer.current_char)`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`}`
			`}`
Do not show username at interpreter startup. Now Monk not monkey Formatting and code clarity git-svn-id: https://svn.tlawal.org/svn/monkey@9 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-23 09:18:27 -05:00			`l_lexer.read_char()`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`return tok`
			`}`
Formatting git-svn-id: https://svn.tlawal.org/svn/monkey@15 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-26 11:33:09 -05:00
			`func new_token(TokenType token.TokenType, ch byte) token.Token {`
			`return token.Token{Type: TokenType, Literal: string(ch)}`
			`}`

			`func (l_lexer *Lexer) read_char() {`
			`if l_lexer.read_position >= len(l_lexer.input) {`
			`l_lexer.current_char = 0`
			`} else {`
			`l_lexer.current_char = l_lexer.input[l_lexer.read_position]`
			`}`
			`l_lexer.position = l_lexer.read_position`
			`l_lexer.read_position += 1`
			`}`

			`func (l_lexer *Lexer) peek_char() byte {`
			`if l_lexer.read_position >= len(l_lexer.input) {`
			`return 0`
			`} else {`
			`return l_lexer.input[l_lexer.read_position]`
			`}`
			`}`

			`func (l_lexer *Lexer) read_identifier() string {`
			`position := l_lexer.position`
			`for is_letter(l_lexer.current_char) {`
			`l_lexer.read_char()`
			`}`
			`return l_lexer.input[position:l_lexer.position]`
			`}`

			`func is_letter(ch byte) bool {`
			`return 'a' <= ch && ch <= 'z' \|\| 'A' <= ch && ch <= 'Z' \|\| ch == '_'`
			`}`

			`func (l_lexer *Lexer) skip_whitespace() {`
			`for l_lexer.current_char == ' ' \|\| l_lexer.current_char == '\t' \|\| l_lexer.current_char == '\n' \|\| l_lexer.current_char == '\r' {`
			`l_lexer.read_char()`
			`}`
			`}`

			`func (l_lexer *Lexer) read_number() string {`
			`position := l_lexer.position`
			`for is_digit(l_lexer.current_char) {`
			`l_lexer.read_char()`
			`}`
			`return l_lexer.input[position:l_lexer.position]`
			`}`

			`func is_digit(ch byte) bool {`
			`return '0' <= ch && ch <= '9'`
			`}`
Now supports strings. git-svn-id: https://svn.tlawal.org/svn/monkey@63 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2023-05-09 14:59:18 -05:00
			`func (l_lexer *Lexer) read_string() string {`
			`position := l_lexer.position + 1`
			`for {`
			`l_lexer.read_char()`
			`if l_lexer.current_char == '"' \|\| l_lexer.current_char == 0 {`
			`break`
			`}`
			`}`
			`return l_lexer.input[position:l_lexer.position]`
			`}`