monna/lexer/lexer.go

package lexer

import "monkey/token"

type Lexer struct {
	input         string
	position      int // current position in input (the current_char)
	read_position int // current reading position in input (after current_char)
	current_char  byte
}

func New(input string) *Lexer {
	l := &Lexer{input: input}
	l.read_char()
	return l
}

func new_token(TokenType token.TokenType, ch byte) token.Token {
	return token.Token{Type: TokenType, Literal: string(ch)}
}

func (lexer *Lexer) read_char() {
	if lexer.read_position >= len(lexer.input) {
		lexer.current_char = 0
	} else {
		lexer.current_char = lexer.input[lexer.read_position]
	}
	lexer.position = lexer.read_position
	lexer.read_position += 1
}

func (lexer *Lexer) peek_char() byte {
	if lexer.read_position >= len(lexer.input) {
		return 0
	} else {
		return lexer.input[lexer.read_position]
	}
}

func (lexer *Lexer) read_identifier() string {
	position := lexer.position
	for is_letter(lexer.current_char) {
		lexer.read_char()
	}
	return lexer.input[position:lexer.position]
}

func is_letter(ch byte) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

func (lexer *Lexer) skip_whitespace() {
	for lexer.current_char == ' ' || lexer.current_char == '\t' || lexer.current_char == '\n' || lexer.current_char == '\r' {
		lexer.read_char()
	}
}

func (lexer *Lexer) read_number() string {
	position := lexer.position
	for is_digit(lexer.current_char) {
		lexer.read_char()
	}
	return lexer.input[position:lexer.position]
}

func is_digit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}

func (lexer *Lexer) NextToken() token.Token {
	var tok token.Token
	lexer.skip_whitespace()

	switch lexer.current_char {
	case '=':
		if lexer.peek_char() == '=' {
			ch := lexer.current_char
			lexer.read_char()
			literal := string(ch) + string(lexer.current_char)
			tok = token.Token{Type: token.EQ, Literal: literal}
		} else {
			tok = new_token(token.ASSIGN, lexer.current_char)
		}
	case '!':
		if lexer.peek_char() == '=' {
			ch := lexer.current_char
			lexer.read_char()
			literal := string(ch) + string(lexer.current_char)
			tok = token.Token{Type: token.NOT_EQ, Literal: literal}
		} else {
			tok = new_token(token.BANG, lexer.current_char)
		}
	case ';':
		tok = new_token(token.SEMICOLON, lexer.current_char)
	case '(':
		tok = new_token(token.LPAREN, lexer.current_char)
	case ')':
		tok = new_token(token.RPAREN, lexer.current_char)
	case '{':
		tok = new_token(token.LBRACE, lexer.current_char)
	case '}':
		tok = new_token(token.RBRACE, lexer.current_char)
	case ',':
		tok = new_token(token.COMMA, lexer.current_char)
	case '+':
		tok = new_token(token.PLUS, lexer.current_char)
	case '-':
		tok = new_token(token.MINUS, lexer.current_char)
	case '/':
		tok = new_token(token.SLASH, lexer.current_char)
	case '*':
		tok = new_token(token.ASTERISK, lexer.current_char)
	case '<':
		tok = new_token(token.LT, lexer.current_char)
	case '>':
		tok = new_token(token.GT, lexer.current_char)
	case 0:
		tok.Literal = ""
		tok.Type = token.EOF

	default:
		if is_letter(lexer.current_char) {
			tok.Literal = lexer.read_identifier()
			tok.Type = token.LookupIdentifier(tok.Literal)
			return tok
		} else if is_digit(lexer.current_char) {
			tok.Type = token.INT
			tok.Literal = lexer.read_number()
			return tok
		} else {
			tok = new_token(token.ILLEGAL, lexer.current_char)
		}
	}
	lexer.read_char()
	return tok
}
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`package lexer`

			`import "monkey/token"`

			`type Lexer struct {`
			`input string`
			`position int // current position in input (the current_char)`
			`read_position int // current reading position in input (after current_char)`
			`current_char byte`
			`}`

Rename new -> New and next_token -> NextToken, because of go's weird requirement that first letter of public methods in a package that would be used in another package must be capitalized. git-svn-id: https://svn.tlawal.org/svn/monkey@4 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:38:47 -05:00			`func New(input string) *Lexer {`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`l := &Lexer{input: input}`
			`l.read_char()`
			`return l`
			`}`

			`func new_token(TokenType token.TokenType, ch byte) token.Token {`
			`return token.Token{Type: TokenType, Literal: string(ch)}`
			`}`

			`func (lexer *Lexer) read_char() {`
			`if lexer.read_position >= len(lexer.input) {`
			`lexer.current_char = 0`
			`} else {`
			`lexer.current_char = lexer.input[lexer.read_position]`
			`}`
			`lexer.position = lexer.read_position`
			`lexer.read_position += 1`
			`}`

- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`func (lexer *Lexer) peek_char() byte {`
			`if lexer.read_position >= len(lexer.input) {`
			`return 0`
			`} else {`
			`return lexer.input[lexer.read_position]`
			`}`
			`}`

Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`func (lexer *Lexer) read_identifier() string {`
			`position := lexer.position`
			`for is_letter(lexer.current_char) {`
			`lexer.read_char()`
			`}`
			`return lexer.input[position:lexer.position]`
			`}`

			`func is_letter(ch byte) bool {`
			`return 'a' <= ch && ch <= 'z' \|\| 'A' <= ch && ch <= 'Z' \|\| ch == '_'`
			`}`

			`func (lexer *Lexer) skip_whitespace() {`
			`for lexer.current_char == ' ' \|\| lexer.current_char == '\t' \|\| lexer.current_char == '\n' \|\| lexer.current_char == '\r' {`
			`lexer.read_char()`
			`}`
			`}`

			`func (lexer *Lexer) read_number() string {`
			`position := lexer.position`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`for is_digit(lexer.current_char) {`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`lexer.read_char()`
			`}`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`return lexer.input[position:lexer.position]`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`}`

- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`func is_digit(ch byte) bool {`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`return '0' <= ch && ch <= '9'`
			`}`

Rename new -> New and next_token -> NextToken, because of go's weird requirement that first letter of public methods in a package that would be used in another package must be capitalized. git-svn-id: https://svn.tlawal.org/svn/monkey@4 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:38:47 -05:00			`func (lexer *Lexer) NextToken() token.Token {`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`var tok token.Token`
			`lexer.skip_whitespace()`

			`switch lexer.current_char {`
			`case '=':`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`if lexer.peek_char() == '=' {`
			`ch := lexer.current_char`
			`lexer.read_char()`
			`literal := string(ch) + string(lexer.current_char)`
			`tok = token.Token{Type: token.EQ, Literal: literal}`
			`} else {`
			`tok = new_token(token.ASSIGN, lexer.current_char)`
			`}`
			`case '!':`
			`if lexer.peek_char() == '=' {`
			`ch := lexer.current_char`
			`lexer.read_char()`
			`literal := string(ch) + string(lexer.current_char)`
			`tok = token.Token{Type: token.NOT_EQ, Literal: literal}`
			`} else {`
			`tok = new_token(token.BANG, lexer.current_char)`
			`}`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case ';':`
			`tok = new_token(token.SEMICOLON, lexer.current_char)`
			`case '(':`
			`tok = new_token(token.LPAREN, lexer.current_char)`
			`case ')':`
			`tok = new_token(token.RPAREN, lexer.current_char)`
			`case '{':`
			`tok = new_token(token.LBRACE, lexer.current_char)`
			`case '}':`
			`tok = new_token(token.RBRACE, lexer.current_char)`
			`case ',':`
			`tok = new_token(token.COMMA, lexer.current_char)`
			`case '+':`
			`tok = new_token(token.PLUS, lexer.current_char)`
- The lexer can now peek forward into the input stream. - Added the rest of the basic math operators. - Added > and <. - Test, Test, Test. - Added if, else, return, true, false. - Lexer can now differentiate between '=' and '==' and '!' and '!='. git-svn-id: https://svn.tlawal.org/svn/monkey@2 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 17:04:29 -05:00			`case '-':`
			`tok = new_token(token.MINUS, lexer.current_char)`
			`case '/':`
			`tok = new_token(token.SLASH, lexer.current_char)`
			`case '*':`
			`tok = new_token(token.ASTERISK, lexer.current_char)`
			`case '<':`
			`tok = new_token(token.LT, lexer.current_char)`
			`case '>':`
			`tok = new_token(token.GT, lexer.current_char)`
Monkey can now lex its basic types and keywords. git-svn-id: https://svn.tlawal.org/svn/monkey@1 f6afcba9-9ef1-4bdd-9b72-7484f5705bac 2022-05-12 15:51:16 -05:00			`case 0:`
			`tok.Literal = ""`
			`tok.Type = token.EOF`

			`default:`
			`if is_letter(lexer.current_char) {`
			`tok.Literal = lexer.read_identifier()`
			`tok.Type = token.LookupIdentifier(tok.Literal)`
			`return tok`
			`} else if is_digit(lexer.current_char) {`
			`tok.Type = token.INT`
			`tok.Literal = lexer.read_number()`
			`return tok`
			`} else {`
			`tok = new_token(token.ILLEGAL, lexer.current_char)`
			`}`
			`}`
			`lexer.read_char()`
			`return tok`
			`}`