From 2791a27f1fb10013d46e83f93d7f80f23f488372 Mon Sep 17 00:00:00 2001 From: tijani Date: Thu, 3 Nov 2022 11:11:42 +0000 Subject: [PATCH] Built-in datatypes: Added String The parser now understands what a string is and can take a string literal and churn out StringLiteral AST nodes. git-svn-id: https://svn.tlawal.org/svn/monkey@56 f6afcba9-9ef1-4bdd-9b72-7484f5705bac --- ast/ast.go | 10 ++++++++++ lexer/lexer.go | 19 +++++++++++++++++++ lexer/lexer_test.go | 5 +++++ object/object.go | 9 +++++++++ parser/parser.go | 10 ++++++++++ parser/parser_test.go | 18 ++++++++++++++++++ token/tokens.go | 5 +++-- 7 files changed, 74 insertions(+), 2 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 7e94417..84b480b 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -286,3 +286,13 @@ func (ce *CallExpression) String() string { return out.String() } + +// String Literal +type StringLiteral struct { + Token token.Token + Value string +} + +func (sl *StringLiteral) expression_node() {} +func (sl *StringLiteral) TokenLiteral() string { return sl.Token.Literal } +func (sl *StringLiteral) String() string { return sl.Token.Literal } diff --git a/lexer/lexer.go b/lexer/lexer.go index 4e864ec..f16f7f3 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -62,6 +62,9 @@ func (l_lexer *Lexer) NextToken() token.Token { tok = new_token(token.LT, l_lexer.current_char) case '>': tok = new_token(token.GT, l_lexer.current_char) + case '"': + tok.Type = token.STRING + tok.Literal = l_lexer.read_string() case 0: tok.Literal = "" tok.Type = token.EOF @@ -134,3 +137,19 @@ func (l_lexer *Lexer) read_number() string { func is_digit(ch byte) bool { return '0' <= ch && ch <= '9' } + +/* + Read the current character until it encounters a closing '"' or end of input. + TODO: some additional thing that can be done at the lexer level with strings is to report an error when it + reaches the end of input without proper termination. Support for character escaping would be really neat. 
+*/ +func (l_lexer *Lexer) read_string() string { + position := l_lexer.position + 1 + for { + l_lexer.read_char() + if l_lexer.current_char == '"' || l_lexer.current_char == 0 { + break + } + } + return l_lexer.input[position:l_lexer.position] +} diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 2e6f8d5..b3930bc 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -25,6 +25,8 @@ func TestNextToken(t *testing.T) { 10 == 10; 10 != 9; + "foobar" + "foo bar" ` tests := []struct { expectedType token.TokenType @@ -111,6 +113,9 @@ func TestNextToken(t *testing.T) { {token.INT, "9"}, {token.SEMICOLON, ";"}, + {token.STRING, "foobar"}, + {token.STRING, "foo bar"}, + {token.EOF, ""}, } diff --git a/object/object.go b/object/object.go index 171c83d..f81eddd 100644 --- a/object/object.go +++ b/object/object.go @@ -16,6 +16,7 @@ const ( RETURN_VALUE_OBJECT = "RETURN_VALUE" ERROR_OBJECT = "ERROR" FUNCTION_OBJECT = "FUNCTION" + STRING_OBJECT = "STRING" ) type Object interface { @@ -101,3 +102,11 @@ func (f *Function) Inspect() string { return out.String() } + +// String +type String struct { + Value string +} + +func (s *String) Type() ObjectType { return STRING_OBJECT } +func (s *String) Inspect() string { return s.Value } diff --git a/parser/parser.go b/parser/parser.go index 44a0d00..eb97a76 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -113,6 +113,9 @@ func New(l_lexer *lexer.Lexer) *Parser { // Call Expression l_parser.register_infix(token.LPAREN, l_parser.parse_call_expression) + // String + l_parser.register_prefix(token.STRING, l_parser.parse_string_literal) + return l_parser } @@ -431,3 +434,10 @@ func (l_parser *Parser) parse_call_arguments() []ast.Expression { } return args } + +func (l_parser *Parser) parse_string_literal() ast.Expression { + return &ast.StringLiteral{ + Token: l_parser.current_token, + Value: l_parser.current_token.Literal, + } +} diff --git a/parser/parser_test.go b/parser/parser_test.go index daf2e01..4345034 100644 
--- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -658,6 +658,24 @@ func TestLetStatements(l_test *testing.T) { } } +func TestStringLiteralExpression(l_test *testing.T) { + input := `"hello world";` + l_lexer := lexer.New(input) + l_parser := New(l_lexer) + program := l_parser.ParseProgram() + check_parser_errors(l_test, l_parser) + + statement := program.Statements[0].(*ast.ExpressionStatement) + literal, ok := statement.Expression.(*ast.StringLiteral) + if !ok { + l_test.Fatalf("expression not *ast.StringLiteral, got=%T", statement.Expression) + } + + if literal.Value != "hello world" { + l_test.Errorf("literal.Value not %q, got=%q", "hello world", literal.Value) + } +} + // Helpers func check_parser_errors(l_test *testing.T, l_parser *Parser) { diff --git a/token/tokens.go b/token/tokens.go index b2658da..b8d980b 100644 --- a/token/tokens.go +++ b/token/tokens.go @@ -13,8 +13,9 @@ const ( COMMENT = "COMMENT" // TODO(tijani): Implement this!! // Identifiers and basic type literals - IDENT = "IDENT" - INT = "INT" + IDENT = "IDENT" + INT = "INT" + STRING = "STRING" // Operators ASSIGN = "="