From 6c55f214642d8c33acd1465fe8c7586e2b342a8e Mon Sep 17 00:00:00 2001
From: Masaaki Goshima <goccy54@gmail.com>
Date: Wed, 30 Oct 2024 01:16:11 +0900
Subject: [PATCH 1/3] reuse context

---
 scanner/context.go |  8 ++++
 scanner/scanner.go | 97 +++++++++++++++++++++++++++-------------------
 2 files changed, 66 insertions(+), 39 deletions(-)

diff --git a/scanner/context.go b/scanner/context.go
index 1522e5ba..44e15be5 100644
--- a/scanner/context.go
+++ b/scanner/context.go
@@ -49,6 +49,14 @@ func (c *Context) release() {
 	ctxPool.Put(c)
 }
 
+func (c *Context) clear() {
+	c.resetBuffer()
+	c.isRawFolded = false
+	c.isLiteral = false
+	c.isFolded = false
+	c.literalOpt = ""
+}
+
 func (c *Context) reset(src []rune) {
 	c.idx = 0
 	c.size = len(src)
diff --git a/scanner/scanner.go b/scanner/scanner.go
index aa9bcc7d..80f59d26 100644
--- a/scanner/scanner.go
+++ b/scanner/scanner.go
@@ -437,31 +437,38 @@ func (s *Scanner) isMergeKey(ctx *Context) bool {
 	return false
 }
 
-func (s *Scanner) scanTag(ctx *Context) *token.Token {
+func (s *Scanner) scanTag(ctx *Context) bool {
+	if ctx.existsBuffer() {
+		return false
+	}
+
 	ctx.addOriginBuf('!')
 	s.progress(ctx, 1) // skip '!' character
 
-	var (
-		tk       *token.Token
-		progress int
-	)
+	var progress int
 	for idx, c := range ctx.src[ctx.idx:] {
 		progress = idx + 1
 		ctx.addOriginBuf(c)
 		switch c {
 		case ' ', '\n', '\r':
 			value := ctx.source(ctx.idx-1, ctx.idx+idx)
-			tk = token.Tag(value, string(ctx.obuf), s.pos())
+			ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos()))
 			progress = len([]rune(value))
 			goto END
 		}
 	}
 END:
 	s.progressColumn(ctx, progress)
-	return tk
+	ctx.clear()
+	return true
 }
 
-func (s *Scanner) scanComment(ctx *Context) *token.Token {
+func (s *Scanner) scanComment(ctx *Context) bool {
+	if ctx.existsBuffer() && ctx.previousChar() != ' ' {
+		return false
+	}
+
+	s.addBufferedTokenIfExists(ctx)
 	ctx.addOriginBuf('#')
 	s.progress(ctx, 1) // skip '#' character
 
@@ -474,19 +481,21 @@ func (s *Scanner) scanComment(ctx *Context) *token.Token {
 			}
 			value := ctx.source(ctx.idx, ctx.idx+idx)
 			progress := len([]rune(value))
-			tk := token.Comment(value, string(ctx.obuf), s.pos())
+			ctx.addToken(token.Comment(value, string(ctx.obuf), s.pos()))
 			s.progressColumn(ctx, progress)
 			s.progressLine(ctx)
-			return tk
+			ctx.clear()
+			return true
 		}
 	}
 	// document ends with comment.
 	value := string(ctx.src[ctx.idx:])
-	tk := token.Comment(value, string(ctx.obuf), s.pos())
+	ctx.addToken(token.Comment(value, string(ctx.obuf), s.pos()))
 	progress := len([]rune(value))
 	s.progressColumn(ctx, progress)
 	s.progressLine(ctx)
-	return tk
+	ctx.clear()
+	return true
 }
 
 func (s *Scanner) trimCommentFromLiteralOpt(text string, header rune) (string, error) {
@@ -583,6 +592,7 @@ func (s *Scanner) scanFlowMapStart(ctx *Context) bool {
 	ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos()))
 	s.startedFlowMapNum++
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -596,6 +606,7 @@ func (s *Scanner) scanFlowMapEnd(ctx *Context) bool {
 	ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos()))
 	s.startedFlowMapNum--
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -608,6 +619,7 @@ func (s *Scanner) scanFlowArrayStart(ctx *Context) bool {
 	ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos()))
 	s.startedFlowSequenceNum++
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -621,6 +633,7 @@ func (s *Scanner) scanFlowArrayEnd(ctx *Context) bool {
 	ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos()))
 	s.startedFlowSequenceNum--
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -633,6 +646,7 @@ func (s *Scanner) scanFlowEntry(ctx *Context, c rune) bool {
 	ctx.addOriginBuf(c)
 	ctx.addToken(token.CollectEntry(string(ctx.obuf), s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -656,6 +670,7 @@ func (s *Scanner) scanMapDelim(ctx *Context) bool {
 	}
 	ctx.addToken(token.MappingValue(s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -673,6 +688,7 @@ func (s *Scanner) scanDocumentStart(ctx *Context) bool {
 	s.addBufferedTokenIfExists(ctx)
 	ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos()))
 	s.progressColumn(ctx, 3)
+	ctx.clear()
 	return true
 }
 
@@ -689,6 +705,7 @@ func (s *Scanner) scanDocumentEnd(ctx *Context) bool {
 
 	ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos()))
 	s.progressColumn(ctx, 3)
+	ctx.clear()
 	return true
 }
 
@@ -700,6 +717,7 @@ func (s *Scanner) scanMergeKey(ctx *Context) bool {
 	s.lastDelimColumn = s.column
 	ctx.addToken(token.MergeKey(string(ctx.obuf)+"<<", s.pos()))
 	s.progressColumn(ctx, 2)
+	ctx.clear()
 	return true
 }
 
@@ -734,6 +752,7 @@ func (s *Scanner) scanSequence(ctx *Context) bool {
 	s.lastDelimColumn = tk.Position.Column
 	ctx.addToken(tk)
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -830,6 +849,7 @@ func (s *Scanner) scanMapKey(ctx *Context) bool {
 
 	ctx.addToken(token.MappingKey(s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -843,6 +863,7 @@ func (s *Scanner) scanDirective(ctx *Context) bool {
 
 	ctx.addToken(token.Directive(string(ctx.obuf)+"%", s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -856,6 +877,7 @@ func (s *Scanner) scanAnchor(ctx *Context) bool {
 	ctx.addToken(token.Anchor(string(ctx.obuf), s.pos()))
 	s.progressColumn(ctx, 1)
 	s.isAnchor = true
+	ctx.clear()
 	return true
 }
 
@@ -868,6 +890,7 @@ func (s *Scanner) scanAlias(ctx *Context) bool {
 	ctx.addOriginBuf('*')
 	ctx.addToken(token.Alias(string(ctx.obuf), s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -895,29 +918,29 @@ func (s *Scanner) scan(ctx *Context) error {
 		switch c {
 		case '{':
 			if s.scanFlowMapStart(ctx) {
-				return nil
+				continue
 			}
 		case '}':
 			if s.scanFlowMapEnd(ctx) {
-				return nil
+				continue
 			}
 		case '.':
 			if s.scanDocumentEnd(ctx) {
-				return nil
+				continue
 			}
 		case '<':
 			if s.scanMergeKey(ctx) {
-				return nil
+				continue
 			}
 		case '-':
 			if s.scanDocumentStart(ctx) {
-				return nil
+				continue
 			}
 			if s.scanRawFoldedChar(ctx) {
 				continue
 			}
 			if s.scanSequence(ctx) {
-				return nil
+				continue
 			}
 			if ctx.existsBuffer() {
 				// '-' is literal
@@ -928,19 +951,19 @@ func (s *Scanner) scan(ctx *Context) error {
 			}
 		case '[':
 			if s.scanFlowArrayStart(ctx) {
-				return nil
+				continue
 			}
 		case ']':
 			if s.scanFlowArrayEnd(ctx) {
-				return nil
+				continue
 			}
 		case ',':
 			if s.scanFlowEntry(ctx, c) {
-				return nil
+				continue
 			}
 		case ':':
 			if s.scanMapDelim(ctx) {
-				return nil
+				continue
 			}
 		case '|', '>':
 			scanned, err := s.scanLiteralHeader(ctx)
@@ -951,44 +974,39 @@ func (s *Scanner) scan(ctx *Context) error {
 				continue
 			}
 		case '!':
-			if !ctx.existsBuffer() {
-				token := s.scanTag(ctx)
-				ctx.addToken(token)
-				return nil
+			if s.scanTag(ctx) {
+				continue
 			}
 		case '%':
 			if s.scanDirective(ctx) {
-				return nil
+				continue
 			}
 		case '?':
 			if s.scanMapKey(ctx) {
-				return nil
+				continue
 			}
 		case '&':
 			if s.scanAnchor(ctx) {
-				return nil
+				continue
 			}
 		case '*':
 			if s.scanAlias(ctx) {
-				return nil
+				continue
 			}
 		case '#':
-			if !ctx.existsBuffer() || ctx.previousChar() == ' ' {
-				s.addBufferedTokenIfExists(ctx)
-				token := s.scanComment(ctx)
-				ctx.addToken(token)
-				return nil
+			if s.scanComment(ctx) {
+				continue
 			}
 		case '\'', '"':
 			if !ctx.existsBuffer() {
-				token := s.scanQuote(ctx, c)
-				ctx.addToken(token)
+				ctx.addToken(s.scanQuote(ctx, c))
 				// If the non-whitespace character immediately following the quote is ':', the quote should be treated as a map key.
 				// Therefore, do not return and continue processing as a normal map key.
 				if ctx.currentCharWithSkipWhitespace() == ':' {
 					continue
 				}
-				return nil
+				ctx.clear()
+				continue
 			}
 		case '\r', '\n':
 			s.scanNewLine(ctx, c)
@@ -1008,7 +1026,8 @@ func (s *Scanner) scan(ctx *Context) error {
 			s.addBufferedTokenIfExists(ctx)
 			s.isAnchor = false
 			// rescan white space at next scanning for adding white space to next buffer.
-			return nil
+			ctx.clear()
+			continue
 		}
 		ctx.addBuf(c)
 		ctx.addOriginBuf(c)

From 895ae4bd389cd66a06f0087c3a54c120dd385068 Mon Sep 17 00:00:00 2001
From: Masaaki Goshima <goccy54@gmail.com>
Date: Wed, 30 Oct 2024 02:04:25 +0900
Subject: [PATCH 2/3] refactor scan function

---
 scanner/scanner.go | 62 ++++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/scanner/scanner.go b/scanner/scanner.go
index 80f59d26..13295767 100644
--- a/scanner/scanner.go
+++ b/scanner/scanner.go
@@ -406,11 +406,36 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) *token.Token {
 	return tk
 }
 
-func (s *Scanner) scanQuote(ctx *Context, ch rune) *token.Token {
+func (s *Scanner) scanQuote(ctx *Context, ch rune) bool {
+	if ctx.existsBuffer() {
+		return false
+	}
 	if ch == '\'' {
-		return s.scanSingleQuote(ctx)
+		ctx.addToken(s.scanSingleQuote(ctx))
+	} else {
+		ctx.addToken(s.scanDoubleQuote(ctx))
 	}
-	return s.scanDoubleQuote(ctx)
+	ctx.clear()
+	return true
+}
+
+func (s *Scanner) scanWhiteSpace(ctx *Context) bool {
+	if ctx.isDocument() {
+		return false
+	}
+	if !s.isAnchor && !s.isFirstCharAtLine {
+		return false
+	}
+
+	if s.isFirstCharAtLine {
+		s.progressColumn(ctx, 1)
+		ctx.addOriginBuf(' ')
+		return true
+	}
+
+	s.addBufferedTokenIfExists(ctx)
+	s.isAnchor = false
+	return true
 }
 
 func (s *Scanner) isMergeKey(ctx *Context) bool {
@@ -942,13 +967,6 @@ func (s *Scanner) scan(ctx *Context) error {
 			if s.scanSequence(ctx) {
 				continue
 			}
-			if ctx.existsBuffer() {
-				// '-' is literal
-				ctx.addBuf(c)
-				ctx.addOriginBuf(c)
-				s.progressColumn(ctx, 1)
-				continue
-			}
 		case '[':
 			if s.scanFlowArrayStart(ctx) {
 				continue
@@ -998,36 +1016,16 @@ func (s *Scanner) scan(ctx *Context) error {
 				continue
 			}
 		case '\'', '"':
-			if !ctx.existsBuffer() {
-				ctx.addToken(s.scanQuote(ctx, c))
-				// If the non-whitespace character immediately following the quote is ':', the quote should be treated as a map key.
-				// Therefore, do not return and continue processing as a normal map key.
-				if ctx.currentCharWithSkipWhitespace() == ':' {
-					continue
-				}
-				ctx.clear()
+			if s.scanQuote(ctx, c) {
 				continue
 			}
 		case '\r', '\n':
 			s.scanNewLine(ctx, c)
 			continue
 		case ' ':
-			if ctx.isDocument() || (!s.isAnchor && !s.isFirstCharAtLine) {
-				ctx.addBuf(c)
-				ctx.addOriginBuf(c)
-				s.progressColumn(ctx, 1)
-				continue
-			}
-			if s.isFirstCharAtLine {
-				s.progressColumn(ctx, 1)
-				ctx.addOriginBuf(c)
+			if s.scanWhiteSpace(ctx) {
 				continue
 			}
-			s.addBufferedTokenIfExists(ctx)
-			s.isAnchor = false
-			// rescan white space at next scanning for adding white space to next buffer.
-			ctx.clear()
-			continue
 		}
 		ctx.addBuf(c)
 		ctx.addOriginBuf(c)

From e47e0bca50f5803c3672d2186a79a1e1c2abb96f Mon Sep 17 00:00:00 2001
From: Masaaki Goshima <goccy54@gmail.com>
Date: Wed, 30 Oct 2024 02:13:37 +0900
Subject: [PATCH 3/3] remove unused function and constant

---
 scanner/context.go | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/scanner/context.go b/scanner/context.go
index 44e15be5..54dc0b79 100644
--- a/scanner/context.go
+++ b/scanner/context.go
@@ -6,8 +6,6 @@ import (
 	"github.com/goccy/go-yaml/token"
 )
 
-const whitespace = ' '
-
 // Context context at scanning
 type Context struct {
 	idx                int
@@ -152,18 +150,6 @@ func (c *Context) currentChar() rune {
 	return rune(0)
 }
 
-func (c *Context) currentCharWithSkipWhitespace() rune {
-	idx := c.idx
-	for c.size > idx {
-		ch := c.src[idx]
-		if ch != whitespace {
-			return ch
-		}
-		idx++
-	}
-	return rune(0)
-}
-
 func (c *Context) nextChar() rune {
 	if c.size > c.idx+1 {
 		return c.src[c.idx+1]