From c9f75fef0c3f2f9b35366c730148c07c6e08bab2 Mon Sep 17 00:00:00 2001
From: Masaaki Goshima
Date: Wed, 30 Oct 2024 02:18:20 +0900
Subject: [PATCH] Reuse context at Scanner (#487)

* reuse context

* refactor scan function

* remove unused function
---
 scanner/context.go |  22 +++----
 scanner/scanner.go | 149 +++++++++++++++++++++++++--------------------
 2 files changed, 91 insertions(+), 80 deletions(-)

diff --git a/scanner/context.go b/scanner/context.go
index 1522e5ba..54dc0b79 100644
--- a/scanner/context.go
+++ b/scanner/context.go
@@ -6,8 +6,6 @@ import (
 	"github.com/goccy/go-yaml/token"
 )
 
-const whitespace = ' '
-
 // Context context at scanning
 type Context struct {
 	idx int
@@ -49,6 +47,14 @@ func (c *Context) release() {
 	ctxPool.Put(c)
 }
 
+func (c *Context) clear() {
+	c.resetBuffer()
+	c.isRawFolded = false
+	c.isLiteral = false
+	c.isFolded = false
+	c.literalOpt = ""
+}
+
 func (c *Context) reset(src []rune) {
 	c.idx = 0
 	c.size = len(src)
@@ -144,18 +150,6 @@ func (c *Context) currentChar() rune {
 	return rune(0)
 }
 
-func (c *Context) currentCharWithSkipWhitespace() rune {
-	idx := c.idx
-	for c.size > idx {
-		ch := c.src[idx]
-		if ch != whitespace {
-			return ch
-		}
-		idx++
-	}
-	return rune(0)
-}
-
 func (c *Context) nextChar() rune {
 	if c.size > c.idx+1 {
 		return c.src[c.idx+1]
diff --git a/scanner/scanner.go b/scanner/scanner.go
index aa9bcc7d..13295767 100644
--- a/scanner/scanner.go
+++ b/scanner/scanner.go
@@ -406,11 +406,36 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) *token.Token {
 	return tk
 }
 
-func (s *Scanner) scanQuote(ctx *Context, ch rune) *token.Token {
+func (s *Scanner) scanQuote(ctx *Context, ch rune) bool {
+	if ctx.existsBuffer() {
+		return false
+	}
 	if ch == '\'' {
-		return s.scanSingleQuote(ctx)
+		ctx.addToken(s.scanSingleQuote(ctx))
+	} else {
+		ctx.addToken(s.scanDoubleQuote(ctx))
 	}
-	return s.scanDoubleQuote(ctx)
+	ctx.clear()
+	return true
+}
+
+func (s *Scanner) scanWhiteSpace(ctx *Context) bool {
+	if ctx.isDocument() {
+		return false
+	}
+	if !s.isAnchor && !s.isFirstCharAtLine {
+		return false
+	}
+
+	if s.isFirstCharAtLine {
+		s.progressColumn(ctx, 1)
+		ctx.addOriginBuf(' ')
+		return true
+	}
+
+	s.addBufferedTokenIfExists(ctx)
+	s.isAnchor = false
+	return true
 }
 
 func (s *Scanner) isMergeKey(ctx *Context) bool {
@@ -437,31 +462,38 @@ func (s *Scanner) isMergeKey(ctx *Context) bool {
 	return false
 }
 
-func (s *Scanner) scanTag(ctx *Context) *token.Token {
+func (s *Scanner) scanTag(ctx *Context) bool {
+	if ctx.existsBuffer() {
+		return false
+	}
+
 	ctx.addOriginBuf('!')
 	s.progress(ctx, 1) // skip '!' character
 
-	var (
-		tk       *token.Token
-		progress int
-	)
+	var progress int
 	for idx, c := range ctx.src[ctx.idx:] {
 		progress = idx + 1
 		ctx.addOriginBuf(c)
 		switch c {
 		case ' ', '\n', '\r':
 			value := ctx.source(ctx.idx-1, ctx.idx+idx)
-			tk = token.Tag(value, string(ctx.obuf), s.pos())
+			ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos()))
 			progress = len([]rune(value))
 			goto END
 		}
 	}
 END:
 	s.progressColumn(ctx, progress)
-	return tk
+	ctx.clear()
+	return true
 }
 
-func (s *Scanner) scanComment(ctx *Context) *token.Token {
+func (s *Scanner) scanComment(ctx *Context) bool {
+	if ctx.existsBuffer() && ctx.previousChar() != ' ' {
+		return false
+	}
+
+	s.addBufferedTokenIfExists(ctx)
 	ctx.addOriginBuf('#')
 	s.progress(ctx, 1) // skip '#' character
 
@@ -474,19 +506,21 @@ func (s *Scanner) scanComment(ctx *Context) *token.Token {
 			}
 			value := ctx.source(ctx.idx, ctx.idx+idx)
 			progress := len([]rune(value))
-			tk := token.Comment(value, string(ctx.obuf), s.pos())
+			ctx.addToken(token.Comment(value, string(ctx.obuf), s.pos()))
 			s.progressColumn(ctx, progress)
 			s.progressLine(ctx)
-			return tk
+			ctx.clear()
+			return true
 		}
 	}
 	// document ends with comment.
 	value := string(ctx.src[ctx.idx:])
-	tk := token.Comment(value, string(ctx.obuf), s.pos())
+	ctx.addToken(token.Comment(value, string(ctx.obuf), s.pos()))
 	progress := len([]rune(value))
 	s.progressColumn(ctx, progress)
 	s.progressLine(ctx)
-	return tk
+	ctx.clear()
+	return true
 }
 
 func (s *Scanner) trimCommentFromLiteralOpt(text string, header rune) (string, error) {
@@ -583,6 +617,7 @@ func (s *Scanner) scanFlowMapStart(ctx *Context) bool {
 	ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos()))
 	s.startedFlowMapNum++
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -596,6 +631,7 @@ func (s *Scanner) scanFlowMapEnd(ctx *Context) bool {
 	ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos()))
 	s.startedFlowMapNum--
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -608,6 +644,7 @@ func (s *Scanner) scanFlowArrayStart(ctx *Context) bool {
 	ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos()))
 	s.startedFlowSequenceNum++
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -621,6 +658,7 @@ func (s *Scanner) scanFlowArrayEnd(ctx *Context) bool {
 	ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos()))
 	s.startedFlowSequenceNum--
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -633,6 +671,7 @@ func (s *Scanner) scanFlowEntry(ctx *Context, c rune) bool {
 	ctx.addOriginBuf(c)
 	ctx.addToken(token.CollectEntry(string(ctx.obuf), s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -656,6 +695,7 @@ func (s *Scanner) scanMapDelim(ctx *Context) bool {
 	}
 	ctx.addToken(token.MappingValue(s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -673,6 +713,7 @@ func (s *Scanner) scanDocumentStart(ctx *Context) bool {
 	s.addBufferedTokenIfExists(ctx)
 	ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos()))
 	s.progressColumn(ctx, 3)
+	ctx.clear()
 	return true
 }
 
@@ -689,6 +730,7 @@ func (s *Scanner) scanDocumentEnd(ctx *Context) bool {
 
 	ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos()))
 	s.progressColumn(ctx, 3)
+	ctx.clear()
 	return true
 }
 
@@ -700,6 +742,7 @@ func (s *Scanner) scanMergeKey(ctx *Context) bool {
 	s.lastDelimColumn = s.column
 	ctx.addToken(token.MergeKey(string(ctx.obuf)+"<<", s.pos()))
 	s.progressColumn(ctx, 2)
+	ctx.clear()
 	return true
 }
 
@@ -734,6 +777,7 @@ func (s *Scanner) scanSequence(ctx *Context) bool {
 	s.lastDelimColumn = tk.Position.Column
 	ctx.addToken(tk)
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -830,6 +874,7 @@ func (s *Scanner) scanMapKey(ctx *Context) bool {
 
 	ctx.addToken(token.MappingKey(s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -843,6 +888,7 @@ func (s *Scanner) scanDirective(ctx *Context) bool {
 
 	ctx.addToken(token.Directive(string(ctx.obuf)+"%", s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -856,6 +902,7 @@ func (s *Scanner) scanAnchor(ctx *Context) bool {
 	ctx.addToken(token.Anchor(string(ctx.obuf), s.pos()))
 	s.progressColumn(ctx, 1)
 	s.isAnchor = true
+	ctx.clear()
 	return true
 }
 
@@ -868,6 +915,7 @@ func (s *Scanner) scanAlias(ctx *Context) bool {
 	ctx.addOriginBuf('*')
 	ctx.addToken(token.Alias(string(ctx.obuf), s.pos()))
 	s.progressColumn(ctx, 1)
+	ctx.clear()
 	return true
 }
 
@@ -895,52 +943,45 @@ func (s *Scanner) scan(ctx *Context) error {
 		switch c {
 		case '{':
 			if s.scanFlowMapStart(ctx) {
-				return nil
+				continue
 			}
 		case '}':
 			if s.scanFlowMapEnd(ctx) {
-				return nil
+				continue
 			}
 		case '.':
 			if s.scanDocumentEnd(ctx) {
-				return nil
+				continue
 			}
 		case '<':
 			if s.scanMergeKey(ctx) {
-				return nil
+				continue
 			}
 		case '-':
 			if s.scanDocumentStart(ctx) {
-				return nil
+				continue
 			}
 			if s.scanRawFoldedChar(ctx) {
 				continue
 			}
 			if s.scanSequence(ctx) {
-				return nil
-			}
-			if ctx.existsBuffer() {
-				// '-' is literal
-				ctx.addBuf(c)
-				ctx.addOriginBuf(c)
-				s.progressColumn(ctx, 1)
 				continue
 			}
 		case '[':
			if s.scanFlowArrayStart(ctx) {
-				return nil
+				continue
 			}
 		case ']':
 			if s.scanFlowArrayEnd(ctx) {
-				return nil
+				continue
 			}
 		case ',':
 			if s.scanFlowEntry(ctx, c) {
-				return nil
+				continue
 			}
 		case ':':
 			if s.scanMapDelim(ctx) {
-				return nil
+				continue
 			}
 		case '|', '>':
 			scanned, err := s.scanLiteralHeader(ctx)
@@ -951,64 +992,40 @@ func (s *Scanner) scan(ctx *Context) error {
 				continue
 			}
 		case '!':
-			if !ctx.existsBuffer() {
-				token := s.scanTag(ctx)
-				ctx.addToken(token)
-				return nil
+			if s.scanTag(ctx) {
+				continue
 			}
 		case '%':
 			if s.scanDirective(ctx) {
-				return nil
+				continue
 			}
 		case '?':
 			if s.scanMapKey(ctx) {
-				return nil
+				continue
 			}
 		case '&':
 			if s.scanAnchor(ctx) {
-				return nil
+				continue
 			}
 		case '*':
 			if s.scanAlias(ctx) {
-				return nil
+				continue
 			}
 		case '#':
-			if !ctx.existsBuffer() || ctx.previousChar() == ' ' {
-				s.addBufferedTokenIfExists(ctx)
-				token := s.scanComment(ctx)
-				ctx.addToken(token)
-				return nil
+			if s.scanComment(ctx) {
+				continue
 			}
 		case '\'', '"':
-			if !ctx.existsBuffer() {
-				token := s.scanQuote(ctx, c)
-				ctx.addToken(token)
-				// If the non-whitespace character immediately following the quote is ':', the quote should be treated as a map key.
-				// Therefore, do not return and continue processing as a normal map key.
-				if ctx.currentCharWithSkipWhitespace() == ':' {
-					continue
-				}
-				return nil
+			if s.scanQuote(ctx, c) {
+				continue
 			}
 		case '\r', '\n':
 			s.scanNewLine(ctx, c)
 			continue
 		case ' ':
-			if ctx.isDocument() || (!s.isAnchor && !s.isFirstCharAtLine) {
-				ctx.addBuf(c)
-				ctx.addOriginBuf(c)
-				s.progressColumn(ctx, 1)
-				continue
-			}
-			if s.isFirstCharAtLine {
-				s.progressColumn(ctx, 1)
-				ctx.addOriginBuf(c)
+			if s.scanWhiteSpace(ctx) {
 				continue
 			}
-			s.addBufferedTokenIfExists(ctx)
-			s.isAnchor = false
-			// rescan white space at next scanning for adding white space to next buffer.
-			return nil
 		}
 		ctx.addBuf(c)
 		ctx.addOriginBuf(c)
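
For context, the shape the diff applies is: one pooled Context is acquired per scan, reset with the new clear() method after every emitted token, and returned to the pool by release() when the scan finishes. The stand-alone Go sketch below illustrates that pool/clear/release pattern under simplified assumptions; Token, newContext, and tokenize are hypothetical stand-ins for illustration, not go-yaml's actual API.

package main

import (
	"fmt"
	"sync"
)

// Token is a simplified stand-in for the scanner's token type.
type Token struct{ Value string }

// Context loosely mirrors scanner.Context: per-scan buffers that are
// pooled and reused rather than reallocated.
type Context struct {
	src    []rune
	buf    []rune
	tokens []*Token
}

var ctxPool = sync.Pool{
	New: func() interface{} { return &Context{} },
}

// newContext takes a Context from the pool and resets it for a new source.
func newContext(src []rune) *Context {
	c := ctxPool.Get().(*Context)
	c.src = src
	c.buf = c.buf[:0]
	c.tokens = c.tokens[:0]
	return c
}

// release puts the Context back into the pool, like release() in context.go.
func (c *Context) release() { ctxPool.Put(c) }

// clear drops only per-token state so the same Context can be reused for
// the next token; this is the role of the clear() method added by the patch.
func (c *Context) clear() { c.buf = c.buf[:0] }

func (c *Context) addToken(t *Token) { c.tokens = append(c.tokens, t) }

// tokenize splits src on spaces. Each time a token is emitted the context
// is cleared and reused; it is never re-acquired from the pool mid-scan.
func tokenize(src string) []*Token {
	ctx := newContext([]rune(src))
	defer ctx.release()

	flush := func() {
		if len(ctx.buf) == 0 {
			return
		}
		ctx.addToken(&Token{Value: string(ctx.buf)})
		ctx.clear() // reuse the buffer for the next token
	}
	for _, r := range ctx.src {
		if r == ' ' {
			flush()
			continue
		}
		ctx.buf = append(ctx.buf, r)
	}
	flush()

	// Copy the tokens out: ctx (and its backing slices) goes back to the pool.
	out := make([]*Token, len(ctx.tokens))
	copy(out, ctx.tokens)
	return out
}

func main() {
	for _, tk := range tokenize("reuse context at scanner") {
		fmt.Println(tk.Value)
	}
}

In the patch itself, scan helpers such as scanTag and scanComment play the role of flush: they call ctx.addToken(...) followed by ctx.clear(), which is why ctx.clear() appears after nearly every addToken call in the diff.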