From 6c55f214642d8c33acd1465fe8c7586e2b342a8e Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Wed, 30 Oct 2024 01:16:11 +0900 Subject: [PATCH 1/3] reuse context --- scanner/context.go | 8 ++++ scanner/scanner.go | 97 +++++++++++++++++++++++++++------------------- 2 files changed, 66 insertions(+), 39 deletions(-) diff --git a/scanner/context.go b/scanner/context.go index 1522e5ba..44e15be5 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -49,6 +49,14 @@ func (c *Context) release() { ctxPool.Put(c) } +func (c *Context) clear() { + c.resetBuffer() + c.isRawFolded = false + c.isLiteral = false + c.isFolded = false + c.literalOpt = "" +} + func (c *Context) reset(src []rune) { c.idx = 0 c.size = len(src) diff --git a/scanner/scanner.go b/scanner/scanner.go index aa9bcc7d..80f59d26 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -437,31 +437,38 @@ func (s *Scanner) isMergeKey(ctx *Context) bool { return false } -func (s *Scanner) scanTag(ctx *Context) *token.Token { +func (s *Scanner) scanTag(ctx *Context) bool { + if ctx.existsBuffer() { + return false + } + ctx.addOriginBuf('!') s.progress(ctx, 1) // skip '!' character - var ( - tk *token.Token - progress int - ) + var progress int for idx, c := range ctx.src[ctx.idx:] { progress = idx + 1 ctx.addOriginBuf(c) switch c { case ' ', '\n', '\r': value := ctx.source(ctx.idx-1, ctx.idx+idx) - tk = token.Tag(value, string(ctx.obuf), s.pos()) + ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos())) progress = len([]rune(value)) goto END } } END: s.progressColumn(ctx, progress) - return tk + ctx.clear() + return true } -func (s *Scanner) scanComment(ctx *Context) *token.Token { +func (s *Scanner) scanComment(ctx *Context) bool { + if ctx.existsBuffer() && ctx.previousChar() != ' ' { + return false + } + + s.addBufferedTokenIfExists(ctx) ctx.addOriginBuf('#') s.progress(ctx, 1) // skip '#' character @@ -474,19 +481,21 @@ func (s *Scanner) scanComment(ctx *Context) *token.Token { } value := ctx.source(ctx.idx, ctx.idx+idx) progress := len([]rune(value)) - tk := token.Comment(value, string(ctx.obuf), s.pos()) + ctx.addToken(token.Comment(value, string(ctx.obuf), s.pos())) s.progressColumn(ctx, progress) s.progressLine(ctx) - return tk + ctx.clear() + return true } } // document ends with comment. value := string(ctx.src[ctx.idx:]) - tk := token.Comment(value, string(ctx.obuf), s.pos()) + ctx.addToken(token.Comment(value, string(ctx.obuf), s.pos())) progress := len([]rune(value)) s.progressColumn(ctx, progress) s.progressLine(ctx) - return tk + ctx.clear() + return true } func (s *Scanner) trimCommentFromLiteralOpt(text string, header rune) (string, error) { @@ -583,6 +592,7 @@ func (s *Scanner) scanFlowMapStart(ctx *Context) bool { ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos())) s.startedFlowMapNum++ s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -596,6 +606,7 @@ func (s *Scanner) scanFlowMapEnd(ctx *Context) bool { ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos())) s.startedFlowMapNum-- s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -608,6 +619,7 @@ func (s *Scanner) scanFlowArrayStart(ctx *Context) bool { ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos())) s.startedFlowSequenceNum++ s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -621,6 +633,7 @@ func (s *Scanner) scanFlowArrayEnd(ctx *Context) bool { ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos())) s.startedFlowSequenceNum-- s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -633,6 +646,7 @@ func (s *Scanner) scanFlowEntry(ctx *Context, c rune) bool { ctx.addOriginBuf(c) ctx.addToken(token.CollectEntry(string(ctx.obuf), s.pos())) s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -656,6 +670,7 @@ func (s *Scanner) scanMapDelim(ctx *Context) bool { } ctx.addToken(token.MappingValue(s.pos())) s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -673,6 +688,7 @@ func (s *Scanner) scanDocumentStart(ctx *Context) bool { s.addBufferedTokenIfExists(ctx) ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos())) s.progressColumn(ctx, 3) + ctx.clear() return true } @@ -689,6 +705,7 @@ func (s *Scanner) scanDocumentEnd(ctx *Context) bool { ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos())) s.progressColumn(ctx, 3) + ctx.clear() return true } @@ -700,6 +717,7 @@ func (s *Scanner) scanMergeKey(ctx *Context) bool { s.lastDelimColumn = s.column ctx.addToken(token.MergeKey(string(ctx.obuf)+"<<", s.pos())) s.progressColumn(ctx, 2) + ctx.clear() return true } @@ -734,6 +752,7 @@ func (s *Scanner) scanSequence(ctx *Context) bool { s.lastDelimColumn = tk.Position.Column ctx.addToken(tk) s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -830,6 +849,7 @@ func (s *Scanner) scanMapKey(ctx *Context) bool { ctx.addToken(token.MappingKey(s.pos())) s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -843,6 +863,7 @@ func (s *Scanner) scanDirective(ctx *Context) bool { ctx.addToken(token.Directive(string(ctx.obuf)+"%", s.pos())) s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -856,6 +877,7 @@ func (s *Scanner) scanAnchor(ctx *Context) bool { ctx.addToken(token.Anchor(string(ctx.obuf), s.pos())) s.progressColumn(ctx, 1) s.isAnchor = true + ctx.clear() return true } @@ -868,6 +890,7 @@ func (s *Scanner) scanAlias(ctx *Context) bool { ctx.addOriginBuf('*') ctx.addToken(token.Alias(string(ctx.obuf), s.pos())) s.progressColumn(ctx, 1) + ctx.clear() return true } @@ -895,29 +918,29 @@ func (s *Scanner) scan(ctx *Context) error { switch c { case '{': if s.scanFlowMapStart(ctx) { - return nil + continue } case '}': if s.scanFlowMapEnd(ctx) { - return nil + continue } case '.': if s.scanDocumentEnd(ctx) { - return nil + continue } case '<': if s.scanMergeKey(ctx) { - return nil + continue } case '-': if s.scanDocumentStart(ctx) { - return nil + continue } if s.scanRawFoldedChar(ctx) { continue } if s.scanSequence(ctx) { - return nil + continue } if ctx.existsBuffer() { // '-' is literal @@ -928,19 +951,19 @@ func (s *Scanner) scan(ctx *Context) error { } case '[': if s.scanFlowArrayStart(ctx) { - return nil + continue } case ']': if s.scanFlowArrayEnd(ctx) { - return nil + continue } case ',': if s.scanFlowEntry(ctx, c) { - return nil + continue } case ':': if s.scanMapDelim(ctx) { - return nil + continue } case '|', '>': scanned, err := s.scanLiteralHeader(ctx) @@ -951,44 +974,39 @@ func (s *Scanner) scan(ctx *Context) error { continue } case '!': - if !ctx.existsBuffer() { - token := s.scanTag(ctx) - ctx.addToken(token) - return nil + if s.scanTag(ctx) { + continue } case '%': if s.scanDirective(ctx) { - return nil + continue } case '?': if s.scanMapKey(ctx) { - return nil + continue } case '&': if s.scanAnchor(ctx) { - return nil + continue } case '*': if s.scanAlias(ctx) { - return nil + continue } case '#': - if !ctx.existsBuffer() || ctx.previousChar() == ' ' { - s.addBufferedTokenIfExists(ctx) - token := s.scanComment(ctx) - ctx.addToken(token) - return nil + if s.scanComment(ctx) { + continue } case '\'', '"': if !ctx.existsBuffer() { - token := s.scanQuote(ctx, c) - ctx.addToken(token) + ctx.addToken(s.scanQuote(ctx, c)) // If the non-whitespace character immediately following the quote is ':', the quote should be treated as a map key. // Therefore, do not return and continue processing as a normal map key. if ctx.currentCharWithSkipWhitespace() == ':' { continue } - return nil + ctx.clear() + continue } case '\r', '\n': s.scanNewLine(ctx, c) @@ -1008,7 +1026,8 @@ func (s *Scanner) scan(ctx *Context) error { s.addBufferedTokenIfExists(ctx) s.isAnchor = false // rescan white space at next scanning for adding white space to next buffer. - return nil + ctx.clear() + continue } ctx.addBuf(c) ctx.addOriginBuf(c) From 895ae4bd389cd66a06f0087c3a54c120dd385068 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Wed, 30 Oct 2024 02:04:25 +0900 Subject: [PATCH 2/3] refactor scan function --- scanner/scanner.go | 62 ++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 80f59d26..13295767 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -406,11 +406,36 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) *token.Token { return tk } -func (s *Scanner) scanQuote(ctx *Context, ch rune) *token.Token { +func (s *Scanner) scanQuote(ctx *Context, ch rune) bool { + if ctx.existsBuffer() { + return false + } if ch == '\'' { - return s.scanSingleQuote(ctx) + ctx.addToken(s.scanSingleQuote(ctx)) + } else { + ctx.addToken(s.scanDoubleQuote(ctx)) } - return s.scanDoubleQuote(ctx) + ctx.clear() + return true +} + +func (s *Scanner) scanWhiteSpace(ctx *Context) bool { + if ctx.isDocument() { + return false + } + if !s.isAnchor && !s.isFirstCharAtLine { + return false + } + + if s.isFirstCharAtLine { + s.progressColumn(ctx, 1) + ctx.addOriginBuf(' ') + return true + } + + s.addBufferedTokenIfExists(ctx) + s.isAnchor = false + return true } func (s *Scanner) isMergeKey(ctx *Context) bool { @@ -942,13 +967,6 @@ func (s *Scanner) scan(ctx *Context) error { if s.scanSequence(ctx) { continue } - if ctx.existsBuffer() { - // '-' is literal - ctx.addBuf(c) - ctx.addOriginBuf(c) - s.progressColumn(ctx, 1) - continue - } case '[': if s.scanFlowArrayStart(ctx) { continue @@ -998,36 +1016,16 @@ func (s *Scanner) scan(ctx *Context) error { continue } case '\'', '"': - if !ctx.existsBuffer() { - ctx.addToken(s.scanQuote(ctx, c)) - // If the non-whitespace character immediately following the quote is ':', the quote should be treated as a map key. - // Therefore, do not return and continue processing as a normal map key. - if ctx.currentCharWithSkipWhitespace() == ':' { - continue - } - ctx.clear() + if s.scanQuote(ctx, c) { continue } case '\r', '\n': s.scanNewLine(ctx, c) continue case ' ': - if ctx.isDocument() || (!s.isAnchor && !s.isFirstCharAtLine) { - ctx.addBuf(c) - ctx.addOriginBuf(c) - s.progressColumn(ctx, 1) - continue - } - if s.isFirstCharAtLine { - s.progressColumn(ctx, 1) - ctx.addOriginBuf(c) + if s.scanWhiteSpace(ctx) { continue } - s.addBufferedTokenIfExists(ctx) - s.isAnchor = false - // rescan white space at next scanning for adding white space to next buffer. - ctx.clear() - continue } ctx.addBuf(c) ctx.addOriginBuf(c) From e47e0bca50f5803c3672d2186a79a1e1c2abb96f Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Wed, 30 Oct 2024 02:13:37 +0900 Subject: [PATCH 3/3] remove unused function --- scanner/context.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/scanner/context.go b/scanner/context.go index 44e15be5..54dc0b79 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -6,8 +6,6 @@ import ( "github.com/goccy/go-yaml/token" ) -const whitespace = ' ' - // Context context at scanning type Context struct { idx int @@ -152,18 +150,6 @@ func (c *Context) currentChar() rune { return rune(0) } -func (c *Context) currentCharWithSkipWhitespace() rune { - idx := c.idx - for c.size > idx { - ch := c.src[idx] - if ch != whitespace { - return ch - } - idx++ - } - return rune(0) -} - func (c *Context) nextChar() rune { if c.size > c.idx+1 { return c.src[c.idx+1]