From 0051e82e81a02bfd3bc65d5cc2e01b7591188ad2 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 28 Dec 2019 22:20:45 +0900 Subject: [PATCH 1/9] Improve performance - reuse context instance - remove strings.TrimLeft and strings.TrimRight --- scanner/context.go | 84 ++++++++++++++++++++++++++++++++-------------- scanner/scanner.go | 5 ++- 2 files changed, 63 insertions(+), 26 deletions(-) diff --git a/scanner/context.go b/scanner/context.go index 0f500fa3..4368c12f 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -1,41 +1,68 @@ package scanner import ( - "strings" + "sync" "github.com/goccy/go-yaml/token" ) // Context context at scanning type Context struct { - idx int - size int - src []rune - buf []rune - obuf []rune - tokens token.Tokens - isRawFolded bool - isLiteral bool - isFolded bool - isSingleLine bool - literalOpt string -} + idx int + size int + notSpaceCharPos int + notSpaceOrgCharPos int + src []rune + buf []rune + obuf []rune + tokens token.Tokens + isRawFolded bool + isLiteral bool + isFolded bool + isSingleLine bool + literalOpt string +} + +var ( + ctxPool = sync.Pool{ + New: func() interface{} { + return createContext() + }, + } +) -func newContext(src []rune) *Context { +func createContext() *Context { return &Context{ idx: 0, - size: len(src), - src: src, tokens: token.Tokens{}, - buf: make([]rune, 0, len(src)), - obuf: make([]rune, 0, len(src)), isSingleLine: true, } } +func newContext(src []rune) *Context { + ctx := ctxPool.Get().(*Context) + ctx.reset(src) + return ctx +} + +func (c *Context) release() { + ctxPool.Put(c) +} + +func (c *Context) reset(src []rune) { + c.idx = 0 + c.size = len(src) + c.src = src + c.tokens = c.tokens[:0] + c.resetBuffer() + c.isSingleLine = true +} + func (c *Context) resetBuffer() { c.buf = c.buf[:0] c.obuf = c.obuf[:0] + c.notSpaceCharPos = 0 + c.notSpaceOrgCharPos = 0 } func (c *Context) isSaveIndentMode() bool { @@ -57,16 +84,25 @@ func (c *Context) addToken(tk *token.Token) { } func (c *Context) addBuf(r rune) { + if len(c.buf) == 0 && r == ' ' { + return + } c.buf = append(c.buf, r) + if r != ' ' { + c.notSpaceCharPos = len(c.buf) + } } func (c *Context) addOriginBuf(r rune) { c.obuf = append(c.obuf, r) + if r != ' ' { + c.notSpaceOrgCharPos = len(c.obuf) + } } func (c *Context) removeRightSpaceFromBuf() int { - trimmedBuf := strings.TrimRight(string(c.obuf), " ") - buflen := len([]rune(trimmedBuf)) + trimmedBuf := c.obuf[:c.notSpaceOrgCharPos] + buflen := len(trimmedBuf) diff := len(c.obuf) - buflen if diff > 0 { c.obuf = c.obuf[:buflen] @@ -134,13 +170,12 @@ func (c *Context) nextPos() int { } func (c *Context) bufferedSrc() string { - src := strings.TrimLeft(string(c.buf), " ") - src = strings.TrimRight(src, " ") + src := c.buf[:c.notSpaceCharPos] if len(src) > 0 && src[len(src)-1] == '\n' && c.isDocument() && c.literalOpt == "-" { // remove end '\n' character src = src[:len(src)-1] } - return src + return string(src) } func (c *Context) bufferedToken(pos *token.Position) *token.Token { @@ -152,7 +187,6 @@ func (c *Context) bufferedToken(pos *token.Position) *token.Token { return nil } tk := token.New(source, string(c.obuf), pos) - c.buf = c.buf[:0] - c.obuf = c.obuf[:0] + c.resetBuffer() return tk } diff --git a/scanner/scanner.go b/scanner/scanner.go index b8b6a276..633bcd41 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -334,7 +334,9 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) { if removedNum > 0 { s.column -= removedNum s.offset -= removedNum - s.savedPos.Column -= removedNum + if 
s.savedPos != nil { + s.savedPos.Column -= removedNum + } } if ctx.isEOS() { @@ -607,5 +609,6 @@ func (s *Scanner) Scan() (token.Tokens, error) { ctx := newContext(s.source[s.sourcePos:]) progress := s.scan(ctx) s.sourcePos += progress + ctx.release() return ctx.tokens, nil } From 5a1231fc47f8c28960c993b4454fc52886142af5 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 28 Dec 2019 22:37:50 +0900 Subject: [PATCH 2/9] Improve performance ( fix bufferedSrc interface ) --- scanner/context.go | 12 ++++++++---- scanner/scanner.go | 34 +++++++++++++++++----------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/scanner/context.go b/scanner/context.go index 4368c12f..a6f4ec1c 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -106,7 +106,7 @@ func (c *Context) removeRightSpaceFromBuf() int { diff := len(c.obuf) - buflen if diff > 0 { c.obuf = c.obuf[:buflen] - c.buf = []rune(c.bufferedSrc()) + c.buf = c.bufferedSrc() } return diff } @@ -169,13 +169,17 @@ func (c *Context) nextPos() int { return c.idx + 1 } -func (c *Context) bufferedSrc() string { +func (c *Context) existsBuffer() bool { + return len(c.bufferedSrc()) != 0 +} + +func (c *Context) bufferedSrc() []rune { src := c.buf[:c.notSpaceCharPos] if len(src) > 0 && src[len(src)-1] == '\n' && c.isDocument() && c.literalOpt == "-" { // remove end '\n' character src = src[:len(src)-1] } - return string(src) + return src } func (c *Context) bufferedToken(pos *token.Position) *token.Token { @@ -186,7 +190,7 @@ func (c *Context) bufferedToken(pos *token.Position) *token.Token { if len(source) == 0 { return nil } - tk := token.New(source, string(c.obuf), pos) + tk := token.New(string(source), string(c.obuf), pos) c.resetBuffer() return tk } diff --git a/scanner/scanner.go b/scanner/scanner.go index 633bcd41..1a206872 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -95,7 +95,7 @@ func (s *Scanner) isNeededKeepPreviousIndentNum(ctx *Context, c rune) bool { if ctx.isDocument() { return true } - if c == '-' && ctx.bufferedSrc() != "" { + if c == '-' && ctx.existsBuffer() { return true } return false @@ -262,7 +262,7 @@ func (s *Scanner) scanLiteral(ctx *Context, c rune) { if ctx.isEOS() { ctx.addBuf(c) value := ctx.bufferedSrc() - ctx.addToken(token.New(value, string(ctx.obuf), s.pos())) + ctx.addToken(token.New(string(value), string(ctx.obuf), s.pos())) ctx.resetBuffer() s.progressColumn(ctx, 1) } else if s.isNewLineChar(c) { @@ -322,7 +322,7 @@ func (s *Scanner) scanLiteralHeader(ctx *Context) (pos int, err error) { func (s *Scanner) scanNewLine(ctx *Context, c rune) { if len(ctx.buf) > 0 && s.savedPos == nil { s.savedPos = s.pos() - s.savedPos.Column -= len([]rune(ctx.bufferedSrc())) + s.savedPos.Column -= len(ctx.bufferedSrc()) } // if the following case, origin buffer has unnecessary two spaces. 
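Note on the hunks below: every ctx.bufferedSrc() == "" comparison becomes !ctx.existsBuffer(). Since bufferedSrc now returns []rune instead of string, an emptiness test no longer forces a rune-to-string conversion (one allocation plus a copy per call); it only reads the slice length. Tracking notSpaceCharPos while appending also lets the scanner trim trailing spaces by slicing, replacing strings.TrimRight. A minimal self-contained sketch of the same pattern, with illustrative names that are not part of the library:

    package main

    import "fmt"

    // buffer mimics the scanner's rune buffer: notSpacePos tracks the
    // position just past the last non-space rune seen, so trailing
    // spaces can be trimmed by slicing instead of strings.TrimRight.
    type buffer struct {
        buf         []rune
        notSpacePos int
    }

    func (b *buffer) add(r rune) {
        b.buf = append(b.buf, r)
        if r != ' ' {
            b.notSpacePos = len(b.buf)
        }
    }

    // src returns the buffered runes without trailing spaces and
    // without converting to string.
    func (b *buffer) src() []rune { return b.buf[:b.notSpacePos] }

    // exists is the cheap emptiness test: a length check, where the
    // old form (string comparison against "") allocated on every call.
    func (b *buffer) exists() bool { return len(b.src()) != 0 }

    func main() {
        var b buffer
        for _, r := range "abc  " {
            b.add(r)
        }
        fmt.Printf("%q %v\n", string(b.src()), b.exists()) // "abc" true
    }
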
@@ -375,7 +375,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { } switch c { case '{': - if ctx.bufferedSrc() == "" { + if !ctx.existsBuffer() { ctx.addOriginBuf(c) ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos())) s.startedFlowMapNum++ @@ -383,7 +383,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { return } case '}': - if ctx.bufferedSrc() == "" || s.startedFlowMapNum > 0 { + if !ctx.existsBuffer() || s.startedFlowMapNum > 0 { ctx.addToken(s.bufferedToken(ctx)) ctx.addOriginBuf(c) ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos())) @@ -414,7 +414,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { pos += 2 return } - if ctx.bufferedSrc() != "" && s.isChangedToIndentStateUp() { + if ctx.existsBuffer() && s.isChangedToIndentStateUp() { // raw folded ctx.isRawFolded = true ctx.addBuf(c) @@ -422,7 +422,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { s.progressColumn(ctx, 1) continue } - if ctx.bufferedSrc() != "" && ctx.isSingleLine { + if ctx.existsBuffer() && ctx.isSingleLine { // '-' is literal ctx.addBuf(c) ctx.addOriginBuf(c) @@ -440,7 +440,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { return } case '[': - if ctx.bufferedSrc() == "" { + if !ctx.existsBuffer() { ctx.addOriginBuf(c) ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos())) s.startedFlowSequenceNum++ @@ -448,7 +448,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { return } case ']': - if ctx.bufferedSrc() == "" || s.startedFlowSequenceNum > 0 { + if !ctx.existsBuffer() || s.startedFlowSequenceNum > 0 { s.addBufferedTokenIfExists(ctx) ctx.addOriginBuf(c) ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos())) @@ -478,7 +478,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { return } case '|', '>': - if ctx.bufferedSrc() == "" { + if !ctx.existsBuffer() { progress, err := s.scanLiteralHeader(ctx) if err != nil { // TODO: returns syntax error object @@ -489,7 +489,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { continue } case '!': - if ctx.bufferedSrc() == "" { + if !ctx.existsBuffer() { token, progress := s.scanTag(ctx) ctx.addToken(token) s.progressColumn(ctx, progress) @@ -500,20 +500,20 @@ func (s *Scanner) scan(ctx *Context) (pos int) { return } case '%': - if ctx.bufferedSrc() == "" && s.indentNum == 0 { + if !ctx.existsBuffer() && s.indentNum == 0 { ctx.addToken(token.Directive(s.pos())) s.progressColumn(ctx, 1) return } case '?': nc := ctx.nextChar() - if ctx.bufferedSrc() == "" && nc == ' ' { + if !ctx.existsBuffer() && nc == ' ' { ctx.addToken(token.Directive(s.pos())) s.progressColumn(ctx, 1) return } case '&': - if ctx.bufferedSrc() == "" { + if !ctx.existsBuffer() { s.addBufferedTokenIfExists(ctx) ctx.addOriginBuf(c) ctx.addToken(token.Anchor(string(ctx.obuf), s.pos())) @@ -522,7 +522,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { return } case '*': - if ctx.bufferedSrc() == "" { + if !ctx.existsBuffer() { s.addBufferedTokenIfExists(ctx) ctx.addOriginBuf(c) ctx.addToken(token.Alias(string(ctx.obuf), s.pos())) @@ -530,7 +530,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { return } case '#': - if ctx.bufferedSrc() == "" || ctx.previousChar() == ' ' { + if !ctx.existsBuffer() || ctx.previousChar() == ' ' { s.addBufferedTokenIfExists(ctx) token, progress := s.scanComment(ctx) ctx.addToken(token) @@ -540,7 +540,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { return } case '\'', '"': - if ctx.bufferedSrc() == "" { + if !ctx.existsBuffer() { token, progress := s.scanQuote(ctx, c) ctx.addToken(token) s.progressColumn(ctx, progress) From 
cc226505d0fc478e5048859fe47e894137333c11 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 28 Dec 2019 23:03:53 +0900 Subject: [PATCH 3/9] Improve performance ( fix Tokenize interface ) --- cmd/ycat/ycat.go | 2 +- lexer/lexer.go | 2 +- lexer/lexer_test.go | 2 +- parser/parser.go | 2 +- parser/parser_test.go | 4 ++-- printer/printer_test.go | 4 ++-- scanner/scanner.go | 4 ++-- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cmd/ycat/ycat.go b/cmd/ycat/ycat.go index c70cb3bc..bd6bbf8d 100644 --- a/cmd/ycat/ycat.go +++ b/cmd/ycat/ycat.go @@ -28,7 +28,7 @@ func _main(args []string) error { if err != nil { return err } - tokens := lexer.Tokenize(string(bytes)) + tokens := lexer.Tokenize(bytes) var p printer.Printer p.LineNumber = true p.LineNumberFormat = func(num int) string { diff --git a/lexer/lexer.go b/lexer/lexer.go index 3207f4f2..f93faade 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -8,7 +8,7 @@ import ( ) // Tokenize split to token instances from string -func Tokenize(src string) token.Tokens { +func Tokenize(src []byte) token.Tokens { var s scanner.Scanner s.Init(src) var tokens token.Tokens diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index b54cdcc9..f8f276cc 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -56,6 +56,6 @@ func TestTokenize(t *testing.T) { "a: bogus\n", } for _, src := range sources { - lexer.Tokenize(src).Dump() + lexer.Tokenize([]byte(src)).Dump() } } diff --git a/parser/parser.go b/parser/parser.go index 75734cc7..4c2bece1 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -393,7 +393,7 @@ const ( // ParseBytes parse from byte slice, and returns ast.File func ParseBytes(bytes []byte, mode Mode) (*ast.File, error) { - tokens := lexer.Tokenize(string(bytes)) + tokens := lexer.Tokenize(bytes) f, err := Parse(tokens, mode) if err != nil { return nil, errors.Wrapf(err, "failed to parse") diff --git a/parser/parser_test.go b/parser/parser_test.go index 73a89ffa..cd2ab88d 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -69,7 +69,7 @@ func TestParser(t *testing.T) { } for _, src := range sources { fmt.Printf(src) - tokens := lexer.Tokenize(src) + tokens := lexer.Tokenize([]byte(src)) var printer printer.Printer fmt.Println(printer.PrintTokens(tokens)) ast, err := parser.Parse(tokens, 0) @@ -474,7 +474,7 @@ c: d }, } for _, test := range tests { - tokens := lexer.Tokenize(test.source) + tokens := lexer.Tokenize([]byte(test.source)) tokens.Dump() f, err := parser.Parse(tokens, 0) if err != nil { diff --git a/printer/printer_test.go b/printer/printer_test.go index dc8d4bdb..8eb7f161 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -8,7 +8,7 @@ import ( ) func Test_Printer(t *testing.T) { - yml := `--- + yml := []byte(`--- text: aaaa text2: aaaa bbbb @@ -24,7 +24,7 @@ bool: true number: 10 anchor: &x 1 alias: *x -` +`) t.Run("print starting from tokens[3]", func(t *testing.T) { tokens := lexer.Tokenize(yml) var p printer.Printer diff --git a/scanner/scanner.go b/scanner/scanner.go index 1a206872..7d42eb7e 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -585,8 +585,8 @@ func (s *Scanner) scan(ctx *Context) (pos int) { } // Init prepares the scanner s to tokenize the text src by setting the scanner at the beginning of src. 
-func (s *Scanner) Init(text string) { - src := []rune(text) +func (s *Scanner) Init(text []byte) { + src := []rune(string(text)) s.source = src s.sourcePos = 0 s.sourceSize = len(src) From 342b3de34d66724d7a9f9e32a873dc051690a561 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 29 Dec 2019 00:06:46 +0900 Subject: [PATCH 4/9] Improve performance ( reduce allocation for map value at decoding ) --- decode.go | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/decode.go b/decode.go index 9bd0dddf..c6c5abae 100644 --- a/decode.go +++ b/decode.go @@ -82,6 +82,22 @@ func (d *Decoder) castToFloat(v interface{}) interface{} { return 0 } +func (d *Decoder) setToMapValue(node ast.Node, m map[string]interface{}) { + switch n := node.(type) { + case *ast.MappingValueNode: + if n.Key.Type() == ast.MergeKeyType { + d.setToMapValue(n.Value, m) + } else { + key := n.Key.GetToken().Value + m[key] = d.nodeToValue(n.Value) + } + case *ast.MappingNode: + for _, value := range n.Values { + d.setToMapValue(value, m) + } + } +} + func (d *Decoder) nodeToValue(node ast.Node) interface{} { switch n := node.(type) { case *ast.NullNode: @@ -122,28 +138,23 @@ func (d *Decoder) nodeToValue(node ast.Node) interface{} { case *ast.LiteralNode: return n.Value.GetValue() case *ast.MappingValueNode: - m := map[string]interface{}{} if n.Key.Type() == ast.MergeKeyType { - mapValue := d.nodeToValue(n.Value).(map[string]interface{}) - for k, v := range mapValue { - m[k] = v - } - } else { - key := n.Key.GetToken().Value - m[key] = d.nodeToValue(n.Value) + m := map[string]interface{}{} + d.setToMapValue(n.Value, m) + return m + } + key := n.Key.GetToken().Value + return map[string]interface{}{ + key: d.nodeToValue(n.Value), } - return m case *ast.MappingNode: - m := map[string]interface{}{} + m := make(map[string]interface{}, len(n.Values)) for _, value := range n.Values { - subMap := d.nodeToValue(value).(map[string]interface{}) - for k, v := range subMap { - m[k] = v - } + d.setToMapValue(value, m) } return m case *ast.SequenceNode: - v := []interface{}{} + v := make([]interface{}, 0, len(n.Values)) for _, value := range n.Values { v = append(v, d.nodeToValue(value)) } From 3a9093bd0148c2c13484789e470df8a30f9aea68 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 29 Dec 2019 00:16:35 +0900 Subject: [PATCH 5/9] Improve performance ( remove strings.TrimLeft from scanner ) --- scanner/scanner.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 7d42eb7e..f9b359a4 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -61,8 +61,7 @@ func (s *Scanner) bufferedToken(ctx *Context) *token.Token { s.savedPos = nil return tk } - trimmedSrc := strings.TrimLeft(string(ctx.buf), " ") - size := len([]rune(trimmedSrc)) + size := len(ctx.buf) return ctx.bufferedToken(&token.Position{ Line: s.line, Column: s.column - size, From a2fad62bcb923e8ab4af4a5aa2cc9781e10c9664 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 29 Dec 2019 02:04:39 +0900 Subject: [PATCH 6/9] Improve performance ( use io.Copy to read from io.Reader ) --- decode.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/decode.go b/decode.go index c6c5abae..e1c1f6d8 100644 --- a/decode.go +++ b/decode.go @@ -1,6 +1,7 @@ package yaml import ( + "bytes" "encoding/base64" "fmt" "io" @@ -957,11 +958,11 @@ func (d *Decoder) Decode(v interface{}) error { if rv.Type().Kind() != reflect.Ptr { return 
errors.ErrDecodeRequiredPointerType } - bytes, err := ioutil.ReadAll(d.reader) - if err != nil { - return errors.Wrapf(err, "failed to read buffer") + var buf bytes.Buffer + if _, err := io.Copy(&buf, d.reader); err != nil { + return errors.Wrapf(err, "failed to copy from reader") } - node, err := d.decode(bytes) + node, err := d.decode(buf.Bytes()) if err != nil { return errors.Wrapf(err, "failed to decode") } From 99d949e0cbaacabd6229687875800b724cf0b13c Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 29 Dec 2019 11:37:20 +0900 Subject: [PATCH 7/9] Revert "Improve performance ( fix Tokenize interface )" This reverts commit cc226505d0fc478e5048859fe47e894137333c11. --- cmd/ycat/ycat.go | 2 +- lexer/lexer.go | 2 +- lexer/lexer_test.go | 2 +- parser/parser.go | 2 +- parser/parser_test.go | 4 ++-- printer/printer_test.go | 4 ++-- scanner/scanner.go | 4 ++-- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cmd/ycat/ycat.go b/cmd/ycat/ycat.go index bd6bbf8d..c70cb3bc 100644 --- a/cmd/ycat/ycat.go +++ b/cmd/ycat/ycat.go @@ -28,7 +28,7 @@ func _main(args []string) error { if err != nil { return err } - tokens := lexer.Tokenize(bytes) + tokens := lexer.Tokenize(string(bytes)) var p printer.Printer p.LineNumber = true p.LineNumberFormat = func(num int) string { diff --git a/lexer/lexer.go b/lexer/lexer.go index f93faade..3207f4f2 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -8,7 +8,7 @@ import ( ) // Tokenize split to token instances from string -func Tokenize(src []byte) token.Tokens { +func Tokenize(src string) token.Tokens { var s scanner.Scanner s.Init(src) var tokens token.Tokens diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index f8f276cc..b54cdcc9 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -56,6 +56,6 @@ func TestTokenize(t *testing.T) { "a: bogus\n", } for _, src := range sources { - lexer.Tokenize([]byte(src)).Dump() + lexer.Tokenize(src).Dump() } } diff --git a/parser/parser.go b/parser/parser.go index 4c2bece1..75734cc7 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -393,7 +393,7 @@ const ( // ParseBytes parse from byte slice, and returns ast.File func ParseBytes(bytes []byte, mode Mode) (*ast.File, error) { - tokens := lexer.Tokenize(bytes) + tokens := lexer.Tokenize(string(bytes)) f, err := Parse(tokens, mode) if err != nil { return nil, errors.Wrapf(err, "failed to parse") diff --git a/parser/parser_test.go b/parser/parser_test.go index cd2ab88d..73a89ffa 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -69,7 +69,7 @@ func TestParser(t *testing.T) { } for _, src := range sources { fmt.Printf(src) - tokens := lexer.Tokenize([]byte(src)) + tokens := lexer.Tokenize(src) var printer printer.Printer fmt.Println(printer.PrintTokens(tokens)) ast, err := parser.Parse(tokens, 0) @@ -474,7 +474,7 @@ c: d }, } for _, test := range tests { - tokens := lexer.Tokenize([]byte(test.source)) + tokens := lexer.Tokenize(test.source) tokens.Dump() f, err := parser.Parse(tokens, 0) if err != nil { diff --git a/printer/printer_test.go b/printer/printer_test.go index 8eb7f161..dc8d4bdb 100644 --- a/printer/printer_test.go +++ b/printer/printer_test.go @@ -8,7 +8,7 @@ import ( ) func Test_Printer(t *testing.T) { - yml := []byte(`--- + yml := `--- text: aaaa text2: aaaa bbbb @@ -24,7 +24,7 @@ bool: true number: 10 anchor: &x 1 alias: *x -`) +` t.Run("print starting from tokens[3]", func(t *testing.T) { tokens := lexer.Tokenize(yml) var p printer.Printer diff --git a/scanner/scanner.go b/scanner/scanner.go index 
f9b359a4..e11b3947 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -584,8 +584,8 @@ func (s *Scanner) scan(ctx *Context) (pos int) { } // Init prepares the scanner s to tokenize the text src by setting the scanner at the beginning of src. -func (s *Scanner) Init(text []byte) { - src := []rune(string(text)) +func (s *Scanner) Init(text string) { + src := []rune(text) s.source = src s.sourcePos = 0 s.sourceSize = len(src) From c0dc746a3ebec0712023d1603e597ef8d5940d80 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 29 Dec 2019 11:47:34 +0900 Subject: [PATCH 8/9] Fix release sequence for scanner.Context --- scanner/scanner.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index e11b3947..26228473 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -606,8 +606,10 @@ func (s *Scanner) Scan() (token.Tokens, error) { return nil, io.EOF } ctx := newContext(s.source[s.sourcePos:]) + defer ctx.release() progress := s.scan(ctx) s.sourcePos += progress - ctx.release() - return ctx.tokens, nil + var tokens token.Tokens + tokens = append(tokens, ctx.tokens...) + return tokens, nil } From af4f3ec100c6c74b4cdeb59bab483545d0d7e983 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 29 Dec 2019 12:06:51 +0900 Subject: [PATCH 9/9] Add benchmark --- benchmark_test.go | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 benchmark_test.go diff --git a/benchmark_test.go b/benchmark_test.go new file mode 100644 index 00000000..7abd697b --- /dev/null +++ b/benchmark_test.go @@ -0,0 +1,52 @@ +package yaml_test + +import ( + "testing" + + "github.com/goccy/go-yaml" + goyaml2 "gopkg.in/yaml.v2" + goyaml3 "gopkg.in/yaml.v3" +) + +func Benchmark(b *testing.B) { + const src = `--- +id: 1 +message: Hello, World +verified: true +elements: + - one + - 0.02 + - null + - -inf +` + type T struct { + ID int `yaml:"id"` + Message string `yaml:"message"` + Verified bool `yaml:"verified,omitempty"` + } + + b.Run("gopkg.in/yaml.v2", func(b *testing.B) { + var t T + for i := 0; i < b.N; i++ { + if err := goyaml2.Unmarshal([]byte(src), &t); err != nil { + b.Fatal(err) + } + } + }) + b.Run("gopkg.in/yaml.v3", func(b *testing.B) { + var t T + for i := 0; i < b.N; i++ { + if err := goyaml3.Unmarshal([]byte(src), &t); err != nil { + b.Fatal(err) + } + } + }) + b.Run("github.com/goccy/go-yaml", func(b *testing.B) { + var t T + for i := 0; i < b.N; i++ { + if err := yaml.Unmarshal([]byte(src), &t); err != nil { + b.Fatal(err) + } + } + }) +}
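
A note on the context pool introduced in PATCH 1 and corrected in PATCH 8: returning ctx.tokens after ctx.release() hands the caller a slice whose backing array has already gone back into the sync.Pool, so a later Get from another Scan call can truncate and overwrite it. PATCH 8 therefore copies the tokens out before the deferred release runs. A simplified sketch of the safe lifecycle (stand-in types, not the library's API):

    package main

    import (
        "fmt"
        "sync"
    )

    type context struct{ tokens []string }

    var pool = sync.Pool{New: func() interface{} { return &context{} }}

    func scan(src string) []string {
        ctx := pool.Get().(*context)
        ctx.tokens = ctx.tokens[:0] // reset state left over from reuse
        defer pool.Put(ctx)         // release only after we are done with ctx

        ctx.tokens = append(ctx.tokens, src) // stand-in for real scanning

        // Copy before the context returns to the pool; handing out
        // ctx.tokens directly would alias memory a later Get may reuse.
        out := make([]string, len(ctx.tokens))
        copy(out, ctx.tokens)
        return out
    }

    func main() { fmt.Println(scan("a: b")) }

The same reasoning applies to any pooled object whose internals escape: copy whatever escapes, then release.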
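
On PATCH 4: the old decoder built a temporary map[string]interface{} for every mapping entry and merged it into the parent, allocating one map per key. setToMapValue instead writes each entry straight into a single destination map, which is pre-sized with make(map[string]interface{}, len(n.Values)). A rough standalone illustration of the before and after (simplified node shape, assumed for the example):

    package main

    import "fmt"

    type entry struct {
        key   string
        value interface{}
    }

    // mergeStyle mirrors the old code path: one throwaway map per entry,
    // each merged into the parent (N+1 map allocations for N entries).
    func mergeStyle(entries []entry) map[string]interface{} {
        m := map[string]interface{}{}
        for _, e := range entries {
            sub := map[string]interface{}{e.key: e.value}
            for k, v := range sub {
                m[k] = v
            }
        }
        return m
    }

    // directStyle mirrors the new code path: a single pre-sized map
    // that every entry is written into directly.
    func directStyle(entries []entry) map[string]interface{} {
        m := make(map[string]interface{}, len(entries))
        for _, e := range entries {
            m[e.key] = e.value
        }
        return m
    }

    func main() {
        es := []entry{{"id", 1}, {"message", "Hello, World"}}
        fmt.Println(mergeStyle(es))
        fmt.Println(directStyle(es))
    }
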
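
The benchmark in PATCH 9 pits this library against gopkg.in/yaml.v2 and gopkg.in/yaml.v3 on one small document. To run it with allocation statistics, which is what these patches target:

    go test -bench . -benchmem

Note that the []byte(src) conversion inside each loop body costs one allocation per iteration in all three sub-benchmarks, so it shifts the absolute numbers without biasing the comparison.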