diff --git a/benchmark_test.go b/benchmark_test.go
new file mode 100644
index 00000000..7abd697b
--- /dev/null
+++ b/benchmark_test.go
@@ -0,0 +1,52 @@
+package yaml_test
+
+import (
+	"testing"
+
+	"github.com/goccy/go-yaml"
+	goyaml2 "gopkg.in/yaml.v2"
+	goyaml3 "gopkg.in/yaml.v3"
+)
+
+func Benchmark(b *testing.B) {
+	const src = `---
+id: 1
+message: Hello, World
+verified: true
+elements:
+  - one
+  - 0.02
+  - null
+  - -inf
+`
+	type T struct {
+		ID       int    `yaml:"id"`
+		Message  string `yaml:"message"`
+		Verified bool   `yaml:"verified,omitempty"`
+	}
+
+	b.Run("gopkg.in/yaml.v2", func(b *testing.B) {
+		var t T
+		for i := 0; i < b.N; i++ {
+			if err := goyaml2.Unmarshal([]byte(src), &t); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+	b.Run("gopkg.in/yaml.v3", func(b *testing.B) {
+		var t T
+		for i := 0; i < b.N; i++ {
+			if err := goyaml3.Unmarshal([]byte(src), &t); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+	b.Run("github.com/goccy/go-yaml", func(b *testing.B) {
+		var t T
+		for i := 0; i < b.N; i++ {
+			if err := yaml.Unmarshal([]byte(src), &t); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+}
diff --git a/decode.go b/decode.go
index 9bd0dddf..e1c1f6d8 100644
--- a/decode.go
+++ b/decode.go
@@ -1,6 +1,7 @@
 package yaml
 
 import (
+	"bytes"
 	"encoding/base64"
 	"fmt"
 	"io"
@@ -82,6 +83,33 @@ func (d *Decoder) castToFloat(v interface{}) interface{} {
 	return 0
 }
 
+// setToMapValue stores the entries described by node into m. The value of a
+// merge key is flattened into m instead of being stored under its own key.
+func (d *Decoder) setToMapValue(node ast.Node, m map[string]interface{}) {
+	switch n := node.(type) {
+	case *ast.MappingValueNode:
+		if n.Key.Type() == ast.MergeKeyType {
+			d.setToMapValue(n.Value, m)
+		} else {
+			key := n.Key.GetToken().Value
+			m[key] = d.nodeToValue(n.Value)
+		}
+	case *ast.MappingNode:
+		for _, value := range n.Values {
+			d.setToMapValue(value, m)
+		}
+	default:
+		// The merged value need not be an inline mapping (for example it can
+		// be an alias), so decode it and merge the result when it is a map,
+		// as the code did before this helper was introduced.
+		if mapValue, ok := d.nodeToValue(node).(map[string]interface{}); ok {
+			for k, v := range mapValue {
+				m[k] = v
+			}
+		}
+	}
+}
+
 func (d *Decoder) nodeToValue(node ast.Node) interface{} {
 	switch n := node.(type) {
 	case *ast.NullNode:
@@ -122,28 +150,23 @@ func (d *Decoder) nodeToValue(node ast.Node) interface{} {
 	case *ast.LiteralNode:
 		return n.Value.GetValue()
 	case *ast.MappingValueNode:
-		m := map[string]interface{}{}
 		if n.Key.Type() == ast.MergeKeyType {
-			mapValue := d.nodeToValue(n.Value).(map[string]interface{})
-			for k, v := range mapValue {
-				m[k] = v
-			}
-		} else {
-			key := n.Key.GetToken().Value
-			m[key] = d.nodeToValue(n.Value)
+			m := map[string]interface{}{}
+			d.setToMapValue(n.Value, m)
+			return m
+		}
+		key := n.Key.GetToken().Value
+		return map[string]interface{}{
+			key: d.nodeToValue(n.Value),
 		}
-		return m
 	case *ast.MappingNode:
-		m := map[string]interface{}{}
+		m := make(map[string]interface{}, len(n.Values))
 		for _, value := range n.Values {
-			subMap := d.nodeToValue(value).(map[string]interface{})
-			for k, v := range subMap {
-				m[k] = v
-			}
+			d.setToMapValue(value, m)
 		}
 		return m
 	case *ast.SequenceNode:
-		v := []interface{}{}
+		v := make([]interface{}, 0, len(n.Values))
 		for _, value := range n.Values {
 			v = append(v, d.nodeToValue(value))
 		}
@@ -946,11 +969,11 @@ func (d *Decoder) Decode(v interface{}) error {
 	if rv.Type().Kind() != reflect.Ptr {
 		return errors.ErrDecodeRequiredPointerType
 	}
-	bytes, err := ioutil.ReadAll(d.reader)
-	if err != nil {
-		return errors.Wrapf(err, "failed to read buffer")
+	var buf bytes.Buffer
+	if _, err := io.Copy(&buf, d.reader); err != nil {
+		return errors.Wrapf(err, "failed to copy from reader")
 	}
-	node, err := d.decode(bytes)
+	node, err := d.decode(buf.Bytes())
 	if err != nil {
 		return errors.Wrapf(err, "failed to decode")
 	}
diff --git a/scanner/context.go b/scanner/context.go
index 0f500fa3..a6f4ec1c 100644
--- a/scanner/context.go
+++ b/scanner/context.go
@@ -1,41 +1,78 @@
 package scanner
 
 import (
-	"strings"
+	"sync"
 
 	"github.com/goccy/go-yaml/token"
 )
 
 // Context context at scanning
 type Context struct {
-	idx          int
-	size         int
-	src          []rune
-	buf          []rune
-	obuf         []rune
-	tokens       token.Tokens
-	isRawFolded  bool
-	isLiteral    bool
-	isFolded     bool
-	isSingleLine bool
-	literalOpt   string
-}
+	idx                int
+	size               int
+	notSpaceCharPos    int
+	notSpaceOrgCharPos int
+	src                []rune
+	buf                []rune
+	obuf               []rune
+	tokens             token.Tokens
+	isRawFolded        bool
+	isLiteral          bool
+	isFolded           bool
+	isSingleLine       bool
+	literalOpt         string
+}
+
+var (
+	// ctxPool recycles Context values so their buffers are reused between scans.
+	ctxPool = sync.Pool{
+		New: func() interface{} {
+			return createContext()
+		},
+	}
+)
 
-func newContext(src []rune) *Context {
+// createContext allocates an empty Context for the pool; reset prepares it for use.
+func createContext() *Context {
 	return &Context{
 		idx:          0,
-		size:         len(src),
-		src:          src,
 		tokens:       token.Tokens{},
-		buf:          make([]rune, 0, len(src)),
-		obuf:         make([]rune, 0, len(src)),
 		isSingleLine: true,
 	}
 }
 
+// newContext returns a pooled Context that has been reset to scan src.
+func newContext(src []rune) *Context {
+	ctx := ctxPool.Get().(*Context)
+	ctx.reset(src)
+	return ctx
+}
+
+// release hands c back to the pool; c must not be used afterwards.
+func (c *Context) release() {
+	ctxPool.Put(c)
+}
+
+// reset prepares c to scan src. Every flag is cleared as well, because a
+// Context taken from the pool still holds the state left by its last scan.
+func (c *Context) reset(src []rune) {
+	c.idx = 0
+	c.size = len(src)
+	c.src = src
+	c.tokens = c.tokens[:0]
+	c.resetBuffer()
+	c.isRawFolded = false
+	c.isLiteral = false
+	c.isFolded = false
+	c.isSingleLine = true
+	c.literalOpt = ""
+}
+
 func (c *Context) resetBuffer() {
 	c.buf = c.buf[:0]
 	c.obuf = c.obuf[:0]
+	c.notSpaceCharPos = 0
+	c.notSpaceOrgCharPos = 0
 }
 
 func (c *Context) isSaveIndentMode() bool {
@@ -57,20 +94,29 @@ func (c *Context) addToken(tk *token.Token) {
 }
 
 func (c *Context) addBuf(r rune) {
+	if len(c.buf) == 0 && r == ' ' {
+		return
+	}
 	c.buf = append(c.buf, r)
+	if r != ' ' {
+		c.notSpaceCharPos = len(c.buf)
+	}
 }
 
 func (c *Context) addOriginBuf(r rune) {
 	c.obuf = append(c.obuf, r)
+	if r != ' ' {
+		c.notSpaceOrgCharPos = len(c.obuf)
+	}
 }
 
 func (c *Context) removeRightSpaceFromBuf() int {
-	trimmedBuf := strings.TrimRight(string(c.obuf), " ")
-	buflen := len([]rune(trimmedBuf))
+	trimmedBuf := c.obuf[:c.notSpaceOrgCharPos]
+	buflen := len(trimmedBuf)
 	diff := len(c.obuf) - buflen
 	if diff > 0 {
 		c.obuf = c.obuf[:buflen]
-		c.buf = []rune(c.bufferedSrc())
+		c.buf = c.bufferedSrc()
 	}
 	return diff
 }
@@ -133,9 +179,14 @@ func (c *Context) nextPos() int {
 	return c.idx + 1
 }
 
-func (c *Context) bufferedSrc() string {
-	src := strings.TrimLeft(string(c.buf), " ")
-	src = strings.TrimRight(src, " ")
+// existsBuffer reports whether bufferedSrc would return a non-empty value.
+func (c *Context) existsBuffer() bool {
+	return len(c.bufferedSrc()) != 0
+}
+
+// bufferedSrc returns buf up to its last non-space rune (addBuf never stores leading spaces).
+func (c *Context) bufferedSrc() []rune {
+	src := c.buf[:c.notSpaceCharPos]
 	if len(src) > 0 && src[len(src)-1] == '\n' && c.isDocument() && c.literalOpt == "-" {
 		// remove end '\n' character
 		src = src[:len(src)-1]
@@ -151,8 +202,7 @@ func (c *Context) bufferedToken(pos *token.Position) *token.Token {
 	if len(source) == 0 {
 		return nil
 	}
-	tk := token.New(source, string(c.obuf), pos)
-	c.buf = c.buf[:0]
-	c.obuf = c.obuf[:0]
+	tk := token.New(string(source), string(c.obuf), pos)
+	c.resetBuffer()
 	return tk
 }
diff --git a/scanner/scanner.go b/scanner/scanner.go
index b8b6a276..26228473 100644
--- a/scanner/scanner.go
+++ b/scanner/scanner.go
@@ -61,8 +61,7 @@ func (s *Scanner) bufferedToken(ctx *Context) *token.Token {
 		s.savedPos = nil
 		return tk
 	}
-	trimmedSrc := strings.TrimLeft(string(ctx.buf), " ")
-	size := len([]rune(trimmedSrc))
+	size := len(ctx.buf)
 	return ctx.bufferedToken(&token.Position{
 		Line:   s.line,
 		Column: s.column - size,
@@ -95,7 +94,7 @@ func (s *Scanner) isNeededKeepPreviousIndentNum(ctx *Context, c rune) bool {
 	if ctx.isDocument() {
 		return true
 	}
-	if c == '-' && ctx.bufferedSrc() != "" {
+	if c == '-' && ctx.existsBuffer() {
 		return true
 	}
 	return false
@@ -262,7 +261,7 @@ func (s *Scanner) scanLiteral(ctx *Context, c rune) {
 	if ctx.isEOS() {
 		ctx.addBuf(c)
 		value := ctx.bufferedSrc()
-		ctx.addToken(token.New(value, string(ctx.obuf), s.pos()))
+		ctx.addToken(token.New(string(value), string(ctx.obuf), s.pos()))
 		ctx.resetBuffer()
 		s.progressColumn(ctx, 1)
 	} else if s.isNewLineChar(c) {
@@ -322,7 +321,7 @@ func (s *Scanner) scanLiteralHeader(ctx *Context) (pos int, err error) {
 func (s *Scanner) scanNewLine(ctx *Context, c rune) {
 	if len(ctx.buf) > 0 && s.savedPos == nil {
 		s.savedPos = s.pos()
-		s.savedPos.Column -= len([]rune(ctx.bufferedSrc()))
+		s.savedPos.Column -= len(ctx.bufferedSrc())
 	}
 
 	// if the following case, origin buffer has unnecessary two spaces.
@@ -334,7 +333,9 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) {
 	if removedNum > 0 {
 		s.column -= removedNum
 		s.offset -= removedNum
-		s.savedPos.Column -= removedNum
+		if s.savedPos != nil {
+			s.savedPos.Column -= removedNum
+		}
 	}
 
 	if ctx.isEOS() {
@@ -373,7 +374,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 		}
 		switch c {
 		case '{':
-			if ctx.bufferedSrc() == "" {
+			if !ctx.existsBuffer() {
 				ctx.addOriginBuf(c)
 				ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos()))
 				s.startedFlowMapNum++
@@ -381,7 +382,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				return
 			}
 		case '}':
-			if ctx.bufferedSrc() == "" || s.startedFlowMapNum > 0 {
+			if !ctx.existsBuffer() || s.startedFlowMapNum > 0 {
 				ctx.addToken(s.bufferedToken(ctx))
 				ctx.addOriginBuf(c)
 				ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos()))
@@ -412,7 +413,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				pos += 2
 				return
 			}
-			if ctx.bufferedSrc() != "" && s.isChangedToIndentStateUp() {
+			if ctx.existsBuffer() && s.isChangedToIndentStateUp() {
 				// raw folded
 				ctx.isRawFolded = true
 				ctx.addBuf(c)
@@ -420,7 +421,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				s.progressColumn(ctx, 1)
 				continue
 			}
-			if ctx.bufferedSrc() != "" && ctx.isSingleLine {
+			if ctx.existsBuffer() && ctx.isSingleLine {
 				// '-' is literal
 				ctx.addBuf(c)
 				ctx.addOriginBuf(c)
@@ -438,7 +439,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				return
 			}
 		case '[':
-			if ctx.bufferedSrc() == "" {
+			if !ctx.existsBuffer() {
 				ctx.addOriginBuf(c)
 				ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos()))
 				s.startedFlowSequenceNum++
@@ -446,7 +447,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				return
 			}
 		case ']':
-			if ctx.bufferedSrc() == "" || s.startedFlowSequenceNum > 0 {
+			if !ctx.existsBuffer() || s.startedFlowSequenceNum > 0 {
 				s.addBufferedTokenIfExists(ctx)
 				ctx.addOriginBuf(c)
 				ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos()))
@@ -476,7 +477,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				return
 			}
 		case '|', '>':
-			if ctx.bufferedSrc() == "" {
+			if !ctx.existsBuffer() {
 				progress, err := s.scanLiteralHeader(ctx)
 				if err != nil {
 					// TODO: returns syntax error object
@@ -487,7 +488,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				continue
 			}
 		case '!':
-			if ctx.bufferedSrc() == "" {
+			if !ctx.existsBuffer() {
 				token, progress := s.scanTag(ctx)
 				ctx.addToken(token)
 				s.progressColumn(ctx, progress)
@@ -498,20 +499,20 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				return
 			}
 		case '%':
-			if ctx.bufferedSrc() == "" && s.indentNum == 0 {
+			if !ctx.existsBuffer() && s.indentNum == 0 {
 				ctx.addToken(token.Directive(s.pos()))
 				s.progressColumn(ctx, 1)
 				return
 			}
 		case '?':
 			nc := ctx.nextChar()
-			if ctx.bufferedSrc() == "" && nc == ' ' {
+			if !ctx.existsBuffer() && nc == ' ' {
 				ctx.addToken(token.Directive(s.pos()))
 				s.progressColumn(ctx, 1)
 				return
 			}
 		case '&':
-			if ctx.bufferedSrc() == "" {
+			if !ctx.existsBuffer() {
 				s.addBufferedTokenIfExists(ctx)
 				ctx.addOriginBuf(c)
 				ctx.addToken(token.Anchor(string(ctx.obuf), s.pos()))
@@ -520,7 +521,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				return
 			}
 		case '*':
-			if ctx.bufferedSrc() == "" {
+			if !ctx.existsBuffer() {
 				s.addBufferedTokenIfExists(ctx)
 				ctx.addOriginBuf(c)
 				ctx.addToken(token.Alias(string(ctx.obuf), s.pos()))
@@ -528,7 +529,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				return
 			}
 		case '#':
-			if ctx.bufferedSrc() == "" || ctx.previousChar() == ' ' {
+			if !ctx.existsBuffer() || ctx.previousChar() == ' ' {
 				s.addBufferedTokenIfExists(ctx)
 				token, progress := s.scanComment(ctx)
 				ctx.addToken(token)
@@ -538,7 +539,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) {
 				return
 			}
 		case '\'', '"':
-			if ctx.bufferedSrc() == "" {
+			if !ctx.existsBuffer() {
 				token, progress := s.scanQuote(ctx, c)
 				ctx.addToken(token)
 				s.progressColumn(ctx, progress)
@@ -605,7 +606,11 @@ func (s *Scanner) Scan() (token.Tokens, error) {
 		return nil, io.EOF
 	}
 	ctx := newContext(s.source[s.sourcePos:])
+	defer ctx.release()
 	progress := s.scan(ctx)
 	s.sourcePos += progress
-	return ctx.tokens, nil
+	// Copy the tokens out: ctx returns to the pool and its slice is reused.
+	var tokens token.Tokens
+	tokens = append(tokens, ctx.tokens...)
+	return tokens, nil
 }