From 73f18048a83c6f0d8095f26ecbce8f5a5c8166cc Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 16 Nov 2024 21:22:59 +0900 Subject: [PATCH 01/14] fix alias in map key --- ast/ast.go | 4 ++++ parser/parser.go | 29 +++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index e5341df5..af0a4335 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -1658,6 +1658,10 @@ type AliasNode struct { Value Node } +func (n *AliasNode) stringWithoutComment() string { + return n.Value.String() +} + func (n *AliasNode) SetName(name string) error { if n.Value == nil { return ErrInvalidAliasName diff --git a/parser/parser.go b/parser/parser.go index 169d5edc..b4ab6439 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -254,7 +254,7 @@ func (p *parser) parseTag(ctx *context) (*ast.TagNode, error) { if typ == token.LiteralType || typ == token.FoldedType { value, err = p.parseLiteral(ctx) } else { - value = p.parseScalarValue(p.currentToken()) + value, err = p.parseScalarValueWithComment(ctx, p.currentToken()) } case token.SequenceTag, token.SetTag: @@ -657,11 +657,15 @@ func (p *parser) parseAnchor(ctx *context) (*ast.AnchorNode, error) { return nil, errors.ErrSyntax("unexpected anchor. anchor name is undefined", tk) } p.progress(1) // skip anchor token - name, err := p.parseToken(ctx, p.currentToken()) + anchorNameTk := p.currentToken() + anchorNameNode, err := p.parseScalarValueWithComment(ctx, anchorNameTk) if err != nil { return nil, err } - anchor.Name = name + if anchorNameNode == nil { + return nil, errors.ErrSyntax("unexpected anchor. anchor name is not scalar value", anchorNameTk) + } + anchor.Name = anchorNameNode ntk = p.nextToken() if ntk == nil { return nil, errors.ErrSyntax("unexpected anchor. anchor value is undefined", p.currentToken()) @@ -684,17 +688,21 @@ func (p *parser) parseAlias(ctx *context) (*ast.AliasNode, error) { return nil, errors.ErrSyntax("unexpected alias. alias name is undefined", tk) } p.progress(1) // skip alias token - name, err := p.parseToken(ctx, p.currentToken()) + aliasNameTk := p.currentToken() + aliasNameNode, err := p.parseScalarValueWithComment(ctx, aliasNameTk) if err != nil { return nil, err } - alias.Value = name + if aliasNameNode == nil { + return nil, errors.ErrSyntax("unexpected alias. alias name is not scalar value", aliasNameTk) + } + alias.Value = aliasNameNode return alias, nil } func (p *parser) parseMapKey(ctx *context) (ast.MapKeyNode, error) { tk := p.currentToken() - if value := p.parseScalarValue(tk); value != nil { + if value, _ := p.parseScalarValueWithComment(ctx, tk); value != nil { return value, nil } switch tk.Type { @@ -702,6 +710,8 @@ func (p *parser) parseMapKey(ctx *context) (ast.MapKeyNode, error) { return ast.MergeKey(tk), nil case token.MappingKeyType: return p.parseMappingKey(ctx) + case token.AliasType: + return p.parseAlias(ctx) } return nil, errors.ErrSyntax("unexpected mapping key", tk) } @@ -929,6 +939,13 @@ func (p *parser) createNodeFromToken(ctx *context, tk *token.Token) (ast.Node, e node, err := p.parseMappingValue(ctx) return node, err } + if tk.Type == token.AliasType { + aliasValueTk := p.nextToken() + if aliasValueTk != nil && aliasValueTk.NextType() == token.MappingValueType { + node, err := p.parseMappingValue(ctx) + return node, err + } + } node, err := p.parseScalarValueWithComment(ctx, tk) if err != nil { return nil, err From 76e2eeebbc91f20f91cdb6c36b42c412ee5c64b9 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 16 Nov 2024 22:25:03 +0900 Subject: [PATCH 02/14] fix parsing of wrong-indented-sequence-item --- parser/parser.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/parser/parser.go b/parser/parser.go index b4ab6439..dc43f658 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -999,7 +999,13 @@ func (p *parser) parse(ctx *context) (*ast.File, error) { } if doc, ok := node.(*ast.DocumentNode); ok { file.Docs = append(file.Docs, doc) + } else if len(file.Docs) == 0 { + file.Docs = append(file.Docs, ast.Document(nil, node)) } else { + lastNode := file.Docs[len(file.Docs)-1] + if lastNode.GetToken().Position.Column != node.GetToken().Position.Column { + return nil, errors.ErrSyntax("value is not allowed in this context", node.GetToken()) + } file.Docs = append(file.Docs, ast.Document(nil, node)) } } From b093088fb95b371e2282ef1876dfbdd33a365648 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 16 Nov 2024 22:53:36 +0900 Subject: [PATCH 03/14] fix parsing of tab character before comment --- scanner/scanner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 366d18e1..7b7ec7a2 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -537,7 +537,7 @@ func (s *Scanner) scanTag(ctx *Context) bool { } func (s *Scanner) scanComment(ctx *Context) bool { - if ctx.existsBuffer() && ctx.previousChar() != ' ' { + if ctx.existsBuffer() && (ctx.previousChar() != ' ' && ctx.previousChar() != '\t') { return false } From 6c7351c4bc78643712f126aa41c6ae3bf1190fb1 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 16 Nov 2024 23:57:49 +0900 Subject: [PATCH 04/14] fix parsing of white space in double-quote --- scanner/scanner.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 7b7ec7a2..10f117c8 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -301,12 +301,17 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { c := src[idx] ctx.addOriginBuf(c) if s.isNewLineChar(c) { - if isFirstLineChar { - if value[len(value)-1] == ' ' { - value[len(value)-1] = '\n' - } else { - value = append(value, '\n') + var notSpaceIdx int + for i := len(value) - 1; i > 0; i-- { + if value[i] == ' ' { + continue } + notSpaceIdx = i + break + } + value = value[:notSpaceIdx+1] + if isFirstLineChar { + value = append(value, '\n') } else { value = append(value, ' ') } From f7e9e1b64bbaf738df5282c5ff21a3d3de69ba63 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 00:28:10 +0900 Subject: [PATCH 05/14] fix parsing of tab character in double-quote --- scanner/scanner.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/scanner/scanner.go b/scanner/scanner.go index 10f117c8..8e95fef2 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -420,6 +420,10 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { s.progressLine(ctx) idx++ continue + case '\t': + progress = 1 + ctx.addOriginBuf(nextChar) + value = append(value, nextChar) case ' ': // skip escape character. default: @@ -428,6 +432,29 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { idx += progress s.progressColumn(ctx, progress) continue + } else if c == '\t' { + var ( + foundNotSpaceChar bool + progress int + ) + for i := idx + 1; i < size; i++ { + if src[i] == ' ' || src[i] == '\t' { + progress++ + continue + } + if src[i] == '\n' { + break + } + foundNotSpaceChar = true + } + if foundNotSpaceChar { + value = append(value, c) + s.progressColumn(ctx, 1) + } else { + idx += progress + s.progressColumn(ctx, progress) + } + continue } else if c != '"' { value = append(value, c) isFirstLineChar = false From 0ff4b711b1a7d9bc3e55ce9afee5220685e65c23 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 00:29:46 +0900 Subject: [PATCH 06/14] add guard --- scanner/scanner.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 8e95fef2..87518712 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -309,7 +309,9 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { notSpaceIdx = i break } - value = value[:notSpaceIdx+1] + if notSpaceIdx > 0 { + value = value[:notSpaceIdx+1] + } if isFirstLineChar { value = append(value, '\n') } else { From e7484891d6144519c1ae226b307c9240759a9a65 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 01:16:50 +0900 Subject: [PATCH 07/14] use clear instead of resetBuffer --- scanner/context.go | 9 ++++++++- scanner/scanner.go | 2 +- testdata/yaml-test-suite/yaml.go | 11 +++++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/scanner/context.go b/scanner/context.go index 015a66c2..5b1034f1 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -277,7 +277,7 @@ func (c *Context) bufferedSrc() []rune { if c.hasTrimAllEndNewlineOpt() { // If the '-' flag is specified, all trailing newline characters will be removed. src = []rune(strings.TrimRight(string(src), "\n")) - } else { + } else if !c.hasKeepAllEndNewlineOpt() { // Normally, all but one of the trailing newline characters are removed. var newLineCharCount int for i := len(src) - 1; i >= 0; i-- { @@ -302,6 +302,9 @@ func (c *Context) bufferedSrc() []rune { // so it is treated as an empty string. src = []rune{} } + if c.hasKeepAllEndNewlineOpt() && len(src) == 0 { + src = []rune{'\n'} + } } return src } @@ -310,6 +313,10 @@ func (c *Context) hasTrimAllEndNewlineOpt() bool { return strings.HasPrefix(c.docOpt, "-") || strings.HasSuffix(c.docOpt, "-") || c.isRawFolded } +func (c *Context) hasKeepAllEndNewlineOpt() bool { + return strings.HasPrefix(c.docOpt, "+") || strings.HasSuffix(c.docOpt, "+") +} + func (c *Context) bufferedToken(pos *token.Position) *token.Token { if c.idx == 0 { return nil diff --git a/scanner/scanner.go b/scanner/scanner.go index 87518712..60853ea0 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -633,7 +633,7 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error { ctx.addBuf(c) value := ctx.bufferedSrc() ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos())) - ctx.resetBuffer() + ctx.clear() s.progressColumn(ctx, 1) } else if s.isNewLineChar(c) { ctx.addBuf(c) diff --git a/testdata/yaml-test-suite/yaml.go b/testdata/yaml-test-suite/yaml.go index 73ac92f6..240a6486 100644 --- a/testdata/yaml-test-suite/yaml.go +++ b/testdata/yaml-test-suite/yaml.go @@ -8,6 +8,7 @@ import ( "io/fs" "os" "path/filepath" + "runtime" "sort" "strings" ) @@ -20,9 +21,15 @@ type TestSuite struct { Error bool } +func curDir() string { + _, file, _, _ := runtime.Caller(0) //nolint:dogsled + return filepath.Dir(file) +} + func TestSuites() ([]*TestSuite, error) { + dir := curDir() testMap := make(map[string]*TestSuite) - if err := filepath.Walk(".", func(path string, info fs.FileInfo, err error) error { + if err := filepath.Walk(dir, func(path string, info fs.FileInfo, err error) error { if strings.HasSuffix(path, ".go") { // this file. return nil @@ -33,7 +40,7 @@ func TestSuites() ([]*TestSuite, error) { if err != nil { return err } - name := strings.TrimPrefix(path, "yaml-test-suite/") + name := strings.TrimPrefix(path, dir+"/") name = strings.TrimSuffix(name, "/"+filepath.Base(name)) if _, exists := testMap[name]; !exists { testMap[name] = &TestSuite{} From e7738cac321e5cae848cb6b770075e47240d9f0b Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 02:07:37 +0900 Subject: [PATCH 08/14] fix parser --- parser/parser.go | 21 ++++++++++++++++++--- scanner/context.go | 1 + testdata/yaml_test.go | 2 +- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index dc43f658..e71613be 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1002,9 +1002,10 @@ func (p *parser) parse(ctx *context) (*ast.File, error) { } else if len(file.Docs) == 0 { file.Docs = append(file.Docs, ast.Document(nil, node)) } else { - lastNode := file.Docs[len(file.Docs)-1] - if lastNode.GetToken().Position.Column != node.GetToken().Position.Column { - return nil, errors.ErrSyntax("value is not allowed in this context", node.GetToken()) + lastNode := p.comparableColumnNode(file.Docs[len(file.Docs)-1]) + curNode := p.comparableColumnNode(node) + if lastNode.GetToken().Position.Column != curNode.GetToken().Position.Column { + return nil, errors.ErrSyntax("value is not allowed in this context", curNode.GetToken()) } file.Docs = append(file.Docs, ast.Document(nil, node)) } @@ -1012,6 +1013,20 @@ func (p *parser) parse(ctx *context) (*ast.File, error) { return file, nil } +func (p *parser) comparableColumnNode(n ast.Node) ast.Node { + switch nn := n.(type) { + case *ast.MappingNode: + if len(nn.Values) != 0 { + return nn.Values[0].Key + } + case *ast.MappingValueNode: + return nn.Key + case *ast.DocumentNode: + return p.comparableColumnNode(nn.Body) + } + return n +} + type Mode uint const ( diff --git a/scanner/context.go b/scanner/context.go index 5b1034f1..908a384b 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -323,6 +323,7 @@ func (c *Context) bufferedToken(pos *token.Position) *token.Token { } source := c.bufferedSrc() if len(source) == 0 { + c.buf = c.buf[:0] // clear value's buffer only. return nil } var tk *token.Token diff --git a/testdata/yaml_test.go b/testdata/yaml_test.go index da4461ba..d3ca8160 100644 --- a/testdata/yaml_test.go +++ b/testdata/yaml_test.go @@ -1093,7 +1093,7 @@ func TestRegisterCustomUnmarshaler(t *testing.T) { return nil }) var v T - if err := yaml.Unmarshal([]byte(`"foo: "bar"`), &v); err != nil { + if err := yaml.Unmarshal([]byte(`"foo": "bar"`), &v); err != nil { t.Fatal(err) } if !bytes.Equal(v.Foo, []byte("override")) { From 776a4f2d7551e6c7480ace533859f0e606ac40bd Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 18:57:30 +0900 Subject: [PATCH 09/14] fix anchor case --- ast/ast.go | 17 +++++++ parser/context.go | 11 ++++- parser/parser.go | 108 +++++++++++++++++++++++++----------------- parser/parser_test.go | 7 ++- 4 files changed, 93 insertions(+), 50 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index af0a4335..3a161695 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -1287,6 +1287,7 @@ type MappingValueNode struct { Start *token.Token Key MapKeyNode Value Node + Anchor *AnchorNode FootComment *CommentGroupNode } @@ -1597,6 +1598,10 @@ type AnchorNode struct { Value Node } +func (n *AnchorNode) stringWithoutComment() string { + return n.Value.String() +} + func (n *AnchorNode) SetName(name string) error { if n.Name == nil { return ErrInvalidAnchorName @@ -1622,6 +1627,10 @@ func (n *AnchorNode) GetToken() *token.Token { return n.Start } +func (n *AnchorNode) GetValue() any { + return n.Value.GetToken().Value +} + // AddColumn add column number to child nodes recursively func (n *AnchorNode) AddColumn(col int) { n.Start.AddColumn(col) @@ -1687,6 +1696,10 @@ func (n *AliasNode) GetToken() *token.Token { return n.Start } +func (n *AliasNode) GetValue() any { + return n.Value.GetToken().Value +} + // AddColumn add column number to child nodes recursively func (n *AliasNode) AddColumn(col int) { n.Start.AddColumn(col) @@ -1749,6 +1762,10 @@ type TagNode struct { Value Node } +func (n *TagNode) stringWithoutComment() string { + return n.Value.String() +} + // Read implements (io.Reader).Read func (n *TagNode) Read(p []byte) (int, error) { return readNode(p, n) diff --git a/parser/context.go b/parser/context.go index cc7d3027..2ed9aad4 100644 --- a/parser/context.go +++ b/parser/context.go @@ -7,8 +7,9 @@ import ( // context context at parsing type context struct { - path string - isFlow bool + path string + isFlow bool + isMapKey bool } var pathSpecialChars = []string{ @@ -49,6 +50,12 @@ func (c *context) withFlow(isFlow bool) *context { return &ctx } +func (c *context) withMapKey() *context { + ctx := *c + ctx.isMapKey = true + return &ctx +} + func newContext() *context { return &context{ path: "$", diff --git a/parser/parser.go b/parser/parser.go index e71613be..715a381f 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -429,7 +429,7 @@ func (p *parser) mapKeyText(n ast.Node) string { } func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) { - key, err := p.parseMapKey(ctx) + key, err := p.parseMapKey(ctx.withMapKey()) if err != nil { return nil, err } @@ -497,30 +497,55 @@ func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) { if antk == nil { return nil, errors.ErrSyntax("required ':' and map value", ntk) } - if antk.Type != token.MappingValueType { - return nil, errors.ErrSyntax("required ':' and map value", antk) - } p.progressIgnoreComment(1) - value, err := p.parseToken(ctx, p.currentToken()) + var comment *ast.CommentGroupNode + if tk := p.currentToken(); tk.Type == token.CommentType { + comment = p.parseCommentOnly(ctx) + } + value, err := p.parseMappingValue(ctx) if err != nil { return nil, err } - switch value.Type() { - case ast.MappingType: - c, _ := value.(*ast.MappingNode) - comment := c.GetComment() - for idx, v := range c.Values { + if comment != nil { + comment.SetPath(value.GetPath()) + if err := value.SetComment(comment); err != nil { + return nil, err + } + } + switch v := value.(type) { + case *ast.MappingNode: + comment := v.GetComment() + for idx, val := range v.Values { if idx == 0 && comment != nil { - if err := v.SetComment(comment); err != nil { + if err := val.SetComment(comment); err != nil { return nil, err } } - node.Values = append(node.Values, v) + node.Values = append(node.Values, val) + } + case *ast.MappingValueNode: + node.Values = append(node.Values, v) + case *ast.AnchorNode: + switch anchorV := v.Value.(type) { + case *ast.MappingNode: + comment := anchorV.GetComment() + for idx, val := range anchorV.Values { + if idx == 0 && comment != nil { + if err := val.SetComment(comment); err != nil { + return nil, err + } + } + val.Anchor = v + node.Values = append(node.Values, val) + } + case *ast.MappingValueNode: + anchorV.Anchor = v + node.Values = append(node.Values, anchorV) + default: + return nil, fmt.Errorf("failed to parse mapping value node. anchor node is %s", anchorV.Type()) } - case ast.MappingValueType: - node.Values = append(node.Values, value.(*ast.MappingValueNode)) default: - return nil, fmt.Errorf("failed to parse mapping value node node is %s", value.Type()) + return nil, fmt.Errorf("failed to parse mapping value node. node is %s", value.Type()) } ntk = p.nextNotCommentToken() antk = p.afterNextNotCommentToken() @@ -710,24 +735,17 @@ func (p *parser) parseMapKey(ctx *context) (ast.MapKeyNode, error) { return ast.MergeKey(tk), nil case token.MappingKeyType: return p.parseMappingKey(ctx) - case token.AliasType: - return p.parseAlias(ctx) + case token.TagType: + return p.parseTag(ctx) } return nil, errors.ErrSyntax("unexpected mapping key", tk) } -func (p *parser) parseStringValue(tk *token.Token) *ast.StringNode { - switch tk.Type { - case token.StringType, - token.SingleQuoteType, - token.DoubleQuoteType: - return ast.String(tk) - } - return nil -} - func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast.ScalarNode, error) { - node := p.parseScalarValue(tk) + node, err := p.parseScalarValue(ctx, tk) + if err != nil { + return nil, err + } if node == nil { return nil, nil } @@ -741,28 +759,32 @@ func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast return node, nil } -func (p *parser) parseScalarValue(tk *token.Token) ast.ScalarNode { - if node := p.parseStringValue(tk); node != nil { - return node - } +func (p *parser) parseScalarValue(ctx *context, tk *token.Token) (ast.ScalarNode, error) { switch tk.Type { case token.NullType: - return ast.Null(tk) + return ast.Null(tk), nil case token.BoolType: - return ast.Bool(tk) + return ast.Bool(tk), nil case token.IntegerType, token.BinaryIntegerType, token.OctetIntegerType, token.HexIntegerType: - return ast.Integer(tk) + return ast.Integer(tk), nil case token.FloatType: - return ast.Float(tk) + return ast.Float(tk), nil case token.InfinityType: - return ast.Infinity(tk) + return ast.Infinity(tk), nil case token.NanType: - return ast.Nan(tk) + return ast.Nan(tk), nil + case token.StringType, token.SingleQuoteType, + token.DoubleQuoteType: + return ast.String(tk), nil + case token.AnchorType: + return p.parseAnchor(ctx) + case token.AliasType: + return p.parseAlias(ctx) } - return nil + return nil, nil } func (p *parser) parseDirective(ctx *context) (*ast.DirectiveNode, error) { @@ -935,15 +957,13 @@ func (p *parser) createNodeFromToken(ctx *context, tk *token.Token) (ast.Node, e if tk == nil { return nil, nil } - if tk.NextType() == token.MappingValueType { - node, err := p.parseMappingValue(ctx) - return node, err + if !ctx.isMapKey && tk.NextType() == token.MappingValueType { + return p.parseMappingValue(ctx) } if tk.Type == token.AliasType { aliasValueTk := p.nextToken() if aliasValueTk != nil && aliasValueTk.NextType() == token.MappingValueType { - node, err := p.parseMappingValue(ctx) - return node, err + return p.parseMappingValue(ctx) } } node, err := p.parseScalarValueWithComment(ctx, tk) diff --git a/parser/parser_test.go b/parser/parser_test.go index 66ad5e30..7552e002 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1110,12 +1110,11 @@ b - c `, ` -[4:1] required ':' and map value +[3:1] unexpected key name 2 | a: 1 - 3 | b -> 4 | - c +> 3 | b ^ -`, + 4 | - c`, }, { `a: [`, From 666aeb307ad2c0b23bd8a7b0bcf53313dbdbcdca Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 19:18:17 +0900 Subject: [PATCH 10/14] fix single quote --- scanner/scanner.go | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 60853ea0..5b149db7 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -232,7 +232,22 @@ func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) { c := src[idx] ctx.addOriginBuf(c) if s.isNewLineChar(c) { - value = append(value, ' ') + notSpaceIdx := -1 + for i := len(value) - 1; i >= 0; i-- { + if value[i] == ' ' { + continue + } + notSpaceIdx = i + break + } + if len(value) > notSpaceIdx { + value = value[:notSpaceIdx+1] + } + if isFirstLineChar { + value = append(value, '\n') + } else { + value = append(value, ' ') + } isFirstLineChar = true isNewLine = true s.progressLine(ctx) @@ -301,15 +316,15 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { c := src[idx] ctx.addOriginBuf(c) if s.isNewLineChar(c) { - var notSpaceIdx int - for i := len(value) - 1; i > 0; i-- { + notSpaceIdx := -1 + for i := len(value) - 1; i >= 0; i-- { if value[i] == ' ' { continue } notSpaceIdx = i break } - if notSpaceIdx > 0 { + if len(value) > notSpaceIdx { value = value[:notSpaceIdx+1] } if isFirstLineChar { From 8608ab6e75f58fa27bc1c9953a9f79761e395742 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 19:37:08 +0900 Subject: [PATCH 11/14] fix --- scanner/context.go | 5 ++++- scanner/scanner.go | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/scanner/context.go b/scanner/context.go index 908a384b..6898d082 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -151,6 +151,7 @@ func (c *Context) addDocumentIndent(column int) { } // Since addBuf ignore space character, add to the buffer directly. c.buf = append(c.buf, ' ') + c.notSpaceCharPos = len(c.buf) } } @@ -295,7 +296,9 @@ func (c *Context) bufferedSrc() []rune { } // If the text ends with a space character, remove all of them. - src = []rune(strings.TrimRight(string(src), " ")) + if c.hasTrimAllEndNewlineOpt() { + src = []rune(strings.TrimRight(string(src), " ")) + } if string(src) == "\n" { // If the content consists only of a newline, // it can be considered as the document ending without any specified value, diff --git a/scanner/scanner.go b/scanner/scanner.go index 5b149db7..a0e74bf9 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -639,13 +639,17 @@ func (s *Scanner) trimCommentFromDocumentOpt(text string, header rune) (string, func (s *Scanner) scanDocument(ctx *Context, c rune) error { ctx.addOriginBuf(c) if ctx.isEOS() { + if s.isFirstCharAtLine && c == ' ' { + ctx.addDocumentIndent(s.column) + } else { + ctx.addBuf(c) + } ctx.updateDocumentLineIndentColumn(s.column) if err := ctx.validateDocumentLineIndentColumn(); err != nil { invalidTk := token.Invalid(err.Error(), string(ctx.obuf), s.pos()) s.progressColumn(ctx, 1) return ErrInvalidToken(invalidTk) } - ctx.addBuf(c) value := ctx.bufferedSrc() ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos())) ctx.clear() From a3cba537084a6025fa225d9030d9ec7b3aad739b Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 22:45:04 +0900 Subject: [PATCH 12/14] fix yaml-test-suite --- yaml_test_suite_test.go | 235 +++++++++++++++++++++++++++++++++------- 1 file changed, 193 insertions(+), 42 deletions(-) diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index b627fb4e..4ee82540 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -13,18 +13,187 @@ import ( "github.com/goccy/go-yaml/testdata/yaml-test-suite" ) -const ( - wip = true - successCountThreshold = 199 -) +var failureTestNames = []string{ + "anchors-on-empty-scalars", + "aliases-in-flow-objects", + "aliases-in-explicit-block-mapping", + "aliases-in-implicit-block-mapping", + "allowed-characters-in-alias", + "anchor-before-sequence-entry-on-same-line", + "anchor-for-empty-node", + "anchor-plus-alias", + "anchors-in-mapping", + "anchors-with-colon-in-name", + "bare-document-after-document-end-marker", + "block-mapping-with-missing-keys", + "block-mapping-with-missing-values", + "block-mapping-with-multiline-scalars", + "block-scalar-with-more-spaces-than-first-content-line", + "block-scalar-with-wrong-indented-line-after-spaces-only", + "colon-at-the-beginning-of-adjacent-flow-scalar", + "comment-between-plain-scalar-lines", + "comment-in-flow-sequence-before-comma", + "comment-without-whitespace-after-doublequoted-scalar", + "construct-binary", + "dash-in-flow-sequence", + "directive-variants/00", + "directive-variants/01", + "double-quoted-scalar-with-escaped-single-quote", + "duplicate-yaml-directive", + "escaped-slash-in-double-quotes", + "explicit-key-and-value-seperated-by-comment", + "extra-words-on-yaml-directive", + "empty-implicit-key-in-single-pair-flow-sequences", + "empty-keys-in-block-and-flow-mapping", + "empty-lines-at-end-of-document", + "flow-mapping-separate-values", + "flow-sequence-in-flow-mapping", + "flow-collections-over-many-lines/01", + "flow-mapping-colon-on-line-after-key/02", + "flow-mapping-edge-cases", + "flow-sequence-with-invalid-comma-at-the-beginning", + "flow-sequence-with-invalid-extra-comma", + "folded-block-scalar", + "folded-block-scalar-1-3", + "implicit-flow-mapping-key-on-one-line", + "invalid-anchor-in-zero-indented-sequence", + "invalid-comment-after-comma", + "invalid-comment-after-end-of-flow-sequence", + "invalid-document-end-marker-in-single-quoted-string", + "invalid-document-markers-in-flow-style", + "invalid-document-start-marker-in-doublequoted-tring", + "invalid-escape-in-double-quoted-string", + "invalid-item-after-end-of-flow-sequence", + "invalid-mapping-after-sequence", + "invalid-mapping-in-plain-single-line-value", + "invalid-nested-mapping", + "invalid-scalar-after-sequence", + "invalid-tag", + "key-with-anchor-after-missing-explicit-mapping-value", + "leading-tab-content-in-literals/00", + "leading-tab-content-in-literals/01", + "leading-tabs-in-double-quoted/02", + "leading-tabs-in-double-quoted/05", + "legal-tab-after-indentation", + "literal-block-scalar-with-more-spaces-in-first-line", + "literal-modifers/00", + "literal-modifers/01", + "literal-modifers/02", + "literal-modifers/03", + "literal-scalars", + "mapping-key-and-flow-sequence-item-anchors", + "mapping-starting-at-line", + "mapping-with-anchor-on-document-start-line", + "missing-document-end-marker-before-directive", + "mixed-block-mapping-explicit-to-implicit", + "multiline-double-quoted-implicit-keys", + "multiline-plain-flow-mapping-key", + "multiline-plain-flow-mapping-key-without-value", + "multiline-plain-value-with-tabs-on-empty-lines", + "multiline-scalar-at-top-level", + "multiline-scalar-at-top-level-1-3", + "multiline-single-quoted-implicit-keys", + "multiline-unidented-double-quoted-block-key", + "nested-implicit-complex-keys", + "need-document-footer-before-directives", + "node-anchor-in-sequence", + "node-anchor-not-indented", + "plain-dashes-in-flow-sequence", + "plain-url-in-flow-mapping", + "question-mark-at-start-of-flow-key", + "question-mark-edge-cases/00", + "question-mark-edge-cases/01", + "scalar-doc-with-in-content/01", + "scalar-value-with-two-anchors", + "single-character-streams/01", + "single-pair-implicit-entries", + "spec-example-2-11-mapping-between-sequences", + "spec-example-6-12-separation-spaces", + "spec-example-7-16-flow-mapping-entries", + "spec-example-7-3-completely-empty-flow-nodes", + "spec-example-8-18-implicit-block-mapping-entries", + "spec-example-8-19-compact-block-mappings", + "spec-example-2-24-global-tags", + "spec-example-2-25-unordered-sets", + "spec-example-2-26-ordered-mappings", + "spec-example-5-12-tabs-and-spaces", + "spec-example-5-3-block-structure-indicators", + "spec-example-5-9-directive-indicator", + "spec-example-6-1-indentation-spaces", + "spec-example-6-13-reserved-directives", + "spec-example-6-19-secondary-tag-handle", + "spec-example-6-2-indentation-indicators", + "spec-example-6-21-local-tag-prefix", + "spec-example-6-23-node-properties", + "spec-example-6-24-verbatim-tags", + "spec-example-6-28-non-specific-tags", + "spec-example-6-3-separation-spaces", + "spec-example-6-4-line-prefixes", + "spec-example-6-6-line-folding", + "spec-example-6-6-line-folding-1-3", + "spec-example-6-7-block-folding", + "spec-example-6-8-flow-folding", + "spec-example-7-12-plain-lines", + "spec-example-7-19-single-pair-flow-mappings", + "spec-example-7-2-empty-content", + "spec-example-7-20-single-pair-explicit-entry", + "spec-example-7-24-flow-nodes", + "spec-example-7-6-double-quoted-lines", + "spec-example-7-9-single-quoted-lines", + "spec-example-8-10-folded-lines-8-13-final-empty-lines", + "spec-example-8-15-block-sequence-entry-types", + "spec-example-8-17-explicit-block-mapping-entries", + "spec-example-8-2-block-indentation-indicator", + "spec-example-8-22-block-collection-nodes", + "spec-example-8-7-literal-scalar", + "spec-example-8-7-literal-scalar-1-3", + "spec-example-8-8-literal-content", + "spec-example-9-3-bare-documents", + "spec-example-9-4-explicit-documents", + "spec-example-9-5-directives-documents", + "spec-example-9-6-stream", + "spec-example-9-6-stream-1-3", + "syntax-character-edge-cases/00", + "tab-at-beginning-of-line-followed-by-a-flow-mapping", + "tab-indented-top-flow", + "tabs-in-various-contexts/001", + "tabs-in-various-contexts/002", + "tabs-in-various-contexts/004", + "tabs-in-various-contexts/005", + "tabs-in-various-contexts/006", + "tabs-in-various-contexts/008", + "tabs-in-various-contexts/010", + "tabs-that-look-like-indentation/00", + "tabs-that-look-like-indentation/01", + "tabs-that-look-like-indentation/02", + "tabs-that-look-like-indentation/03", + "tabs-that-look-like-indentation/04", + "tabs-that-look-like-indentation/05", + "tabs-that-look-like-indentation/07", + "tabs-that-look-like-indentation/08", + "tags-for-block-objects", + "tags-for-flow-objects", + "tags-for-root-objects", + "tags-in-explicit-mapping", + "tags-in-implicit-mapping", + "tags-on-empty-scalars", + "three-dashes-and-content-without-space", + "trailing-line-of-spaces/01", + "various-combinations-of-explicit-block-mappings", + "various-trailing-comments", + "various-trailing-comments-1-3", + "wrong-indented-flow-sequence", + "wrong-indented-multiline-quoted-scalar", + "zero-indented-sequences-in-explicit-mapping-keys", +} + +var failureTestNameMap map[string]struct{} -func fatal(t *testing.T, msg string, args ...any) { - t.Helper() - if wip { - t.Logf(msg, args...) - return +func init() { + failureTestNameMap = make(map[string]struct{}) + for _, name := range failureTestNames { + failureTestNameMap[name] = struct{}{} } - t.Fatalf(msg, args...) } func TestYAMLTestSuite(t *testing.T) { @@ -33,28 +202,22 @@ func TestYAMLTestSuite(t *testing.T) { t.Fatal(err) } - var ( - success int - failure int - ) for _, test := range tests { + if _, exists := failureTestNameMap[test.Name]; exists { + continue + } t.Run(test.Name, func(t *testing.T) { defer func() { if e := recover(); e != nil { - failure++ - fatal(t, "panic occurred.\n[input]\n%s\nstack[%s]", string(test.InYAML), debug.Stack()) - return + t.Fatalf("panic occurred.\n[input]\n%s\nstack[%s]", string(test.InYAML), debug.Stack()) } }() if test.Error { var v any if err := yaml.Unmarshal(test.InYAML, &v); err == nil { - failure++ - fatal(t, "expected error.\n[input]\n%s\n", string(test.InYAML)) - return + t.Fatalf("expected error.\n[input]\n%s\n", string(test.InYAML)) } - success++ return } @@ -66,41 +229,29 @@ func TestYAMLTestSuite(t *testing.T) { if err == io.EOF { break } - failure++ - fatal(t, err.Error()) //nolint: govet - return + t.Fatal(err) } if len(test.InJSON) <= idx { - failure++ - fatal(t, "expected json value is nothing but got %v", v) - return + t.Fatalf("expected json value is nothing but got %v", v) } expected, err := json.Marshal(test.InJSON[idx]) if err != nil { - fatal(t, "failed to encode json value: %v", err) - return + t.Fatalf("failed to encode json value: %v", err) } got, err := json.Marshal(v) if err != nil { - fatal(t, "failed to encode json value: %v", err) - return + t.Fatalf("failed to encode json value: %v", err) } if !bytes.Equal(expected, got) { - failure++ - fatal(t, "json mismatch [%s]:\n[expected]\n%s\n[got]\n%s\n", test.Name, string(expected), string(got)) - return + t.Fatalf("json mismatch [%s]:\n[expected]\n%s\n[got]\n%s\n", test.Name, string(expected), string(got)) } idx++ } - success++ }) } total := len(tests) - if success+failure == total { - t.Logf("yaml-test-suite result: success/total = %d/%d (%f %%)\n", success, total, float32(success)/float32(total)*100) - } - if success < successCountThreshold { - // degrade occurred. - t.Fatalf("expected success count is over %d but got %d", successCountThreshold, success) - } + failed := len(failureTestNames) + passed := total - failed + passedRate := float32(passed) / float32(total) * 100 + t.Logf("total:[%d] passed:[%d] failure:[%d] passedRate:[%f%%]", total, passed, failed, passedRate) } From fc96d44b25218d437a67f3ec277aeba77aaedcee Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 22:51:22 +0900 Subject: [PATCH 13/14] add comment --- yaml_test_suite_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index 4ee82540..01e74b01 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -178,13 +178,13 @@ var failureTestNames = []string{ "tags-in-implicit-mapping", "tags-on-empty-scalars", "three-dashes-and-content-without-space", - "trailing-line-of-spaces/01", - "various-combinations-of-explicit-block-mappings", - "various-trailing-comments", - "various-trailing-comments-1-3", - "wrong-indented-flow-sequence", - "wrong-indented-multiline-quoted-scalar", - "zero-indented-sequences-in-explicit-mapping-keys", + "trailing-line-of-spaces/01", // last '\n' character is needed ? + "various-combinations-of-explicit-block-mappings", // no json + "various-trailing-comments", // no json + "various-trailing-comments-1-3", // no json + "wrong-indented-flow-sequence", // error ? + "wrong-indented-multiline-quoted-scalar", // error ? + "zero-indented-sequences-in-explicit-mapping-keys", // no json } var failureTestNameMap map[string]struct{} From 8c8ef18ba3b2e9a2d4ff7ecb5abeb86bd3b3615f Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 17 Nov 2024 22:52:18 +0900 Subject: [PATCH 14/14] fix lint error --- yaml_test_suite_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index 01e74b01..570984d4 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -41,7 +41,7 @@ var failureTestNames = []string{ "double-quoted-scalar-with-escaped-single-quote", "duplicate-yaml-directive", "escaped-slash-in-double-quotes", - "explicit-key-and-value-seperated-by-comment", + "explicit-key-and-value-seperated-by-comment", //nolint: misspell "extra-words-on-yaml-directive", "empty-implicit-key-in-single-pair-flow-sequences", "empty-keys-in-block-and-flow-mapping",