From 438f2d0c3786d61fe505dfbcb6998b0103cb906d Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Mon, 9 Dec 2024 18:45:26 +0900 Subject: [PATCH] Fix parsing of directive value (#578) * fix directive * fix tag value handling * remove passed test cases * add comment --- ast/ast.go | 38 +++++++++--- decode.go | 10 +++ parser/parser.go | 131 ++++++++++++++++++++++++++++++++++++---- parser/token.go | 93 +++++++++++++++++++--------- scanner/scanner.go | 14 ++++- yaml_test_suite_test.go | 37 +++--------- 6 files changed, 245 insertions(+), 78 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 9c5844a..1ad8f15 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -1726,8 +1726,12 @@ func (n *AliasNode) MarshalYAML() ([]byte, error) { // DirectiveNode type of directive node type DirectiveNode struct { *BaseNode + // Start is '%' token. Start *token.Token - Value Node + // Name is directive name e.g.) "YAML" or "TAG". + Name Node + // Values is directive values e.g.) "1.2" or "!!" and "tag:clarkevans.com,2002:app/". + Values []Node } // Read implements (io.Reader).Read @@ -1745,14 +1749,21 @@ func (n *DirectiveNode) GetToken() *token.Token { // AddColumn add column number to child nodes recursively func (n *DirectiveNode) AddColumn(col int) { - if n.Value != nil { - n.Value.AddColumn(col) + if n.Name != nil { + n.Name.AddColumn(col) + } + for _, value := range n.Values { + value.AddColumn(col) } } // String directive to text func (n *DirectiveNode) String() string { - return fmt.Sprintf("%s%s", n.Start.Value, n.Value.String()) + values := make([]string, 0, len(n.Values)) + for _, val := range n.Values { + values = append(values, val.String()) + } + return strings.Join(append([]string{"%" + n.Name.String()}, values...), " ") } // MarshalYAML encodes to a YAML text @@ -1763,8 +1774,9 @@ func (n *DirectiveNode) MarshalYAML() ([]byte, error) { // TagNode type of tag node type TagNode struct { *BaseNode - Start *token.Token - Value Node + Directive *DirectiveNode + Start *token.Token + Value Node } func (n *TagNode) GetValue() any { @@ -1940,7 +1952,10 @@ func Walk(v Visitor, node Node) { Walk(v, n.Value) case *DirectiveNode: walkComment(v, n.BaseNode) - Walk(v, n.Value) + Walk(v, n.Name) + for _, value := range n.Values { + Walk(v, value) + } case *TagNode: walkComment(v, n.BaseNode) Walk(v, n.Value) @@ -2026,7 +2041,14 @@ func (f *parentFinder) walk(parent, node Node) Node { case *LiteralNode: return f.walk(node, n.Value) case *DirectiveNode: - return f.walk(node, n.Value) + if found := f.walk(node, n.Name); found != nil { + return found + } + for _, value := range n.Values { + if found := f.walk(node, value); found != nil { + return found + } + } case *TagNode: return f.walk(node, n.Value) case *DocumentNode: diff --git a/decode.go b/decode.go index faac222..2bdbafa 100644 --- a/decode.go +++ b/decode.go @@ -354,6 +354,16 @@ func (d *Decoder) nodeToValue(node ast.Node) (any, error) { case *ast.NanNode: return n.GetValue(), nil case *ast.TagNode: + if n.Directive != nil { + v, err := d.nodeToValue(n.Value) + if err != nil { + return nil, err + } + if v == nil { + return "", nil + } + return fmt.Sprint(v), nil + } switch token.ReservedTagKeyword(n.Start.Value) { case token.TimestampTag: t, _ := d.castToTime(n.Value) diff --git a/parser/parser.go b/parser/parser.go index 46a1c7d..b2367eb 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -57,10 +57,28 @@ func ParseFile(filename string, mode Mode, opts ...Option) (*ast.File, error) { return f, nil } +type YAMLVersion string + +const ( + YAML10 YAMLVersion = "1.0" + YAML11 YAMLVersion = "1.1" + YAML12 YAMLVersion = "1.2" + YAML13 YAMLVersion = "1.3" +) + +var yamlVersionMap = map[string]YAMLVersion{ + "1.0": YAML10, + "1.1": YAML11, + "1.2": YAML12, + "1.3": YAML13, +} + type parser struct { - tokens []*Token - pathMap map[string]ast.Node - allowDuplicateMapKey bool + tokens []*Token + pathMap map[string]ast.Node + yamlVersion YAMLVersion + allowDuplicateMapKey bool + secondaryTagDirective *ast.DirectiveNode } func newParser(tokens token.Tokens, mode Mode, opts []Option) (*parser, error) { @@ -122,6 +140,10 @@ func (p *parser) parseDocument(ctx *context, docGroup *TokenGroup) (*ast.Documen if docGroup.Last().Type() == token.DocumentEndType { end = docGroup.Last().RawToken() tokens = tokens[:len(tokens)-1] + defer func() { + // clear yaml version value if DocumentEnd token (...) is specified. + p.yamlVersion = "" + }() } if len(tokens) == 0 { @@ -162,6 +184,13 @@ func (p *parser) parseToken(ctx *context, tk *Token) (ast.Node, error) { } ctx.goNext() return node, nil + case TokenGroupDirectiveName: + node, err := p.parseDirectiveName(ctx.withGroup(tk.Group)) + if err != nil { + return nil, err + } + ctx.goNext() + return node, nil case TokenGroupAnchor: node, err := p.parseAnchor(ctx.withGroup(tk.Group), tk.Group) if err != nil { @@ -838,14 +867,26 @@ func (p *parser) parseTag(ctx *context) (*ast.TagNode, error) { ctx.goNext() comment := p.parseHeadComment(ctx) - value, err := p.parseTagValue(ctx, tagRawTk, ctx.currentToken()) - if err != nil { - return nil, err + + var tagValue ast.Node + if p.secondaryTagDirective != nil { + value, err := newStringNode(ctx, ctx.currentToken()) + if err != nil { + return nil, err + } + tagValue = value + node.Directive = p.secondaryTagDirective + } else { + value, err := p.parseTagValue(ctx, tagRawTk, ctx.currentToken()) + if err != nil { + return nil, err + } + tagValue = value } - if err := setHeadComment(comment, value); err != nil { + if err := setHeadComment(comment, tagValue); err != nil { return nil, err } - node.Value = value + node.Value = tagValue return node, nil } @@ -1046,16 +1087,82 @@ func (p *parser) parseSequenceValue(ctx *context, seqTk *Token) (ast.Node, error } func (p *parser) parseDirective(ctx *context, g *TokenGroup) (*ast.DirectiveNode, error) { - node, err := newDirectiveNode(ctx, g.First()) + directiveNameGroup := g.First().Group + directive, err := p.parseDirectiveName(ctx.withGroup(directiveNameGroup)) if err != nil { return nil, err } - value, err := p.parseToken(ctx, g.Last()) + + switch directive.Name.String() { + case "YAML": + if len(g.Tokens) != 2 { + return nil, errors.ErrSyntax("unexpected format YAML directive", g.First().RawToken()) + } + valueTk := g.Tokens[1] + valueRawTk := valueTk.RawToken() + value := valueRawTk.Value + ver, exists := yamlVersionMap[value] + if !exists { + return nil, errors.ErrSyntax(fmt.Sprintf("unknown YAML version %q", value), valueRawTk) + } + if p.yamlVersion != "" { + return nil, errors.ErrSyntax("YAML version has already been specified", valueRawTk) + } + p.yamlVersion = ver + versionNode, err := newStringNode(ctx, valueTk) + if err != nil { + return nil, err + } + directive.Values = append(directive.Values, versionNode) + case "TAG": + if len(g.Tokens) != 3 { + return nil, errors.ErrSyntax("unexpected format TAG directive", g.First().RawToken()) + } + tagKey, err := newStringNode(ctx, g.Tokens[1]) + if err != nil { + return nil, err + } + if tagKey.Value == "!!" { + p.secondaryTagDirective = directive + } + tagValue, err := newStringNode(ctx, g.Tokens[2]) + if err != nil { + return nil, err + } + directive.Values = append(directive.Values, tagKey, tagValue) + default: + if len(g.Tokens) > 1 { + for _, tk := range g.Tokens[1:] { + value, err := newStringNode(ctx, tk) + if err != nil { + return nil, err + } + directive.Values = append(directive.Values, value) + } + } + } + return directive, nil +} + +func (p *parser) parseDirectiveName(ctx *context) (*ast.DirectiveNode, error) { + directive, err := newDirectiveNode(ctx, ctx.currentToken()) if err != nil { return nil, err } - node.Value = value - return node, nil + ctx.goNext() + if ctx.isTokenNotFound() { + return nil, errors.ErrSyntax("could not find directive value", directive.GetToken()) + } + + directiveName, err := p.parseScalarValue(ctx, ctx.currentToken()) + if err != nil { + return nil, err + } + if directiveName == nil { + return nil, errors.ErrSyntax("unexpected directive. directive name is not scalar value", ctx.currentToken().RawToken()) + } + directive.Name = directiveName + return directive, nil } func (p *parser) parseComment(ctx *context) (ast.Node, error) { diff --git a/parser/token.go b/parser/token.go index c7bc5e8..3d1c06f 100644 --- a/parser/token.go +++ b/parser/token.go @@ -3,6 +3,7 @@ package parser import ( "fmt" "os" + "strings" "github.com/goccy/go-yaml/internal/errors" "github.com/goccy/go-yaml/token" @@ -13,6 +14,7 @@ type TokenGroupType int const ( TokenGroupNone TokenGroupType = iota TokenGroupDirective + TokenGroupDirectiveName TokenGroupDocument TokenGroupDocumentBody TokenGroupAnchor @@ -31,6 +33,8 @@ func (t TokenGroupType) String() string { return "none" case TokenGroupDirective: return "directive" + case TokenGroupDirectiveName: + return "directive_name" case TokenGroupDocument: return "document" case TokenGroupDocumentBody: @@ -353,8 +357,37 @@ func createScalarTagTokenGroups(tokens []*Token) []*Token { continue } tag := tk.RawToken() - switch token.ReservedTagKeyword(tag.Value) { - case token.IntegerTag, token.FloatTag, token.StringTag, token.BinaryTag, token.TimestampTag, token.BooleanTag, token.NullTag: + if strings.HasPrefix(tag.Value, "!!") { + // secondary tag. + switch token.ReservedTagKeyword(tag.Value) { + case token.IntegerTag, token.FloatTag, token.StringTag, token.BinaryTag, token.TimestampTag, token.BooleanTag, token.NullTag: + if len(tokens) <= i+1 { + ret = append(ret, tk) + continue + } + if tk.Line() != tokens[i+1].Line() { + ret = append(ret, tk) + continue + } + if tokens[i+1].GroupType() == TokenGroupAnchorName { + ret = append(ret, tk) + continue + } + if isScalarType(tokens[i+1]) { + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupScalarTag, + Tokens: []*Token{tk, tokens[i+1]}, + }, + }) + i++ + } else { + ret = append(ret, tk) + } + default: + ret = append(ret, tk) + } + } else { if len(tokens) <= i+1 { ret = append(ret, tk) continue @@ -367,19 +400,13 @@ func createScalarTagTokenGroups(tokens []*Token) []*Token { ret = append(ret, tk) continue } - if isScalarType(tokens[i+1]) { - ret = append(ret, &Token{ - Group: &TokenGroup{ - Type: TokenGroupScalarTag, - Tokens: []*Token{tk, tokens[i+1]}, - }, - }) - i++ - } else { - ret = append(ret, tk) - } - default: - ret = append(ret, tk) + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupScalarTag, + Tokens: []*Token{tk, tokens[i+1]}, + }, + }) + i++ } } return ret @@ -522,22 +549,34 @@ func createDirectiveTokenGroups(tokens []*Token) ([]*Token, error) { if i+1 >= len(tokens) { return nil, errors.ErrSyntax("undefined directive value", tk.RawToken()) } - if i+2 >= len(tokens) { - return nil, errors.ErrSyntax("unexpected directive value. document not started", tk.RawToken()) - } - if tokens[i+2].Type() != token.DocumentHeaderType { - return nil, errors.ErrSyntax("unexpected directive value. document not started", tk.RawToken()) - } - if tk.Line() != tokens[i+1].Line() { - return nil, errors.ErrSyntax("undefined directive value", tk.RawToken()) - } - ret = append(ret, &Token{ + directiveName := &Token{ Group: &TokenGroup{ - Type: TokenGroupDirective, + Type: TokenGroupDirectiveName, Tokens: []*Token{tk, tokens[i+1]}, }, - }) + } i++ + var valueTks []*Token + for j := i + 1; j < len(tokens); j++ { + if tokens[j].Line() != tk.Line() { + break + } + valueTks = append(valueTks, tokens[j]) + i++ + } + if i+1 >= len(tokens) || tokens[i+1].Type() != token.DocumentHeaderType { + return nil, errors.ErrSyntax("unexpected directive value. document not started", tk.RawToken()) + } + if len(valueTks) != 0 { + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupDirective, + Tokens: append([]*Token{directiveName}, valueTks...), + }, + }) + } else { + ret = append(ret, directiveName) + } default: ret = append(ret, tk) } diff --git a/scanner/scanner.go b/scanner/scanner.go index e00669c..8da8123 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -615,7 +615,7 @@ func (s *Scanner) scanWhiteSpace(ctx *Context) bool { if ctx.isDocument() { return false } - if !s.isAnchor && !s.isAlias && !s.isFirstCharAtLine { + if !s.isAnchor && !s.isDirective && !s.isAlias && !s.isFirstCharAtLine { return false } @@ -624,6 +624,12 @@ func (s *Scanner) scanWhiteSpace(ctx *Context) bool { ctx.addOriginBuf(' ') return true } + if s.isDirective { + s.addBufferedTokenIfExists(ctx) + s.progressColumn(ctx, 1) + ctx.addOriginBuf(' ') + return true + } s.addBufferedTokenIfExists(ctx) s.isAnchor = false @@ -656,7 +662,7 @@ func (s *Scanner) isMergeKey(ctx *Context) bool { } func (s *Scanner) scanTag(ctx *Context) bool { - if ctx.existsBuffer() { + if ctx.existsBuffer() || s.isDirective { return false } @@ -1176,7 +1182,9 @@ func (s *Scanner) scanDirective(ctx *Context) bool { return false } - ctx.addToken(token.Directive(string(ctx.obuf)+"%", s.pos())) + s.addBufferedTokenIfExists(ctx) + ctx.addOriginBuf('%') + ctx.addToken(token.Directive(string(ctx.obuf), s.pos())) s.progressColumn(ctx, 1) ctx.clear() s.isDirective = true diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index 0e1020d..16cdeab 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -18,18 +18,10 @@ var failureTestNames = []string{ "aliases-in-flow-objects", // no json. "aliases-in-explicit-block-mapping", // no json. "block-mapping-with-missing-keys", // no json. - "block-scalar-with-more-spaces-than-first-content-line", "colon-at-the-beginning-of-adjacent-flow-scalar", "comment-without-whitespace-after-doublequoted-scalar", "construct-binary", "dash-in-flow-sequence", - "directive-variants/00", - "directive-variants/01", // pass yamlv3. - "double-quoted-scalar-with-escaped-single-quote", - "duplicate-yaml-directive", // pass yamlv3. - "escaped-slash-in-double-quotes", - "explicit-key-and-value-seperated-by-comment", //nolint: misspell // pass yamlv3. - "extra-words-on-yaml-directive", // pass yamlv3. "empty-implicit-key-in-single-pair-flow-sequences", // no json. "empty-keys-in-block-and-flow-mapping", // no json. "empty-lines-at-end-of-document", // no json. @@ -45,18 +37,15 @@ var failureTestNames = []string{ "invalid-comment-after-comma", "invalid-comment-after-end-of-flow-sequence", "invalid-comma-in-tag", - "invalid-tag", // pass yamlv3. - "leading-tabs-in-double-quoted/02", // pass yamlv3. - "leading-tabs-in-double-quoted/05", // pass yamlv3. - "legal-tab-after-indentation", // pass yamlv3. - "literal-block-scalar-with-more-spaces-in-first-line", // pass yamlv3. - "literal-modifers/00", // pass yamlv3. - "literal-modifers/01", // pass yamlv3. - "literal-modifers/02", // pass yamlv3. - "literal-modifers/03", // pass yamlv3. - "literal-scalars", // pass yamlv3. - "mapping-key-and-flow-sequence-item-anchors", // no json. - "multiline-double-quoted-implicit-keys", // pass yamlv3. + "invalid-tag", // pass yamlv3. + "legal-tab-after-indentation", // pass yamlv3. + "literal-modifers/00", // pass yamlv3. + "literal-modifers/01", // pass yamlv3. + "literal-modifers/02", // pass yamlv3. + "literal-modifers/03", // pass yamlv3. + "literal-scalars", // pass yamlv3. + "mapping-key-and-flow-sequence-item-anchors", // no json. + "multiline-double-quoted-implicit-keys", // pass yamlv3. "multiline-plain-flow-mapping-key", "multiline-plain-value-with-tabs-on-empty-lines", // pass yamlv3. "multiline-scalar-at-top-level", // pass yamlv3. @@ -69,7 +58,6 @@ var failureTestNames = []string{ "plain-url-in-flow-mapping", // pass yamlv3. "question-mark-edge-cases/00", // no json. "question-mark-edge-cases/01", // no json. - "scalar-doc-with-in-content/01", // pass yamlv3. "scalar-value-with-two-anchors", // pass yamlv3. "single-character-streams/01", // no json. "single-pair-implicit-entries", // no json. @@ -79,13 +67,9 @@ var failureTestNames = []string{ "spec-example-7-3-completely-empty-flow-nodes", // no json. "spec-example-8-18-implicit-block-mapping-entries", // no json. "spec-example-8-19-compact-block-mappings", // no json. - "spec-example-6-19-secondary-tag-handle", // pass yamlv3. - "spec-example-6-24-verbatim-tags", // pass yamlv3. "spec-example-6-6-line-folding", // pass yamlv3. "spec-example-6-6-line-folding-1-3", // pass yamlv3. - "spec-example-6-8-flow-folding", // pass yamlv3. "spec-example-8-10-folded-lines-8-13-final-empty-lines", // pass yamlv3. - "spec-example-8-17-explicit-block-mapping-entries", // pass yamlv3. "spec-example-8-2-block-indentation-indicator", "spec-example-9-3-bare-documents", "spec-example-9-4-explicit-documents", @@ -97,14 +81,11 @@ var failureTestNames = []string{ "tabs-in-various-contexts/003", "tabs-that-look-like-indentation/00", "tabs-that-look-like-indentation/01", - "tabs-that-look-like-indentation/02", // pass yamlv3. "tabs-that-look-like-indentation/03", "tabs-that-look-like-indentation/04", "tabs-that-look-like-indentation/05", // pass yamlv3. "tabs-that-look-like-indentation/07", - "tabs-that-look-like-indentation/08", // pass yamlv3. "tag-shorthand-used-in-documents-but-only-defined-in-the-first", - "tags-for-block-objects", // pass yamlv3. "tags-on-empty-scalars", // no json. "trailing-line-of-spaces/01", // last '\n' character is needed ? "various-combinations-of-explicit-block-mappings", // no json.