From 6a4fdd3b6bd472b5d6b0214a67a4c0a3c4810040 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Tue, 26 Nov 2024 00:20:04 +0900 Subject: [PATCH 1/2] refactor parser --- ast/ast.go | 8 + decode.go | 20 +- parser/color.go | 28 + parser/context.go | 91 ++- parser/node.go | 193 +++++ parser/parser.go | 1523 ++++++++++++++++++--------------------- parser/parser_test.go | 36 +- parser/token.go | 654 +++++++++++++++++ scanner/scanner.go | 12 + yaml_test_suite_test.go | 1 + 10 files changed, 1710 insertions(+), 856 deletions(-) create mode 100644 parser/color.go create mode 100644 parser/node.go create mode 100644 parser/token.go diff --git a/ast/ast.go b/ast/ast.go index 3a16169..52edfdf 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -1762,6 +1762,14 @@ type TagNode struct { Value Node } +func (n *TagNode) GetValue() any { + scalar, ok := n.Value.(ScalarNode) + if !ok { + return nil + } + return scalar.GetValue() +} + func (n *TagNode) stringWithoutComment() string { return n.Value.String() } diff --git a/decode.go b/decode.go index fe990e5..b367ccf 100644 --- a/decode.go +++ b/decode.go @@ -215,6 +215,14 @@ func (d *Decoder) addHeadOrLineCommentToMap(node ast.Node) { } commentPath := node.GetPath() if minCommentLine < targetLine { + switch n := node.(type) { + case *ast.MappingNode: + if len(n.Values) != 0 { + commentPath = n.Values[0].Key.GetPath() + } + case *ast.MappingValueNode: + commentPath = n.Key.GetPath() + } d.addCommentToMap(commentPath, HeadComment(texts...)) } else { d.addCommentToMap(commentPath, LineComment(texts[0])) @@ -255,14 +263,20 @@ func (d *Decoder) addFootCommentToMap(node ast.Node) { ) switch n := node.(type) { case *ast.SequenceNode: - if len(n.Values) != 0 { - footCommentPath = n.Values[len(n.Values)-1].GetPath() - } footComment = n.FootComment + if n.FootComment != nil { + footCommentPath = n.FootComment.GetPath() + } case *ast.MappingNode: footComment = n.FootComment + if n.FootComment != nil { + footCommentPath = n.FootComment.GetPath() + } case *ast.MappingValueNode: footComment = n.FootComment + if n.FootComment != nil { + footCommentPath = n.FootComment.GetPath() + } } if footComment == nil { return diff --git a/parser/color.go b/parser/color.go new file mode 100644 index 0000000..aeee0dc --- /dev/null +++ b/parser/color.go @@ -0,0 +1,28 @@ +package parser + +import "fmt" + +const ( + colorFgHiBlack int = iota + 90 + colorFgHiRed + colorFgHiGreen + colorFgHiYellow + colorFgHiBlue + colorFgHiMagenta + colorFgHiCyan +) + +var colorTable = []int{ + colorFgHiRed, + colorFgHiGreen, + colorFgHiYellow, + colorFgHiBlue, + colorFgHiMagenta, + colorFgHiCyan, +} + +func colorize(idx int, content string) string { + colorIdx := idx % len(colorTable) + color := colorTable[colorIdx] + return fmt.Sprintf("\x1b[1;%dm", color) + content + "\x1b[22;0m" +} diff --git a/parser/context.go b/parser/context.go index 2ed9aad..1ac7f25 100644 --- a/parser/context.go +++ b/parser/context.go @@ -3,13 +3,21 @@ package parser import ( "fmt" "strings" + + "github.com/goccy/go-yaml/token" ) // context context at parsing type context struct { + tokenRef *tokenRef path string isFlow bool - isMapKey bool +} + +type tokenRef struct { + tokens []*Token + size int + idx int } var pathSpecialChars = []string{ @@ -32,6 +40,44 @@ func normalizePath(path string) string { return path } +func (c *context) currentToken() *Token { + if c.tokenRef.idx >= c.tokenRef.size { + return nil + } + return c.tokenRef.tokens[c.tokenRef.idx] +} + +func (c *context) isComment() bool { + return c.currentToken().Type() == token.CommentType +} + +func (c *context) nextToken() *Token { + if c.tokenRef.idx+1 >= c.tokenRef.size { + return nil + } + return c.tokenRef.tokens[c.tokenRef.idx+1] +} + +func (c *context) nextNotCommentToken() *Token { + for i := c.tokenRef.idx + 1; i < c.tokenRef.size; i++ { + tk := c.tokenRef.tokens[i] + if tk.Type() == token.CommentType { + continue + } + return tk + } + return nil +} + +func (c *context) withGroup(g *TokenGroup) *context { + ctx := *c + ctx.tokenRef = &tokenRef{ + tokens: g.Tokens, + size: len(g.Tokens), + } + return &ctx +} + func (c *context) withChild(path string) *context { ctx := *c ctx.path = c.path + "." + normalizePath(path) @@ -50,14 +96,45 @@ func (c *context) withFlow(isFlow bool) *context { return &ctx } -func (c *context) withMapKey() *context { - ctx := *c - ctx.isMapKey = true - return &ctx -} - func newContext() *context { return &context{ path: "$", } } + +func (c *context) goNext() { + ref := c.tokenRef + if ref.size <= ref.idx+1 { + ref.idx = ref.size + } else { + ref.idx++ + } +} + +func (c *context) next() bool { + return c.tokenRef.idx < c.tokenRef.size +} + +func (c *context) insertToken(tk *Token) { + idx := c.tokenRef.idx + if c.tokenRef.size < idx { + return + } + if c.tokenRef.size == idx { + curToken := c.tokenRef.tokens[c.tokenRef.size-1] + tk.RawToken().Next = curToken.RawToken() + curToken.RawToken().Prev = tk.RawToken() + + c.tokenRef.tokens = append(c.tokenRef.tokens, tk) + c.tokenRef.size = len(c.tokenRef.tokens) + return + } + + curToken := c.tokenRef.tokens[idx] + tk.RawToken().Next = curToken.RawToken() + curToken.RawToken().Prev = tk.RawToken() + + c.tokenRef.tokens = append(c.tokenRef.tokens[:idx+1], c.tokenRef.tokens[idx:]...) + c.tokenRef.tokens[idx] = tk + c.tokenRef.size = len(c.tokenRef.tokens) +} diff --git a/parser/node.go b/parser/node.go new file mode 100644 index 0000000..7e872b4 --- /dev/null +++ b/parser/node.go @@ -0,0 +1,193 @@ +package parser + +import ( + "github.com/goccy/go-yaml/ast" + "github.com/goccy/go-yaml/token" +) + +func newMappingNode(ctx *context, tk *Token, isFlow bool, values ...*ast.MappingValueNode) (*ast.MappingNode, error) { + node := ast.Mapping(tk.RawToken(), isFlow, values...) + node.SetPath(ctx.path) + return node, nil +} + +func newMappingValueNode(ctx *context, tk *Token, key ast.MapKeyNode, value ast.Node) (*ast.MappingValueNode, error) { + node := ast.MappingValue(tk.RawToken(), key, value) + node.SetPath(ctx.path) + if key.GetToken().Position.Line == value.GetToken().Position.Line { + // originally key was commented, but now that null value has been added, value must be commented. + if err := setLineComment(ctx, value, tk); err != nil { + return nil, err + } + } else { + if err := setLineComment(ctx, key, tk); err != nil { + return nil, err + } + } + return node, nil +} + +func newMappingKeyNode(ctx *context, tk *Token) (*ast.MappingKeyNode, error) { + node := ast.MappingKey(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newAnchorNode(ctx *context, tk *Token) (*ast.AnchorNode, error) { + node := ast.Anchor(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newAliasNode(ctx *context, tk *Token) (*ast.AliasNode, error) { + node := ast.Alias(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newDirectiveNode(ctx *context, tk *Token) (*ast.DirectiveNode, error) { + node := ast.Directive(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newMergeKeyNode(ctx *context, tk *Token) (*ast.MergeKeyNode, error) { + node := ast.MergeKey(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newNullNode(ctx *context, tk *Token) (*ast.NullNode, error) { + node := ast.Null(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newBoolNode(ctx *context, tk *Token) (*ast.BoolNode, error) { + node := ast.Bool(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newIntegerNode(ctx *context, tk *Token) (*ast.IntegerNode, error) { + node := ast.Integer(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newFloatNode(ctx *context, tk *Token) (*ast.FloatNode, error) { + node := ast.Float(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newInfinityNode(ctx *context, tk *Token) (*ast.InfinityNode, error) { + node := ast.Infinity(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newNanNode(ctx *context, tk *Token) (*ast.NanNode, error) { + node := ast.Nan(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newStringNode(ctx *context, tk *Token) (*ast.StringNode, error) { + node := ast.String(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newLiteralNode(ctx *context, tk *Token) (*ast.LiteralNode, error) { + node := ast.Literal(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newTagNode(ctx *context, tk *Token) (*ast.TagNode, error) { + node := ast.Tag(tk.RawToken()) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func newSequenceNode(ctx *context, tk *Token, isFlow bool) (*ast.SequenceNode, error) { + node := ast.Sequence(tk.RawToken(), isFlow) + node.SetPath(ctx.path) + if err := setLineComment(ctx, node, tk); err != nil { + return nil, err + } + return node, nil +} + +func setLineComment(ctx *context, node ast.Node, tk *Token) error { + if tk.LineComment == nil { + return nil + } + comment := ast.CommentGroup([]*token.Token{tk.LineComment}) + comment.SetPath(ctx.path) + if err := node.SetComment(comment); err != nil { + return err + } + return nil +} + +func setHeadComment(cm *ast.CommentGroupNode, value ast.Node) error { + if cm == nil { + return nil + } + switch n := value.(type) { + case *ast.MappingNode: + if len(n.Values) != 0 && value.GetComment() == nil { + cm.SetPath(n.Values[0].GetPath()) + return n.Values[0].SetComment(cm) + } + case *ast.MappingValueNode: + cm.SetPath(n.GetPath()) + return n.SetComment(cm) + } + cm.SetPath(value.GetPath()) + return value.SetComment(cm) +} diff --git a/parser/parser.go b/parser/parser.go index 715a381..247c750 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -11,15 +11,59 @@ import ( "github.com/goccy/go-yaml/token" ) +type Mode uint + +const ( + ParseComments Mode = 1 << iota // parse comments and add them to AST +) + +// ParseBytes parse from byte slice, and returns ast.File +func ParseBytes(bytes []byte, mode Mode, opts ...Option) (*ast.File, error) { + tokens := lexer.Tokenize(string(bytes)) + f, err := Parse(tokens, mode, opts...) + if err != nil { + return nil, err + } + return f, nil +} + +// Parse parse from token instances, and returns ast.File +func Parse(tokens token.Tokens, mode Mode, opts ...Option) (*ast.File, error) { + if tk := tokens.InvalidToken(); tk != nil { + return nil, errors.ErrSyntax(tk.Error, tk) + } + p, err := newParser(tokens, mode, opts) + if err != nil { + return nil, err + } + f, err := p.parse(newContext()) + if err != nil { + return nil, err + } + return f, nil +} + +// Parse parse from filename, and returns ast.File +func ParseFile(filename string, mode Mode, opts ...Option) (*ast.File, error) { + file, err := os.ReadFile(filename) + if err != nil { + return nil, err + } + f, err := ParseBytes(file, mode, opts...) + if err != nil { + return nil, err + } + f.Name = filename + return f, nil +} + type parser struct { - idx int - size int - tokens token.Tokens + tokens []*Token pathMap map[string]ast.Node allowDuplicateMapKey bool } -func newParser(tokens token.Tokens, mode Mode, opts []Option) *parser { +func newParser(tokens token.Tokens, mode Mode, opts []Option) (*parser, error) { filteredTokens := []*token.Token{} if mode&ParseComments != 0 { filteredTokens = tokens @@ -33,670 +77,612 @@ func newParser(tokens token.Tokens, mode Mode, opts []Option) *parser { filteredTokens = append(filteredTokens, tk) } } + tks, err := createGroupedTokens(token.Tokens(filteredTokens)) + if err != nil { + return nil, err + } p := &parser{ - idx: 0, - size: len(filteredTokens), - tokens: token.Tokens(filteredTokens), + tokens: tks, pathMap: make(map[string]ast.Node), } for _, opt := range opts { opt(p) } - return p + return p, nil } -func (p *parser) next() bool { - return p.idx < p.size -} - -func (p *parser) previousToken() *token.Token { - if p.idx > 0 { - return p.tokens[p.idx-1] +func (p *parser) parse(ctx *context) (*ast.File, error) { + file := &ast.File{Docs: []*ast.DocumentNode{}} + for _, token := range p.tokens { + doc, err := p.parseDocument(ctx, token.Group) + if err != nil { + return nil, err + } + file.Docs = append(file.Docs, doc) } - return nil + return file, nil } -func (p *parser) insertToken(idx int, tk *token.Token) { - if p.size < idx { - return - } - if p.size == idx { - curToken := p.tokens[p.size-1] - tk.Next = curToken - curToken.Prev = tk - - p.tokens = append(p.tokens, tk) - p.size = len(p.tokens) - return +func (p *parser) parseDocument(ctx *context, docGroup *TokenGroup) (*ast.DocumentNode, error) { + if len(docGroup.Tokens) == 0 { + return ast.Document(docGroup.RawToken(), nil), nil } - curToken := p.tokens[idx] - tk.Next = curToken - curToken.Prev = tk + p.pathMap = make(map[string]ast.Node) - p.tokens = append(p.tokens[:idx+1], p.tokens[idx:]...) - p.tokens[idx] = tk - p.size = len(p.tokens) -} + var ( + tokens = docGroup.Tokens + start *token.Token + end *token.Token + ) + if docGroup.First().Type() == token.DocumentHeaderType { + start = docGroup.First().RawToken() + tokens = tokens[1:] + } + if docGroup.Last().Type() == token.DocumentEndType { + end = docGroup.Last().RawToken() + tokens = tokens[:len(tokens)-1] + } -func (p *parser) currentToken() *token.Token { - if p.idx >= p.size { - return nil + if len(tokens) == 0 { + return ast.Document(docGroup.RawToken(), nil), nil } - return p.tokens[p.idx] -} -func (p *parser) nextToken() *token.Token { - if p.idx+1 >= p.size { - return nil + body, err := p.parseDocumentBody(ctx.withGroup(&TokenGroup{ + Type: TokenGroupDocumentBody, + Tokens: tokens, + })) + if err != nil { + return nil, err } - return p.tokens[p.idx+1] + node := ast.Document(start, body) + node.End = end + return node, nil } -func (p *parser) nextNotCommentToken() *token.Token { - for i := p.idx + 1; i < p.size; i++ { - tk := p.tokens[i] - if tk.Type == token.CommentType { - continue - } - return tk +func (p *parser) parseDocumentBody(ctx *context) (ast.Node, error) { + node, err := p.parseToken(ctx, ctx.currentToken()) + if err != nil { + return nil, err } - return nil + if ctx.next() { + return nil, errors.ErrSyntax("value is not allowed in this context", ctx.currentToken().RawToken()) + } + return node, nil } -func (p *parser) afterNextNotCommentToken() *token.Token { - notCommentTokenCount := 0 - for i := p.idx + 1; i < p.size; i++ { - tk := p.tokens[i] - if tk.Type == token.CommentType { - continue +func (p *parser) parseToken(ctx *context, tk *Token) (ast.Node, error) { + switch tk.GroupType() { + case TokenGroupMapKey, TokenGroupMapKeyValue: + return p.parseMap(ctx) + case TokenGroupDirective: + node, err := p.parseDirective(ctx.withGroup(tk.Group), tk.Group) + if err != nil { + return nil, err + } + ctx.goNext() + return node, nil + case TokenGroupAnchor: + node, err := p.parseAnchor(ctx.withGroup(tk.Group), tk.Group) + if err != nil { + return nil, err + } + ctx.goNext() + return node, nil + case TokenGroupAnchorName: + anchor, err := p.parseAnchorName(ctx.withGroup(tk.Group)) + if err != nil { + return nil, err + } + ctx.goNext() + value, err := p.parseToken(ctx, ctx.currentToken()) + if err != nil { + return nil, err + } + anchor.Value = value + return anchor, nil + case TokenGroupAlias: + node, err := p.parseAlias(ctx.withGroup(tk.Group)) + if err != nil { + return nil, err } - notCommentTokenCount++ - if notCommentTokenCount == 2 { - return tk + ctx.goNext() + return node, nil + case TokenGroupLiteral, TokenGroupFolded: + node, err := p.parseLiteral(ctx.withGroup(tk.Group)) + if err != nil { + return nil, err + } + ctx.goNext() + return node, nil + case TokenGroupScalarTag: + node, err := p.parseTag(ctx.withGroup(tk.Group)) + if err != nil { + return nil, err } + ctx.goNext() + return node, nil } - return nil -} - -func (p *parser) isCurrentCommentToken() bool { - tk := p.currentToken() - if tk == nil { - return false + switch tk.Type() { + case token.CommentType: + return p.parseComment(ctx) + case token.TagType: + return p.parseTag(ctx) + case token.MappingStartType: + return p.parseFlowMap(ctx) + case token.SequenceStartType: + return p.parseFlowSequence(ctx) + case token.SequenceEntryType: + return p.parseSequence(ctx) + case token.SequenceEndType: + // SequenceEndType is always validated in parseFlowSequence. + // Therefore, if this is found in other cases, it is treated as a syntax error. + return nil, errors.ErrSyntax("could not find '[' character corresponding to ']'", tk.RawToken()) + case token.MappingEndType: + // MappingEndType is always validated in parseFlowMap. + // Therefore, if this is found in other cases, it is treated as a syntax error. + return nil, errors.ErrSyntax("could not find '{' character corresponding to '}'", tk.RawToken()) + case token.MappingValueType: + return nil, errors.ErrSyntax("found an invalid key for this map", tk.RawToken()) } - return tk.Type == token.CommentType -} - -func (p *parser) progressIgnoreComment(num int) { - if p.size <= p.idx+num { - p.idx = p.size - } else { - p.idx += num + node, err := p.parseScalarValue(ctx, tk) + if err != nil { + return nil, err } + ctx.goNext() + return node, nil } -func (p *parser) progress(num int) { - if p.isCurrentCommentToken() { - return +func (p *parser) parseScalarValue(ctx *context, tk *Token) (ast.ScalarNode, error) { + if tk.Group != nil { + switch tk.GroupType() { + case TokenGroupAnchor: + return p.parseAnchor(ctx.withGroup(tk.Group), tk.Group) + case TokenGroupAnchorName: + anchor, err := p.parseAnchorName(ctx.withGroup(tk.Group)) + if err != nil { + return nil, err + } + ctx.goNext() + value, err := p.parseToken(ctx, ctx.currentToken()) + if err != nil { + return nil, err + } + anchor.Value = value + return anchor, nil + case TokenGroupAlias: + return p.parseAlias(ctx.withGroup(tk.Group)) + case TokenGroupLiteral, TokenGroupFolded: + return p.parseLiteral(ctx.withGroup(tk.Group)) + case TokenGroupScalarTag: + return p.parseTag(ctx.withGroup(tk.Group)) + default: + return nil, errors.ErrSyntax("unexpected scalar value", tk.RawToken()) + } + } + switch tk.Type() { + case token.MergeKeyType: + return newMergeKeyNode(ctx, tk) + case token.NullType: + return newNullNode(ctx, tk) + case token.BoolType: + return newBoolNode(ctx, tk) + case token.IntegerType, token.BinaryIntegerType, token.OctetIntegerType, token.HexIntegerType: + return newIntegerNode(ctx, tk) + case token.FloatType: + return newFloatNode(ctx, tk) + case token.InfinityType: + return newInfinityNode(ctx, tk) + case token.NanType: + return newNanNode(ctx, tk) + case token.StringType, token.SingleQuoteType, token.DoubleQuoteType: + return newStringNode(ctx, tk) } - p.progressIgnoreComment(num) + return nil, errors.ErrSyntax("unexpected scalar value type", tk.RawToken()) } -func (p *parser) parseMapping(ctx *context) (*ast.MappingNode, error) { - mapTk := p.currentToken() - node := ast.Mapping(mapTk, true) - node.SetPath(ctx.path) - p.progress(1) // skip MappingStart token +func (p *parser) parseFlowMap(ctx *context) (*ast.MappingNode, error) { + node, err := newMappingNode(ctx, ctx.currentToken(), true) + if err != nil { + return nil, err + } + ctx.goNext() // skip MappingStart token isFirst := true - for p.next() { - tk := p.currentToken() - if tk.Type == token.MappingEndType { - node.End = tk + for ctx.next() { + tk := ctx.currentToken() + if tk.Type() == token.MappingEndType { + node.End = tk.RawToken() break - } else if tk.Type == token.CollectEntryType { - p.progress(1) + } + + if tk.Type() == token.CollectEntryType { + ctx.goNext() } else if !isFirst { - return nil, errors.ErrSyntax("',' or '}' must be specified", tk) + return nil, errors.ErrSyntax("',' or '}' must be specified", tk.RawToken()) } - if tk := p.currentToken(); tk != nil && tk.Type == token.MappingEndType { + if tk := ctx.currentToken(); tk.Type() == token.MappingEndType { // this case is here: "{ elem, }". // In this case, ignore the last element and break mapping parsing. - node.End = tk + node.End = tk.RawToken() break } - value, err := p.parseMappingValue(ctx.withFlow(true)) - if err != nil { - return nil, err - } - mvnode, ok := value.(*ast.MappingValueNode) - if !ok { - return nil, errors.ErrSyntax("failed to parse flow mapping node", value.GetToken()) + mapKeyTk := ctx.currentToken() + switch mapKeyTk.GroupType() { + case TokenGroupMapKeyValue: + value, err := p.parseMapKeyValue(ctx.withGroup(mapKeyTk.Group), mapKeyTk.Group) + if err != nil { + return nil, err + } + node.Values = append(node.Values, value) + ctx.goNext() + case TokenGroupMapKey: + key, err := p.parseMapKey(ctx.withGroup(mapKeyTk.Group), mapKeyTk.Group) + if err != nil { + return nil, err + } + colonTk := mapKeyTk.Group.Last() + if p.isFlowMapDelim(ctx.nextToken()) { + nullToken := p.createNullToken(colonTk) + ctx.insertToken(nullToken) + value, err := newNullNode(ctx, nullToken) + if err != nil { + return nil, err + } + mapValue, err := newMappingValueNode(ctx, colonTk, key, value) + if err != nil { + return nil, err + } + ctx.goNext() + node.Values = append(node.Values, mapValue) + ctx.goNext() + } else { + ctx.goNext() + value, err := p.parseToken(ctx, ctx.currentToken()) + if err != nil { + return nil, err + } + mapValue, err := newMappingValueNode(ctx, colonTk, key, value) + if err != nil { + return nil, err + } + node.Values = append(node.Values, mapValue) + } + default: + if !p.isFlowMapDelim(ctx.nextToken()) { + return nil, errors.ErrSyntax("could not find flow map content", mapKeyTk.RawToken()) + } + key, err := p.parseScalarValue(ctx, mapKeyTk) + if err != nil { + return nil, err + } + nullToken := p.createNullToken(mapKeyTk) + ctx.insertToken(nullToken) + value, err := newNullNode(ctx, nullToken) + if err != nil { + return nil, err + } + ctx.goNext() + mapValue, err := newMappingValueNode(ctx, mapKeyTk, key, value) + if err != nil { + return nil, err + } + node.Values = append(node.Values, mapValue) + ctx.goNext() } - node.Values = append(node.Values, mvnode) - p.progress(1) isFirst = false } - if node.End == nil || node.End.Type != token.MappingEndType { + if node.End == nil { return nil, errors.ErrSyntax("could not find flow mapping end token '}'", node.Start) } + ctx.goNext() // skip mapping end token. return node, nil } -func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) { - node := ast.Sequence(p.currentToken(), true) - node.SetPath(ctx.path) - p.progress(1) // skip SequenceStart token +func (p *parser) isFlowMapDelim(tk *Token) bool { + return tk.Type() == token.MappingEndType || tk.Type() == token.CollectEntryType +} - isFirst := true - for p.next() { - tk := p.currentToken() - if tk.Type == token.SequenceEndType { - node.End = tk - break - } else if tk.Type == token.CollectEntryType { - p.progress(1) - } else if !isFirst { - return nil, errors.ErrSyntax("',' or ']' must be specified", tk) +func (p *parser) parseMap(ctx *context) (*ast.MappingNode, error) { + keyTk := ctx.currentToken() + if keyTk.Group == nil { + return nil, errors.ErrSyntax("unexpected map key", keyTk.RawToken()) + } + var keyValueNode *ast.MappingValueNode + if keyTk.GroupType() == TokenGroupMapKeyValue { + node, err := p.parseMapKeyValue(ctx.withGroup(keyTk.Group), keyTk.Group) + if err != nil { + return nil, err } - - if tk := p.currentToken(); tk != nil && tk.Type == token.SequenceEndType { - // this case is here: "[ elem, ]". - // In this case, ignore the last element and break sequence parsing. - node.End = tk - break + keyValueNode = node + ctx.goNext() + if err := p.validateMapKeyValueNextToken(ctx, keyTk, ctx.currentToken()); err != nil { + return nil, err } - - value, err := p.parseToken(ctx.withIndex(uint(len(node.Values))).withFlow(true), p.currentToken()) + } else { + key, err := p.parseMapKey(ctx.withGroup(keyTk.Group), keyTk.Group) if err != nil { return nil, err } - node.Values = append(node.Values, value) - p.progress(1) - isFirst = false - } - if node.End == nil || node.End.Type != token.SequenceEndType { - return nil, errors.ErrSyntax("sequence end token ']' not found", node.Start) - } - return node, nil -} + ctx.goNext() -func (p *parser) parseTag(ctx *context) (*ast.TagNode, error) { - tagToken := p.currentToken() - node := ast.Tag(tagToken) - node.SetPath(ctx.path) - p.progress(1) // skip tag token - var ( - value ast.Node - err error - ) - switch token.ReservedTagKeyword(tagToken.Value) { - case token.MappingTag, token.OrderedMapTag: - tk := p.currentToken() - if tk.Type == token.CommentType { - tk = p.nextNotCommentToken() - } - if tk != nil && tk.Type == token.MappingStartType { - value, err = p.parseMapping(ctx) - } else { - value, err = p.parseMappingValue(ctx) + valueTk := ctx.currentToken() + if keyTk.Line() == valueTk.Line() && valueTk.Type() == token.SequenceEntryType { + return nil, errors.ErrSyntax("block sequence entries are not allowed in this context", valueTk.RawToken()) } - case token.IntegerTag, - token.FloatTag, - token.StringTag, - token.BinaryTag, - token.TimestampTag, - token.BooleanTag, - token.NullTag: - typ := p.currentToken().Type - if typ == token.LiteralType || typ == token.FoldedType { - value, err = p.parseLiteral(ctx) - } else { - value, err = p.parseScalarValueWithComment(ctx, p.currentToken()) + ctx := ctx.withChild(p.mapKeyText(key)) + value, err := p.parseMapValue(ctx, key, keyTk.Group.Last()) + if err != nil { + return nil, err } - case token.SequenceTag, - token.SetTag: - err = errors.ErrSyntax(fmt.Sprintf("sorry, currently not supported %s tag", tagToken.Value), tagToken) - default: - if strings.HasPrefix(tagToken.Value, "!!") { - err = errors.ErrSyntax(fmt.Sprintf("unknown secondary tag name %q specified", tagToken.Value), tagToken) - } else { - value, err = p.parseToken(ctx, p.currentToken()) + node, err := newMappingValueNode(ctx, keyTk.Group.Last(), key, value) + if err != nil { + return nil, err } + keyValueNode = node } + mapNode, err := newMappingNode(ctx, &Token{Token: keyValueNode.GetToken()}, false, keyValueNode) if err != nil { return nil, err } - node.Value = value - return node, nil -} - -func (p *parser) removeLeftSideNewLineCharacter(src string) string { - // CR or LF or CRLF - return strings.TrimLeft(strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n"), "\r\n") -} - -func (p *parser) existsNewLineCharacter(src string) bool { - if strings.Index(src, "\n") > 0 { - return true - } - if strings.Index(src, "\r") > 0 { - return true - } - return false -} - -func (p *parser) validateMapKey(tk *token.Token, keyPath string) error { - if !p.allowDuplicateMapKey { - if n, exists := p.pathMap[keyPath]; exists { - pos := n.GetToken().Position - return errors.ErrSyntax( - fmt.Sprintf("mapping key %q already defined at [%d:%d]", tk.Value, pos.Line, pos.Column), - tk, - ) + var tk *Token + if ctx.isComment() { + tk = ctx.nextNotCommentToken() + } else { + tk = ctx.currentToken() + } + for tk.Column() == keyTk.Column() { + if !p.isMapToken(tk) { + return nil, errors.ErrSyntax("non-map value is specified", tk.RawToken()) + } + cm := p.parseHeadComment(ctx) + if tk.Type() == token.MappingEndType { + // a: { + // b: c + // } <= + ctx.goNext() + break } - } - if tk.Type != token.StringType { - return nil - } - origin := p.removeLeftSideNewLineCharacter(tk.Origin) - if p.existsNewLineCharacter(origin) { - return errors.ErrSyntax("unexpected key name", tk) - } - return nil -} - -func (p *parser) createNullToken(base *token.Token) *token.Token { - pos := *(base.Position) - pos.Column++ - return token.New("null", "null", &pos) -} - -func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonToken *token.Token) (ast.Node, error) { - node, err := p.createMapValueNode(ctx, key, colonToken) - if err != nil { - return nil, err - } - if node != nil && node.GetPath() == "" { - node.SetPath(ctx.path) - } - return node, nil -} - -func (p *parser) createMapValueNode(ctx *context, key ast.MapKeyNode, colonToken *token.Token) (ast.Node, error) { - tk := p.currentToken() - if tk == nil { - nullToken := p.createNullToken(colonToken) - p.insertToken(p.idx, nullToken) - return ast.Null(nullToken), nil - } else if tk.Type == token.CollectEntryType { - // implicit null value. - return ast.Null(tk), nil - } - var comment *ast.CommentGroupNode - if tk.Type == token.CommentType { - comment = p.parseCommentOnly(ctx) - if comment != nil { - comment.SetPath(ctx.withChild(p.mapKeyText(key)).path) + node, err := p.parseMap(ctx) + if err != nil { + return nil, err } - tk = p.currentToken() - } - if tk.Position.Column == key.GetToken().Position.Column && tk.Type == token.StringType { - // in this case, - // ---- - // key: - // next - - nullToken := p.createNullToken(colonToken) - p.insertToken(p.idx, nullToken) - nullNode := ast.Null(nullToken) - - if comment != nil { - _ = nullNode.SetComment(comment) - } else { - // If there is a comment, it is already bound to the key node, - // so remove the comment from the key to bind it to the null value. - keyComment := key.GetComment() - if keyComment != nil { - if err := key.SetComment(nil); err != nil { - return nil, err - } - _ = nullNode.SetComment(keyComment) + if len(node.Values) != 0 { + if err := setHeadComment(cm, node.Values[0]); err != nil { + return nil, err } } - return nullNode, nil - } - - if tk.Position.Column < key.GetToken().Position.Column { - // in this case, - // ---- - // key: - // next - nullToken := p.createNullToken(colonToken) - p.insertToken(p.idx, nullToken) - nullNode := ast.Null(nullToken) - if comment != nil { - _ = nullNode.SetComment(comment) + mapNode.Values = append(mapNode.Values, node.Values...) + if node.FootComment != nil { + mapNode.Values[len(mapNode.Values)-1].FootComment = node.FootComment } - return nullNode, nil - } - - value, err := p.parseToken(ctx, p.currentToken()) - if err != nil { - return nil, err + tk = ctx.currentToken() } - if comment != nil { - nextLineComment := key.GetToken().Position.Line < comment.GetToken().Position.Line - if n, ok := value.(*ast.MappingNode); ok && nextLineComment && len(n.Values) > 1 { - _ = n.Values[0].SetComment(comment) - } else { - _ = value.SetComment(comment) + if ctx.isComment() { + if keyTk.Column() <= ctx.currentToken().Column() { + // If the comment is in the same or deeper column as the last element column in map value, + // treat it as a footer comment for the last element. + if len(mapNode.Values) == 1 { + mapNode.Values[0].FootComment = p.parseFootComment(ctx, keyTk.Column()) + mapNode.Values[0].FootComment.SetPath(mapNode.Values[0].Key.GetPath()) + } else { + mapNode.FootComment = p.parseFootComment(ctx, keyTk.Column()) + mapNode.FootComment.SetPath(mapNode.GetPath()) + } } } - return value, nil + return mapNode, nil } -func (p *parser) validateMapValue(ctx *context, key, value ast.Node) error { - keyTk := key.GetToken() - valueTk := value.GetToken() - - if keyTk.Position.Line == valueTk.Position.Line && valueTk.Type == token.SequenceEntryType { - return errors.ErrSyntax("block sequence entries are not allowed in this context", valueTk) - } - if keyTk.Position.Column != valueTk.Position.Column { +func (p *parser) validateMapKeyValueNextToken(ctx *context, keyTk, tk *Token) error { + if tk.Column() <= keyTk.Column() { return nil } - if value.Type() != ast.StringType { + if ctx.isFlow && tk.Type() == token.CollectEntryType { return nil } - ntk := p.nextToken() - if ntk == nil || (ntk.Type != token.MappingValueType && ntk.Type != token.SequenceEntryType) { - return errors.ErrSyntax("could not find expected ':' token", valueTk) - } - return nil + // a: b + // c <= this token is invalid. + return errors.ErrSyntax("value is not allowed in this context. map key-value is pre-defined", tk.RawToken()) } -func (p *parser) mapKeyText(n ast.Node) string { - switch nn := n.(type) { - case *ast.MappingKeyNode: - return p.mapKeyText(nn.Value) - case *ast.TagNode: - return p.mapKeyText(nn.Value) +func (p *parser) isMapToken(tk *Token) bool { + if tk.Group == nil { + return tk.Type() == token.MappingStartType || tk.Type() == token.MappingEndType } - return n.GetToken().Value + g := tk.Group + return g.Type == TokenGroupMapKey || g.Type == TokenGroupMapKeyValue } -func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) { - key, err := p.parseMapKey(ctx.withMapKey()) +func (p *parser) parseMapKeyValue(ctx *context, g *TokenGroup) (*ast.MappingValueNode, error) { + if g.Type != TokenGroupMapKeyValue { + return nil, errors.ErrSyntax("unexpected map key-value pair", g.RawToken()) + } + if g.First().Group == nil { + return nil, errors.ErrSyntax("unexpected map key", g.RawToken()) + } + keyGroup := g.First().Group + key, err := p.parseMapKey(ctx.withGroup(keyGroup), keyGroup) if err != nil { return nil, err } - keyText := p.mapKeyText(key) - keyPath := ctx.withChild(keyText).path - key.SetPath(keyPath) - if err := p.validateMapKey(key.GetToken(), keyPath); err != nil { + value, err := p.parseToken(ctx.withChild(p.mapKeyText(key)), g.Last()) + if err != nil { return nil, err } - p.pathMap[keyPath] = key - p.progress(1) // progress to mapping value token - if ctx.isFlow { - // if "{key}" or "{key," style, returns MappingValueNode. - node, err := p.parseFlowMapNullValue(ctx, key) + return newMappingValueNode(ctx, keyGroup.Last(), key, value) +} + +func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error) { + if g.Type != TokenGroupMapKey { + return nil, errors.ErrSyntax("unexpected map key", g.RawToken()) + } + if g.Last().Type() != token.MappingValueType { + return nil, errors.ErrSyntax("expected map key-value delimiter ':'", g.Last().RawToken()) + } + if g.First().Type() == token.MappingKeyType { + mapKeyTk := g.First() + ctx := ctx.withGroup(mapKeyTk.Group) + key, err := newMappingKeyNode(ctx, mapKeyTk) if err != nil { return nil, err } - if node != nil { - return node, nil - } - } - tk := p.currentToken() // get mapping value (':') token. - if tk == nil { - return nil, errors.ErrSyntax("unexpected map", key.GetToken()) - } - p.progress(1) // progress to value token - if ctx.isFlow { - // if "{key:}" or "{key:," style, returns MappingValueNode. - node, err := p.parseFlowMapNullValue(ctx, key) + ctx.goNext() // skip mapping key token + + scalar, err := p.parseScalarValue(ctx, ctx.currentToken()) if err != nil { return nil, err } - if node != nil { - return node, nil + key.Value = scalar + keyText := p.mapKeyText(scalar) + keyPath := ctx.withChild(keyText).path + key.SetPath(keyPath) + if err := p.validateMapKey(key.GetToken(), keyPath); err != nil { + return nil, err } - } - if err := p.setSameLineCommentIfExists(ctx.withChild(keyText), key); err != nil { - return nil, err - } - if key.GetComment() != nil { - // if current token is comment, GetComment() is not nil. - // then progress to value token - p.progressIgnoreComment(1) + p.pathMap[keyPath] = key + return key, nil } - value, err := p.parseMapValue(ctx.withChild(keyText), key, tk) + scalar, err := p.parseScalarValue(ctx, g.First()) if err != nil { return nil, err } - if err := p.validateMapValue(ctx, key, value); err != nil { - return nil, err - } - - mvnode := ast.MappingValue(tk, key, value) - mvnode.SetPath(ctx.withChild(keyText).path) - node := ast.Mapping(tk, false, mvnode) - node.SetPath(ctx.withChild(keyText).path) - - ntk := p.nextNotCommentToken() - antk := p.afterNextNotCommentToken() - for ntk != nil && ntk.Position.Column == key.GetToken().Position.Column { - if ntk.Type == token.DocumentHeaderType || ntk.Type == token.DocumentEndType { - break - } - if antk == nil { - return nil, errors.ErrSyntax("required ':' and map value", ntk) - } - p.progressIgnoreComment(1) - var comment *ast.CommentGroupNode - if tk := p.currentToken(); tk.Type == token.CommentType { - comment = p.parseCommentOnly(ctx) - } - value, err := p.parseMappingValue(ctx) - if err != nil { - return nil, err - } - if comment != nil { - comment.SetPath(value.GetPath()) - if err := value.SetComment(comment); err != nil { - return nil, err - } - } - switch v := value.(type) { - case *ast.MappingNode: - comment := v.GetComment() - for idx, val := range v.Values { - if idx == 0 && comment != nil { - if err := val.SetComment(comment); err != nil { - return nil, err - } - } - node.Values = append(node.Values, val) - } - case *ast.MappingValueNode: - node.Values = append(node.Values, v) - case *ast.AnchorNode: - switch anchorV := v.Value.(type) { - case *ast.MappingNode: - comment := anchorV.GetComment() - for idx, val := range anchorV.Values { - if idx == 0 && comment != nil { - if err := val.SetComment(comment); err != nil { - return nil, err - } - } - val.Anchor = v - node.Values = append(node.Values, val) - } - case *ast.MappingValueNode: - anchorV.Anchor = v - node.Values = append(node.Values, anchorV) - default: - return nil, fmt.Errorf("failed to parse mapping value node. anchor node is %s", anchorV.Type()) - } - default: - return nil, fmt.Errorf("failed to parse mapping value node. node is %s", value.Type()) - } - ntk = p.nextNotCommentToken() - antk = p.afterNextNotCommentToken() + key, ok := scalar.(ast.MapKeyNode) + if !ok { + return nil, errors.ErrSyntax("cannot take map-key node", scalar.GetToken()) } - if err := p.validateMapNextToken(ctx, node); err != nil { + keyText := p.mapKeyText(key) + keyPath := ctx.withChild(keyText).path + key.SetPath(keyPath) + if err := p.validateMapKey(key.GetToken(), keyPath); err != nil { return nil, err } - if len(node.Values) == 1 { - mapKeyCol := mvnode.Key.GetToken().Position.Column - commentTk := p.nextToken() - if commentTk != nil && commentTk.Type == token.CommentType && mapKeyCol <= commentTk.Position.Column { - // If the comment is in the same or deeper column as the last element column in map value, - // treat it as a footer comment for the last element. - comment := p.parseFootComment(ctx, mapKeyCol) - mvnode.FootComment = comment + p.pathMap[keyPath] = key + return key, nil +} + +func (p *parser) validateMapKey(tk *token.Token, keyPath string) error { + if !p.allowDuplicateMapKey { + if n, exists := p.pathMap[keyPath]; exists { + pos := n.GetToken().Position + return errors.ErrSyntax( + fmt.Sprintf("mapping key %q already defined at [%d:%d]", tk.Value, pos.Line, pos.Column), + tk, + ) } - return mvnode, nil } - mapCol := node.GetToken().Position.Column - commentTk := p.nextToken() - if commentTk != nil && commentTk.Type == token.CommentType && mapCol <= commentTk.Position.Column { - // If the comment is in the same or deeper column as the last element column in map value, - // treat it as a footer comment for the last element. - comment := p.parseFootComment(ctx, mapCol) - node.FootComment = comment + if tk.Type != token.StringType { + return nil } - return node, nil + origin := p.removeLeftSideNewLineCharacter(tk.Origin) + if p.existsNewLineCharacter(origin) { + return errors.ErrSyntax("unexpected key name", tk) + } + return nil +} + +func (p *parser) removeLeftSideNewLineCharacter(src string) string { + // CR or LF or CRLF + return strings.TrimLeft(strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n"), "\r\n") } -func (p *parser) validateMapNextToken(ctx *context, node *ast.MappingNode) error { - keyTk := node.Start - if len(node.Values) != 0 { - keyTk = node.Values[len(node.Values)-1].Key.GetToken() +func (p *parser) existsNewLineCharacter(src string) bool { + if strings.Index(src, "\n") > 0 { + return true } - tk := p.nextNotCommentToken() - if tk == nil { - return nil + if strings.Index(src, "\r") > 0 { + return true } + return false +} - if ctx.isFlow && (tk.Type == token.CollectEntryType || tk.Type == token.SequenceEndType || tk.Type == token.MappingEndType) { - // a: { - // key: value - // } , <= if context is flow mode, "," or "]" or "}" is allowed. - return nil +func (p *parser) mapKeyText(n ast.Node) string { + if n == nil { + return "" } - - if tk.Position.Line > keyTk.Position.Line && tk.Position.Column > keyTk.Position.Column { - // a: b - // c <= this token is invalid. - return errors.ErrSyntax("value is not allowed in this context", tk) + switch nn := n.(type) { + case *ast.MappingKeyNode: + return p.mapKeyText(nn.Value) + case *ast.TagNode: + return p.mapKeyText(nn.Value) + case *ast.AnchorNode: + return p.mapKeyText(nn.Value) + case *ast.AliasNode: + return p.mapKeyText(nn.Value) } - return nil + return n.GetToken().Value } -func (p *parser) parseFlowMapNullValue(ctx *context, key ast.MapKeyNode) (*ast.MappingValueNode, error) { - tk := p.currentToken() +func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token) (ast.Node, error) { + tk := ctx.currentToken() if tk == nil { - return nil, errors.ErrSyntax("unexpected map", key.GetToken()) - } - if tk.Type != token.MappingEndType && tk.Type != token.CollectEntryType { - return nil, nil + nullToken := p.createNullToken(colonTk) + ctx.insertToken(nullToken) + nullNode, err := newNullNode(ctx, nullToken) + if err != nil { + return nil, err + } + ctx.goNext() + return nullNode, nil + } else if tk.Type() == token.CollectEntryType { + // implicit null value. + return newNullNode(ctx, tk) } - nullTk := p.createNullToken(tk) - p.insertToken(p.idx, nullTk) - value, err := p.parseToken(ctx, nullTk) - if err != nil { - return nil, err + + if ctx.isComment() { + tk = ctx.nextNotCommentToken() } - node := ast.MappingValue(tk, key, value) - node.SetPath(ctx.withChild(p.mapKeyText(key)).path) - return node, nil -} -func (p *parser) parseSequenceEntry(ctx *context) (*ast.SequenceNode, error) { - tk := p.currentToken() - sequenceNode := ast.Sequence(tk, false) - sequenceNode.SetPath(ctx.path) - curColumn := tk.Position.Column - for tk.Type == token.SequenceEntryType { - p.progress(1) // skip sequence token - entryTk := tk - tk = p.currentToken() - if tk == nil { - sequenceNode.Values = append(sequenceNode.Values, ast.Null(p.createNullToken(entryTk))) - break - } - var comment *ast.CommentGroupNode - if tk.Type == token.CommentType { - comment = p.parseCommentOnly(ctx) - tk = p.currentToken() - if tk.Type == token.SequenceEntryType { - p.progress(1) // skip sequence token - } - } - value, err := p.parseToken(ctx.withIndex(uint(len(sequenceNode.Values))), p.currentToken()) + if tk.Column() == key.GetToken().Position.Column && p.isMapToken(tk) { + // in this case, + // ---- + // key: + // next + + nullToken := p.createNullToken(colonTk) + ctx.insertToken(nullToken) + nullNode, err := newNullNode(ctx, nullToken) if err != nil { return nil, err } - if comment != nil { - comment.SetPath(ctx.withIndex(uint(len(sequenceNode.Values))).path) - sequenceNode.ValueHeadComments = append(sequenceNode.ValueHeadComments, comment) - } else { - sequenceNode.ValueHeadComments = append(sequenceNode.ValueHeadComments, nil) - } - sequenceNode.Values = append(sequenceNode.Values, value) - tk = p.nextNotCommentToken() - if tk == nil { - break - } - if tk.Type != token.SequenceEntryType { - break - } - if tk.Position.Column != curColumn { - break + ctx.goNext() + return nullNode, nil + } + + if tk.Column() < key.GetToken().Position.Column { + // in this case, + // ---- + // key: + // next + nullToken := p.createNullToken(colonTk) + ctx.insertToken(nullToken) + nullNode, err := newNullNode(ctx, nullToken) + if err != nil { + return nil, err } - p.progressIgnoreComment(1) + + ctx.goNext() + return nullNode, nil } - commentTk := p.nextToken() - if commentTk != nil && commentTk.Type == token.CommentType && curColumn <= commentTk.Position.Column { - // If the comment is in the same or deeper column as the last element column in sequence value, - // treat it as a footer comment for the last element. - comment := p.parseFootComment(ctx, curColumn) - sequenceNode.FootComment = comment + + value, err := p.parseToken(ctx, ctx.currentToken()) + if err != nil { + return nil, err } - return sequenceNode, nil + return value, nil } -func (p *parser) parseAnchor(ctx *context) (*ast.AnchorNode, error) { - tk := p.currentToken() - anchor := ast.Anchor(tk) - anchor.SetPath(ctx.path) - ntk := p.nextToken() - if ntk == nil { - return nil, errors.ErrSyntax("unexpected anchor. anchor name is undefined", tk) - } - p.progress(1) // skip anchor token - anchorNameTk := p.currentToken() - anchorNameNode, err := p.parseScalarValueWithComment(ctx, anchorNameTk) +func (p *parser) parseAnchor(ctx *context, g *TokenGroup) (*ast.AnchorNode, error) { + anchorNameGroup := g.First().Group + anchor, err := p.parseAnchorName(ctx.withGroup(anchorNameGroup)) if err != nil { return nil, err } - if anchorNameNode == nil { - return nil, errors.ErrSyntax("unexpected anchor. anchor name is not scalar value", anchorNameTk) - } - anchor.Name = anchorNameNode - ntk = p.nextToken() - if ntk == nil { - return nil, errors.ErrSyntax("unexpected anchor. anchor value is undefined", p.currentToken()) - } - p.progress(1) - value, err := p.parseToken(ctx, p.currentToken()) + ctx.goNext() + value, err := p.parseToken(ctx, ctx.currentToken()) if err != nil { return nil, err } @@ -704,387 +690,264 @@ func (p *parser) parseAnchor(ctx *context) (*ast.AnchorNode, error) { return anchor, nil } -func (p *parser) parseAlias(ctx *context) (*ast.AliasNode, error) { - tk := p.currentToken() - alias := ast.Alias(tk) - alias.SetPath(ctx.path) - ntk := p.nextToken() - if ntk == nil { - return nil, errors.ErrSyntax("unexpected alias. alias name is undefined", tk) - } - p.progress(1) // skip alias token - aliasNameTk := p.currentToken() - aliasNameNode, err := p.parseScalarValueWithComment(ctx, aliasNameTk) +func (p *parser) parseAnchorName(ctx *context) (*ast.AnchorNode, error) { + anchor, err := newAnchorNode(ctx, ctx.currentToken()) if err != nil { return nil, err } - if aliasNameNode == nil { - return nil, errors.ErrSyntax("unexpected alias. alias name is not scalar value", aliasNameTk) - } - alias.Value = aliasNameNode - return alias, nil -} - -func (p *parser) parseMapKey(ctx *context) (ast.MapKeyNode, error) { - tk := p.currentToken() - if value, _ := p.parseScalarValueWithComment(ctx, tk); value != nil { - return value, nil + ctx.goNext() + anchorName, err := p.parseScalarValue(ctx, ctx.currentToken()) + if err != nil { + return nil, err } - switch tk.Type { - case token.MergeKeyType: - return ast.MergeKey(tk), nil - case token.MappingKeyType: - return p.parseMappingKey(ctx) - case token.TagType: - return p.parseTag(ctx) + if anchorName == nil { + return nil, errors.ErrSyntax("unexpected anchor. anchor name is not scalar value", ctx.currentToken().RawToken()) } - return nil, errors.ErrSyntax("unexpected mapping key", tk) + anchor.Name = anchorName + return anchor, nil } -func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast.ScalarNode, error) { - node, err := p.parseScalarValue(ctx, tk) +func (p *parser) parseAlias(ctx *context) (*ast.AliasNode, error) { + alias, err := newAliasNode(ctx, ctx.currentToken()) if err != nil { return nil, err } - if node == nil { - return nil, nil - } - node.SetPath(ctx.path) - if p.isSameLineComment(p.nextToken(), node) { - p.progress(1) - if err := p.setSameLineCommentIfExists(ctx, node); err != nil { - return nil, err - } - } - return node, nil -} - -func (p *parser) parseScalarValue(ctx *context, tk *token.Token) (ast.ScalarNode, error) { - switch tk.Type { - case token.NullType: - return ast.Null(tk), nil - case token.BoolType: - return ast.Bool(tk), nil - case token.IntegerType, - token.BinaryIntegerType, - token.OctetIntegerType, - token.HexIntegerType: - return ast.Integer(tk), nil - case token.FloatType: - return ast.Float(tk), nil - case token.InfinityType: - return ast.Infinity(tk), nil - case token.NanType: - return ast.Nan(tk), nil - case token.StringType, token.SingleQuoteType, - token.DoubleQuoteType: - return ast.String(tk), nil - case token.AnchorType: - return p.parseAnchor(ctx) - case token.AliasType: - return p.parseAlias(ctx) - } - return nil, nil -} + ctx.goNext() -func (p *parser) parseDirective(ctx *context) (*ast.DirectiveNode, error) { - node := ast.Directive(p.currentToken()) - p.progress(1) // skip directive token - value, err := p.parseToken(ctx, p.currentToken()) + aliasName, err := p.parseScalarValue(ctx, ctx.currentToken()) if err != nil { return nil, err } - node.Value = value - p.progress(1) - tk := p.currentToken() - if tk == nil { - // Since current token is nil, use the previous token to specify - // the syntax error location. - return nil, errors.ErrSyntax("unexpected directive value. document not started", p.previousToken()) - } - if tk.Type != token.DocumentHeaderType { - return nil, errors.ErrSyntax("unexpected directive value. document not started", p.currentToken()) + if aliasName == nil { + return nil, errors.ErrSyntax("unexpected alias. alias name is not scalar value", ctx.currentToken().RawToken()) } - return node, nil + alias.Value = aliasName + return alias, nil } func (p *parser) parseLiteral(ctx *context) (*ast.LiteralNode, error) { - node := ast.Literal(p.currentToken()) - p.progress(1) // skip literal/folded token + node, err := newLiteralNode(ctx, ctx.currentToken()) + if err != nil { + return nil, err + } + ctx.goNext() // skip literal/folded token - tk := p.currentToken() + tk := ctx.currentToken() if tk == nil { - node.Value = ast.String(token.New("", "", node.Start.Position)) - return node, nil - } - var comment *ast.CommentGroupNode - if tk.Type == token.CommentType { - comment = p.parseCommentOnly(ctx) - comment.SetPath(ctx.path) - if err := node.SetComment(comment); err != nil { + value, err := newStringNode(ctx, &Token{Token: token.New("", "", node.Start.Position)}) + if err != nil { return nil, err } - tk = p.currentToken() + node.Value = value + return node, nil } value, err := p.parseToken(ctx, tk) if err != nil { return nil, err } - snode, ok := value.(*ast.StringNode) + str, ok := value.(*ast.StringNode) if !ok { return nil, errors.ErrSyntax("unexpected token. required string token", value.GetToken()) } - node.Value = snode + node.Value = str return node, nil } -func (p *parser) isSameLineComment(tk *token.Token, node ast.Node) bool { - if tk == nil { - return false - } - if tk.Type != token.CommentType { - return false - } - return tk.Position.Line == node.GetToken().Position.Line -} - -func (p *parser) setSameLineCommentIfExists(ctx *context, node ast.Node) error { - tk := p.currentToken() - if !p.isSameLineComment(tk, node) { - return nil - } - comment := ast.CommentGroup([]*token.Token{tk}) - comment.SetPath(ctx.path) - if err := node.SetComment(comment); err != nil { - return err +func (p *parser) parseTag(ctx *context) (*ast.TagNode, error) { + tagTk := ctx.currentToken() + tagRawTk := tagTk.RawToken() + node, err := newTagNode(ctx, tagTk) + if err != nil { + return nil, err } - return nil -} + ctx.goNext() -func (p *parser) parseDocument(ctx *context) (*ast.DocumentNode, error) { - p.pathMap = make(map[string]ast.Node) - startTk := p.currentToken() - p.progress(1) // skip document header token - body, err := p.parseToken(ctx, p.currentToken()) + comment := p.parseHeadComment(ctx) + value, err := p.parseTagValue(ctx, tagRawTk, ctx.currentToken()) if err != nil { return nil, err } - node := ast.Document(startTk, body) - if ntk := p.nextToken(); ntk != nil && ntk.Type == token.DocumentEndType { - node.End = ntk - p.progress(1) + if err := setHeadComment(comment, value); err != nil { + return nil, err } + node.Value = value return node, nil } -func (p *parser) parseCommentOnly(ctx *context) *ast.CommentGroupNode { - commentTokens := []*token.Token{} - for { - tk := p.currentToken() - if tk == nil { - break +func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) (ast.Node, error) { + if tk == nil { + return newNullNode(ctx, p.createNullToken(&Token{Token: tagRawTk})) + } + switch token.ReservedTagKeyword(tagRawTk.Value) { + case token.MappingTag, token.OrderedMapTag: + if !p.isMapToken(tk) { + return nil, errors.ErrSyntax("could not find map", tk.RawToken()) } - if tk.Type != token.CommentType { - break + if tk.Type() == token.MappingStartType { + return p.parseFlowMap(ctx) + } + return p.parseMap(ctx) + case token.IntegerTag, token.FloatTag, token.StringTag, token.BinaryTag, token.TimestampTag, token.BooleanTag, token.NullTag: + if tk.GroupType() == TokenGroupLiteral || tk.GroupType() == TokenGroupFolded { + return p.parseLiteral(ctx.withGroup(tk.Group)) + } + scalar, err := p.parseScalarValue(ctx, tk) + if err != nil { + return nil, err } - commentTokens = append(commentTokens, tk) - p.progressIgnoreComment(1) // skip comment token + ctx.goNext() + return scalar, nil + case token.SequenceTag, token.SetTag: + return nil, errors.ErrSyntax(fmt.Sprintf("sorry, currently not supported %s tag", tagRawTk.Value), tagRawTk) + } + if strings.HasPrefix(tagRawTk.Value, "!!") { + return nil, errors.ErrSyntax(fmt.Sprintf("unknown secondary tag name %q specified", tagRawTk.Value), tagRawTk) } - return ast.CommentGroup(commentTokens) + return p.parseToken(ctx, tk) } -func (p *parser) parseFootComment(ctx *context, col int) *ast.CommentGroupNode { - commentTokens := []*token.Token{} - for { - p.progressIgnoreComment(1) - commentTokens = append(commentTokens, p.currentToken()) +func (p *parser) parseFlowSequence(ctx *context) (*ast.SequenceNode, error) { + node, err := newSequenceNode(ctx, ctx.currentToken(), true) + if err != nil { + return nil, err + } + ctx.goNext() // skip SequenceStart token - nextTk := p.nextToken() - if nextTk == nil { + isFirst := true + for ctx.next() { + tk := ctx.currentToken() + if tk.Type() == token.SequenceEndType { + node.End = tk.RawToken() break } - if nextTk.Type != token.CommentType { - break + + if tk.Type() == token.CollectEntryType { + ctx.goNext() + } else if !isFirst { + return nil, errors.ErrSyntax("',' or ']' must be specified", tk.RawToken()) } - if col > nextTk.Position.Column { + + if tk := ctx.currentToken(); tk != nil && tk.Type() == token.SequenceEndType { + // this case is here: "[ elem, ]". + // In this case, ignore the last element and break sequence parsing. + node.End = tk.RawToken() break } - } - return ast.CommentGroup(commentTokens) -} -func (p *parser) parseComment(ctx *context) (ast.Node, error) { - group := p.parseCommentOnly(ctx) - node, err := p.parseToken(ctx, p.currentToken()) - if err != nil { - return nil, err - } - if node == nil { - return group, nil + value, err := p.parseToken(ctx.withIndex(uint(len(node.Values))).withFlow(true), ctx.currentToken()) + if err != nil { + return nil, err + } + node.Values = append(node.Values, value) + isFirst = false } - group.SetPath(node.GetPath()) - if err := node.SetComment(group); err != nil { - return nil, err + if node.End == nil { + return nil, errors.ErrSyntax("sequence end token ']' not found", node.Start) } + ctx.goNext() // skip sequence end token. return node, nil } -func (p *parser) parseMappingKey(ctx *context) (*ast.MappingKeyNode, error) { - keyTk := p.currentToken() - node := ast.MappingKey(keyTk) - node.SetPath(ctx.path) - p.progress(1) // skip mapping key token - value, err := p.parseToken(ctx, p.currentToken()) +func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) { + seqTk := ctx.currentToken() + seqNode, err := newSequenceNode(ctx, seqTk, false) if err != nil { return nil, err } - node.Value = value - return node, nil -} -func (p *parser) parseToken(ctx *context, tk *token.Token) (ast.Node, error) { - node, err := p.createNodeFromToken(ctx, tk) - if err != nil { - return nil, err - } - if node != nil && node.GetPath() == "" { - node.SetPath(ctx.path) - } - return node, nil -} + tk := seqTk + for tk.Type() == token.SequenceEntryType && tk.Column() == seqTk.Column() { + seqTk := tk + comment := p.parseHeadComment(ctx) + ctx.goNext() // skip sequence entry token -func (p *parser) createNodeFromToken(ctx *context, tk *token.Token) (ast.Node, error) { - if tk == nil { - return nil, nil - } - if !ctx.isMapKey && tk.NextType() == token.MappingValueType { - return p.parseMappingValue(ctx) - } - if tk.Type == token.AliasType { - aliasValueTk := p.nextToken() - if aliasValueTk != nil && aliasValueTk.NextType() == token.MappingValueType { - return p.parseMappingValue(ctx) + valueTk := ctx.currentToken() + if valueTk == nil { + node, err := newNullNode(ctx, p.createNullToken(seqTk)) + if err != nil { + return nil, err + } + seqNode.Values = append(seqNode.Values, node) + break } - } - node, err := p.parseScalarValueWithComment(ctx, tk) - if err != nil { - return nil, err - } - if node != nil { - return node, nil - } - switch tk.Type { - case token.CommentType: - return p.parseComment(ctx) - case token.MappingKeyType: - return p.parseMappingKey(ctx) - case token.DocumentHeaderType: - return p.parseDocument(ctx) - case token.MappingStartType: - return p.parseMapping(ctx) - case token.SequenceStartType: - return p.parseSequence(ctx) - case token.SequenceEndType: - // SequenceEndType is always validated in parseSequence. - // Therefore, if this is found in other cases, it is treated as a syntax error. - return nil, errors.ErrSyntax("could not find '[' character corresponding to ']'", tk) - case token.SequenceEntryType: - return p.parseSequenceEntry(ctx) - case token.AnchorType: - return p.parseAnchor(ctx) - case token.AliasType: - return p.parseAlias(ctx) - case token.DirectiveType: - return p.parseDirective(ctx) - case token.TagType: - return p.parseTag(ctx) - case token.LiteralType, token.FoldedType: - return p.parseLiteral(ctx) - case token.MappingValueType: - return nil, errors.ErrSyntax("found an invalid key for this map", tk) - } - return nil, nil -} -func (p *parser) parse(ctx *context) (*ast.File, error) { - file := &ast.File{Docs: []*ast.DocumentNode{}} - for p.next() { - node, err := p.parseToken(ctx, p.currentToken()) + value, err := p.parseToken(ctx.withIndex(uint(len(seqNode.Values))), valueTk) if err != nil { return nil, err } - p.progressIgnoreComment(1) - if node == nil { - continue - } - if doc, ok := node.(*ast.DocumentNode); ok { - file.Docs = append(file.Docs, doc) - } else if len(file.Docs) == 0 { - file.Docs = append(file.Docs, ast.Document(nil, node)) + seqNode.ValueHeadComments = append(seqNode.ValueHeadComments, comment) + seqNode.Values = append(seqNode.Values, value) + + if ctx.isComment() { + tk = ctx.nextNotCommentToken() } else { - lastNode := p.comparableColumnNode(file.Docs[len(file.Docs)-1]) - curNode := p.comparableColumnNode(node) - if lastNode.GetToken().Position.Column != curNode.GetToken().Position.Column { - return nil, errors.ErrSyntax("value is not allowed in this context", curNode.GetToken()) - } - file.Docs = append(file.Docs, ast.Document(nil, node)) + tk = ctx.currentToken() } } - return file, nil -} - -func (p *parser) comparableColumnNode(n ast.Node) ast.Node { - switch nn := n.(type) { - case *ast.MappingNode: - if len(nn.Values) != 0 { - return nn.Values[0].Key + if ctx.isComment() { + if seqTk.Column() <= ctx.currentToken().Column() { + // If the comment is in the same or deeper column as the last element column in sequence value, + // treat it as a footer comment for the last element. + seqNode.FootComment = p.parseFootComment(ctx, seqTk.Column()) + if len(seqNode.Values) != 0 { + seqNode.FootComment.SetPath(seqNode.Values[len(seqNode.Values)-1].GetPath()) + } } - case *ast.MappingValueNode: - return nn.Key - case *ast.DocumentNode: - return p.comparableColumnNode(nn.Body) } - return n + return seqNode, nil } -type Mode uint - -const ( - ParseComments Mode = 1 << iota // parse comments and add them to AST -) - -// ParseBytes parse from byte slice, and returns ast.File -func ParseBytes(bytes []byte, mode Mode, opts ...Option) (*ast.File, error) { - tokens := lexer.Tokenize(string(bytes)) - f, err := Parse(tokens, mode, opts...) +func (p *parser) parseDirective(ctx *context, g *TokenGroup) (*ast.DirectiveNode, error) { + node, err := newDirectiveNode(ctx, g.First()) if err != nil { return nil, err } - return f, nil -} - -// Parse parse from token instances, and returns ast.File -func Parse(tokens token.Tokens, mode Mode, opts ...Option) (*ast.File, error) { - if tk := tokens.InvalidToken(); tk != nil { - return nil, errors.ErrSyntax(tk.Error, tk) - } - f, err := newParser(tokens, mode, opts).parse(newContext()) + value, err := p.parseToken(ctx, g.Last()) if err != nil { return nil, err } - return f, nil + node.Value = value + return node, nil } -// Parse parse from filename, and returns ast.File -func ParseFile(filename string, mode Mode, opts ...Option) (*ast.File, error) { - file, err := os.ReadFile(filename) +func (p *parser) parseComment(ctx *context) (ast.Node, error) { + cm := p.parseHeadComment(ctx) + node, err := p.parseToken(ctx, ctx.currentToken()) if err != nil { return nil, err } - f, err := ParseBytes(file, mode, opts...) - if err != nil { + if err := setHeadComment(cm, node); err != nil { return nil, err } - f.Name = filename - return f, nil + return node, nil +} + +func (p *parser) parseHeadComment(ctx *context) *ast.CommentGroupNode { + tks := []*token.Token{} + for ctx.isComment() { + tks = append(tks, ctx.currentToken().RawToken()) + ctx.goNext() + } + if len(tks) == 0 { + return nil + } + return ast.CommentGroup(tks) +} + +func (p *parser) parseFootComment(ctx *context, col int) *ast.CommentGroupNode { + tks := []*token.Token{} + for ctx.isComment() && col <= ctx.currentToken().Column() { + tks = append(tks, ctx.currentToken().RawToken()) + ctx.goNext() + } + if len(tks) == 0 { + return nil + } + return ast.CommentGroup(tks) +} + +func (p *parser) createNullToken(base *Token) *Token { + pos := *(base.RawToken().Position) + pos.Column++ + return &Token{Token: token.New("null", "null", &pos)} } diff --git a/parser/parser_test.go b/parser/parser_test.go index 7552e00..9493e2d 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1021,15 +1021,15 @@ a { `%YAML 1.1 {}`, ` -[1:2] unexpected directive value. document not started +[1:1] unexpected directive value. document not started > 1 | %YAML 1.1 {} - ^ + ^ `, }, { `{invalid`, ` -[1:2] unexpected map +[1:2] could not find flow map content > 1 | {invalid ^ `, @@ -1059,7 +1059,7 @@ a: 1 b `, ` -[3:1] required ':' and map value +[3:1] non-map value is specified 2 | a: 1 > 3 | b ^ @@ -1071,7 +1071,7 @@ a: 'b' c: d `, ` -[3:3] value is not allowed in this context +[3:3] value is not allowed in this context. map key-value is pre-defined 2 | a: 'b' > 3 | c: d ^ @@ -1083,7 +1083,7 @@ a: 'b' - c `, ` -[3:3] value is not allowed in this context +[3:3] value is not allowed in this context. map key-value is pre-defined 2 | a: 'b' > 3 | - c ^ @@ -1096,7 +1096,7 @@ a: 'b' - c `, ` -[4:3] value is not allowed in this context +[4:3] value is not allowed in this context. map key-value is pre-defined 2 | a: 'b' 3 | # comment > 4 | - c @@ -1110,7 +1110,7 @@ b - c `, ` -[3:1] unexpected key name +[3:1] non-map value is specified 2 | a: 1 > 3 | b ^ @@ -1146,11 +1146,10 @@ a: - b: - `, ` -[3:4] block sequence entries are not allowed in this context - 2 | a: - -> 3 | b: - +[2:4] block sequence entries are not allowed in this context +> 2 | a: - ^ -`, + 3 | b: -`, }, { ` @@ -1475,7 +1474,8 @@ foo: ` expected := ` foo: - bar: null # comment + bar: null + # comment baz: 1` f, err := parser.ParseBytes([]byte(content), parser.ParseComments) if err != nil { @@ -1504,7 +1504,8 @@ baz: 1` } expected := ` foo: - bar: null # comment + bar: null +# comment baz: 1` got := f.Docs[0].String() if got != strings.TrimPrefix(expected, "\n") { @@ -1597,11 +1598,14 @@ multiple: t.Fatalf("failed to get map values. got %d", len(mapNode.Values)) } - singleNode, ok := mapNode.Values[0].Value.(*ast.MappingValueNode) + singleNode, ok := mapNode.Values[0].Value.(*ast.MappingNode) if !ok { t.Fatalf("failed to get single node. got %T", mapNode.Values[0].Value) } - if singleNode.GetComment().GetToken().Value != " foo comment" { + if len(singleNode.Values) != 1 { + t.Fatalf("failed to get single node values. got %d", len(singleNode.Values)) + } + if singleNode.Values[0].GetComment().GetToken().Value != " foo comment" { t.Fatalf("failed to get comment from single. got %q", singleNode.GetComment().GetToken().Value) } diff --git a/parser/token.go b/parser/token.go new file mode 100644 index 0000000..fab77f3 --- /dev/null +++ b/parser/token.go @@ -0,0 +1,654 @@ +package parser + +import ( + "fmt" + "os" + + "github.com/goccy/go-yaml/internal/errors" + "github.com/goccy/go-yaml/token" +) + +type TokenGroupType int + +const ( + TokenGroupNone TokenGroupType = iota + TokenGroupComment + TokenGroupDirective + TokenGroupDocument + TokenGroupDocumentBody + TokenGroupAnchor + TokenGroupAnchorName + TokenGroupAlias + TokenGroupLiteral + TokenGroupFolded + TokenGroupScalarTag + TokenGroupMapKey + TokenGroupMapKeyValue +) + +func (t TokenGroupType) String() string { + switch t { + case TokenGroupNone: + return "none" + case TokenGroupComment: + return "comment" + case TokenGroupDirective: + return "directive" + case TokenGroupDocument: + return "document" + case TokenGroupDocumentBody: + return "document_body" + case TokenGroupAnchor: + return "anchor" + case TokenGroupAnchorName: + return "anchor_name" + case TokenGroupAlias: + return "alias" + case TokenGroupLiteral: + return "literal" + case TokenGroupFolded: + return "folded" + case TokenGroupScalarTag: + return "scalar_tag" + case TokenGroupMapKey: + return "map_key" + case TokenGroupMapKeyValue: + return "map_key_value" + } + return "none" +} + +type Token struct { + Token *token.Token + Group *TokenGroup + LineComment *token.Token +} + +func (t *Token) RawToken() *token.Token { + if t == nil { + return nil + } + if t.Token != nil { + return t.Token + } + return t.Group.RawToken() +} + +func (t *Token) Type() token.Type { + if t == nil { + return 0 + } + if t.Token != nil { + return t.Token.Type + } + return t.Group.TokenType() +} + +func (t *Token) GroupType() TokenGroupType { + if t == nil { + return TokenGroupNone + } + if t.Token != nil { + return TokenGroupNone + } + return t.Group.Type +} + +func (t *Token) Line() int { + if t == nil { + return 0 + } + if t.Token != nil { + return t.Token.Position.Line + } + return t.Group.Line() +} + +func (t *Token) Column() int { + if t == nil { + return 0 + } + if t.Token != nil { + return t.Token.Position.Column + } + return t.Group.Column() +} + +func (t *Token) SetGroupType(typ TokenGroupType) { + if t.Group == nil { + return + } + t.Group.Type = typ +} + +func (t *Token) Dump() { + ctx := new(groupTokenRenderContext) + if t.Token != nil { + fmt.Fprint(os.Stdout, t.Token.Value) + return + } + t.Group.dump(ctx) + fmt.Fprintf(os.Stdout, "\n") +} + +func (t *Token) dump(ctx *groupTokenRenderContext) { + if t.Token != nil { + fmt.Fprint(os.Stdout, t.Token.Value) + return + } + t.Group.dump(ctx) +} + +type groupTokenRenderContext struct { + num int +} + +type TokenGroup struct { + Type TokenGroupType + Tokens []*Token +} + +func (g *TokenGroup) First() *Token { + if len(g.Tokens) == 0 { + return nil + } + return g.Tokens[0] +} + +func (g *TokenGroup) Last() *Token { + if len(g.Tokens) == 0 { + return nil + } + return g.Tokens[len(g.Tokens)-1] +} + +func (g *TokenGroup) dump(ctx *groupTokenRenderContext) { + num := ctx.num + fmt.Fprint(os.Stdout, colorize(num, "(")) + ctx.num++ + for _, tk := range g.Tokens { + tk.dump(ctx) + } + fmt.Fprint(os.Stdout, colorize(num, ")")) +} + +func (g *TokenGroup) RawToken() *token.Token { + if len(g.Tokens) == 0 { + return nil + } + return g.Tokens[0].RawToken() +} + +func (g *TokenGroup) Line() int { + if len(g.Tokens) == 0 { + return 0 + } + return g.Tokens[0].Line() +} + +func (g *TokenGroup) Column() int { + if len(g.Tokens) == 0 { + return 0 + } + return g.Tokens[0].Column() +} + +func (g *TokenGroup) TokenType() token.Type { + if len(g.Tokens) == 0 { + return 0 + } + return g.Tokens[0].Type() +} + +func createGroupedTokens(tokens token.Tokens) ([]*Token, error) { + var err error + tks := newTokens(tokens) + tks = createLineCommentTokenGroups(tks) + tks, err = createLiteralAndFoldedTokenGroups(tks) + if err != nil { + return nil, err + } + tks, err = createAnchorAndAliasTokenGroups(tks) + if err != nil { + return nil, err + } + tks = createScalarTagTokenGroups(tks) + tks, err = createAnchorWithScalarTagTokenGroups(tks) + if err != nil { + return nil, err + } + tks, err = createMapKeyTokenGroups(tks) + if err != nil { + return nil, err + } + tks = createMapKeyValueTokenGroups(tks) + tks, err = createDirectiveTokenGroups(tks) + if err != nil { + return nil, err + } + tks, err = createDocumentTokens(tks) + if err != nil { + return nil, err + } + return tks, nil +} + +func newTokens(tks token.Tokens) []*Token { + ret := make([]*Token, 0, len(tks)) + for _, tk := range tks { + ret = append(ret, &Token{Token: tk}) + } + return ret +} + +func createLineCommentTokenGroups(tokens []*Token) []*Token { + ret := make([]*Token, 0, len(tokens)) + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + switch tk.Type() { + case token.CommentType: + if i > 0 && tokens[i-1].Line() == tk.Line() { + tokens[i-1].LineComment = tk.RawToken() + } else { + ret = append(ret, tk) + } + default: + ret = append(ret, tk) + } + } + return ret +} + +func createLiteralAndFoldedTokenGroups(tokens []*Token) ([]*Token, error) { + ret := make([]*Token, 0, len(tokens)) + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + switch tk.Type() { + case token.LiteralType: + tks := []*Token{tk} + if i+1 < len(tokens) { + tks = append(tks, tokens[i+1]) + } + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupLiteral, + Tokens: tks, + }, + }) + i++ + case token.FoldedType: + tks := []*Token{tk} + if i+1 < len(tokens) { + tks = append(tks, tokens[i+1]) + } + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupFolded, + Tokens: tks, + }, + }) + i++ + default: + ret = append(ret, tk) + } + } + return ret, nil +} + +func createAnchorAndAliasTokenGroups(tokens []*Token) ([]*Token, error) { + ret := make([]*Token, 0, len(tokens)) + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + switch tk.Type() { + case token.AnchorType: + if i+1 >= len(tokens) { + return nil, errors.ErrSyntax("undefined anchor name", tk.RawToken()) + } + if i+2 >= len(tokens) { + return nil, errors.ErrSyntax("undefined anchor value", tk.RawToken()) + } + anchorName := &Token{ + Group: &TokenGroup{ + Type: TokenGroupAnchorName, + Tokens: []*Token{tk, tokens[i+1]}, + }, + } + valueTk := tokens[i+2] + if tk.Line() == valueTk.Line() && isScalarType(valueTk) { + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupAnchor, + Tokens: []*Token{anchorName, valueTk}, + }, + }) + i++ + } else { + ret = append(ret, anchorName) + } + i++ + case token.AliasType: + if i+1 == len(tokens) { + return nil, errors.ErrSyntax("undefined alias name", tk.RawToken()) + } + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupAlias, + Tokens: []*Token{tk, tokens[i+1]}, + }, + }) + i++ + default: + ret = append(ret, tk) + } + } + return ret, nil +} + +func createScalarTagTokenGroups(tokens []*Token) []*Token { + ret := make([]*Token, 0, len(tokens)) + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + if tk.Type() != token.TagType { + ret = append(ret, tk) + continue + } + tag := tk.RawToken() + switch token.ReservedTagKeyword(tag.Value) { + case token.IntegerTag, token.FloatTag, token.StringTag, token.BinaryTag, token.TimestampTag, token.BooleanTag, token.NullTag: + if len(tokens) <= i+1 { + ret = append(ret, tk) + continue + } + if tk.Line() != tokens[i+1].Line() { + ret = append(ret, tk) + continue + } + if tokens[i+1].GroupType() == TokenGroupAnchorName { + ret = append(ret, tk) + continue + } + if isScalarType(tokens[i+1]) { + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupScalarTag, + Tokens: []*Token{tk, tokens[i+1]}, + }, + }) + i++ + } else { + ret = append(ret, tk) + } + default: + ret = append(ret, tk) + } + } + return ret +} + +func createAnchorWithScalarTagTokenGroups(tokens []*Token) ([]*Token, error) { + ret := make([]*Token, 0, len(tokens)) + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + switch tk.GroupType() { + case TokenGroupAnchorName: + if i+1 >= len(tokens) { + return nil, errors.ErrSyntax("undefined anchor value", tk.RawToken()) + } + valueTk := tokens[i+1] + if tk.Line() == valueTk.Line() && valueTk.GroupType() == TokenGroupScalarTag { + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupAnchor, + Tokens: []*Token{tk, tokens[i+1]}, + }, + }) + i++ + } else { + ret = append(ret, tk) + } + default: + ret = append(ret, tk) + } + } + return ret, nil +} + +func createMapKeyTokenGroups(tokens []*Token) ([]*Token, error) { + tks, err := createMapKeyByMappingKey(tokens) + if err != nil { + return nil, err + } + return createMapKeyByMappingValue(tks) +} + +func createMapKeyByMappingKey(tokens []*Token) ([]*Token, error) { + ret := make([]*Token, 0, len(tokens)) + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + switch tk.Type() { + case token.MappingKeyType: + if i+1 >= len(tokens) { + return nil, errors.ErrSyntax("undefined map key", tk.RawToken()) + } + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupMapKey, + Tokens: []*Token{tk, tokens[i+1]}, + }, + }) + i++ + default: + ret = append(ret, tk) + } + } + return ret, nil +} + +func createMapKeyByMappingValue(tokens []*Token) ([]*Token, error) { + ret := make([]*Token, 0, len(tokens)) + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + switch tk.Type() { + case token.MappingValueType: + if i == 0 { + return nil, errors.ErrSyntax("unexpected key name", tk.RawToken()) + } + mapKeyTk := tokens[i-1] + if isNotMapKeyType(mapKeyTk) { + return nil, errors.ErrSyntax("found an invalid key for this map", tokens[i].RawToken()) + } + newTk := &Token{Token: mapKeyTk.Token, Group: mapKeyTk.Group} + mapKeyTk.Token = nil + mapKeyTk.Group = &TokenGroup{ + Type: TokenGroupMapKey, + Tokens: []*Token{newTk, tk}, + } + default: + ret = append(ret, tk) + } + } + return ret, nil +} + +func createMapKeyValueTokenGroups(tokens []*Token) []*Token { + ret := make([]*Token, 0, len(tokens)) + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + switch tk.GroupType() { + case TokenGroupMapKey: + if len(tokens) <= i+1 { + ret = append(ret, tk) + continue + } + valueTk := tokens[i+1] + if tk.Line() != valueTk.Line() { + ret = append(ret, tk) + continue + } + if valueTk.GroupType() == TokenGroupAnchorName { + ret = append(ret, tk) + continue + } + if valueTk.Type() == token.TagType && valueTk.GroupType() != TokenGroupScalarTag { + ret = append(ret, tk) + continue + } + + if isScalarType(valueTk) || valueTk.Type() == token.TagType { + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupMapKeyValue, + Tokens: []*Token{tk, valueTk}, + }, + }) + i++ + } else { + ret = append(ret, tk) + continue + } + default: + ret = append(ret, tk) + } + } + return ret +} + +func createDirectiveTokenGroups(tokens []*Token) ([]*Token, error) { + ret := make([]*Token, 0, len(tokens)) + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + switch tk.Type() { + case token.DirectiveType: + if i+1 >= len(tokens) { + return nil, errors.ErrSyntax("undefined directive value", tk.RawToken()) + } + if i+2 >= len(tokens) { + return nil, errors.ErrSyntax("unexpected directive value. document not started", tk.RawToken()) + } + if tokens[i+2].Type() != token.DocumentHeaderType { + return nil, errors.ErrSyntax("unexpected directive value. document not started", tk.RawToken()) + } + if tk.Line() != tokens[i+1].Line() { + return nil, errors.ErrSyntax("undefined directive value", tk.RawToken()) + } + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupDirective, + Tokens: []*Token{tk, tokens[i+1]}, + }, + }) + i++ + default: + ret = append(ret, tk) + } + } + return ret, nil +} + +func createDocumentTokens(tokens []*Token) ([]*Token, error) { + var ret []*Token + for i := 0; i < len(tokens); i++ { + tk := tokens[i] + switch tk.Type() { + case token.DocumentHeaderType: + if i != 0 { + ret = append(ret, &Token{ + Group: &TokenGroup{Tokens: tokens[:i]}, + }) + } + if i+1 == len(tokens) { + // if current token is last token, add DocumentHeader only tokens to ret. + return append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupDocument, + Tokens: []*Token{tk}, + }, + }), nil + } + if tokens[i+1].Type() == token.DocumentHeaderType { + return append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupDocument, + Tokens: []*Token{tk}, + }, + }), nil + } + tks, err := createDocumentTokens(tokens[i+1:]) + if err != nil { + return nil, err + } + if len(tks) != 0 { + tks[0].SetGroupType(TokenGroupDocument) + tks[0].Group.Tokens = append([]*Token{tk}, tks[0].Group.Tokens...) + return append(ret, tks...), nil + } + return append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupDocument, + Tokens: []*Token{tk}, + }, + }), nil + case token.DocumentEndType: + if i != 0 { + ret = append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupDocument, + Tokens: tokens[0 : i+1], + }, + }) + } + if i+1 == len(tokens) { + return ret, nil + } + if isScalarType(tokens[i+1]) { + return nil, errors.ErrSyntax("unexpected end content", tokens[i+1].RawToken()) + } + + tks, err := createDocumentTokens(tokens[i+1:]) + if err != nil { + return nil, err + } + return append(ret, tks...), nil + } + } + return append(ret, &Token{ + Group: &TokenGroup{ + Type: TokenGroupDocument, + Tokens: tokens, + }, + }), nil +} + +func isScalarType(tk *Token) bool { + typ := tk.Type() + return typ == token.AnchorType || + typ == token.AliasType || + typ == token.LiteralType || + typ == token.FoldedType || + typ == token.NullType || + typ == token.BoolType || + typ == token.IntegerType || + typ == token.BinaryIntegerType || + typ == token.OctetIntegerType || + typ == token.HexIntegerType || + typ == token.FloatType || + typ == token.InfinityType || + typ == token.NanType || + typ == token.StringType || + typ == token.SingleQuoteType || + typ == token.DoubleQuoteType +} + +func isNotMapKeyType(tk *Token) bool { + typ := tk.Type() + return typ == token.DirectiveType || + typ == token.DocumentHeaderType || + typ == token.DocumentEndType || + typ == token.CollectEntryType || + typ == token.MappingStartType || + typ == token.MappingValueType || + typ == token.MappingEndType || + typ == token.SequenceStartType || + typ == token.SequenceEntryType || + typ == token.SequenceEndType +} diff --git a/scanner/scanner.go b/scanner/scanner.go index a0e74bf..ed43245 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -46,6 +46,7 @@ type Scanner struct { indentLevel int isFirstCharAtLine bool isAnchor bool + isDirective bool startedFlowSequenceNum int startedFlowMapNum int indentState IndentState @@ -104,6 +105,7 @@ func (s *Scanner) progressLine(ctx *Context) { s.indentNum = 0 s.isFirstCharAtLine = true s.isAnchor = false + s.isDirective = false s.progress(ctx, 1) } @@ -812,6 +814,9 @@ func (s *Scanner) scanFlowEntry(ctx *Context, c rune) bool { func (s *Scanner) scanMapDelim(ctx *Context) bool { nc := ctx.nextChar() + if s.isDirective { + return false + } if s.startedFlowMapNum <= 0 && nc != ' ' && nc != '\t' && !s.isNewLineChar(nc) && !ctx.isNextEOS() { return false } @@ -844,6 +849,12 @@ func (s *Scanner) scanDocumentStart(ctx *Context) bool { if ctx.repeatNum('-') != 3 { return false } + if ctx.size > ctx.idx+3 { + c := ctx.src[ctx.idx+3] + if c != ' ' && c != '\t' && c != '\n' && c != '\r' { + return false + } + } s.addBufferedTokenIfExists(ctx) ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos())) @@ -1046,6 +1057,7 @@ func (s *Scanner) scanDirective(ctx *Context) bool { ctx.addToken(token.Directive(string(ctx.obuf)+"%", s.pos())) s.progressColumn(ctx, 1) ctx.clear() + s.isDirective = true return true } diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index 570984d..286becb 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -171,6 +171,7 @@ var failureTestNames = []string{ "tabs-that-look-like-indentation/05", "tabs-that-look-like-indentation/07", "tabs-that-look-like-indentation/08", + "tag-shorthand-used-in-documents-but-only-defined-in-the-first", "tags-for-block-objects", "tags-for-flow-objects", "tags-for-root-objects", From e3c410377381078d8914550f8e900124148ade6d Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Tue, 26 Nov 2024 11:02:31 +0900 Subject: [PATCH 2/2] refactor parser --- parser/context.go | 35 ++++++++++++++++++++++--------- parser/parser.go | 52 +++++++---------------------------------------- parser/token.go | 3 --- 3 files changed, 32 insertions(+), 58 deletions(-) diff --git a/parser/context.go b/parser/context.go index 1ac7f25..683a033 100644 --- a/parser/context.go +++ b/parser/context.go @@ -115,26 +115,41 @@ func (c *context) next() bool { return c.tokenRef.idx < c.tokenRef.size } +func (c *context) insertNullToken(tk *Token) *Token { + nullToken := c.createNullToken(tk) + c.insertToken(nullToken) + c.goNext() + + return nullToken +} + +func (c *context) createNullToken(base *Token) *Token { + pos := *(base.RawToken().Position) + pos.Column++ + return &Token{Token: token.New("null", "null", &pos)} +} + func (c *context) insertToken(tk *Token) { - idx := c.tokenRef.idx - if c.tokenRef.size < idx { + ref := c.tokenRef + idx := ref.idx + if ref.size < idx { return } - if c.tokenRef.size == idx { - curToken := c.tokenRef.tokens[c.tokenRef.size-1] + if ref.size == idx { + curToken := ref.tokens[ref.size-1] tk.RawToken().Next = curToken.RawToken() curToken.RawToken().Prev = tk.RawToken() - c.tokenRef.tokens = append(c.tokenRef.tokens, tk) - c.tokenRef.size = len(c.tokenRef.tokens) + ref.tokens = append(ref.tokens, tk) + ref.size = len(ref.tokens) return } - curToken := c.tokenRef.tokens[idx] + curToken := ref.tokens[idx] tk.RawToken().Next = curToken.RawToken() curToken.RawToken().Prev = tk.RawToken() - c.tokenRef.tokens = append(c.tokenRef.tokens[:idx+1], c.tokenRef.tokens[idx:]...) - c.tokenRef.tokens[idx] = tk - c.tokenRef.size = len(c.tokenRef.tokens) + ref.tokens = append(ref.tokens[:idx+1], ref.tokens[idx:]...) + ref.tokens[idx] = tk + ref.size = len(ref.tokens) } diff --git a/parser/parser.go b/parser/parser.go index 247c750..47f6dae 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -325,9 +325,7 @@ func (p *parser) parseFlowMap(ctx *context) (*ast.MappingNode, error) { } colonTk := mapKeyTk.Group.Last() if p.isFlowMapDelim(ctx.nextToken()) { - nullToken := p.createNullToken(colonTk) - ctx.insertToken(nullToken) - value, err := newNullNode(ctx, nullToken) + value, err := newNullNode(ctx, ctx.insertNullToken(colonTk)) if err != nil { return nil, err } @@ -335,7 +333,6 @@ func (p *parser) parseFlowMap(ctx *context) (*ast.MappingNode, error) { if err != nil { return nil, err } - ctx.goNext() node.Values = append(node.Values, mapValue) ctx.goNext() } else { @@ -358,13 +355,10 @@ func (p *parser) parseFlowMap(ctx *context) (*ast.MappingNode, error) { if err != nil { return nil, err } - nullToken := p.createNullToken(mapKeyTk) - ctx.insertToken(nullToken) - value, err := newNullNode(ctx, nullToken) + value, err := newNullNode(ctx, ctx.insertNullToken(mapKeyTk)) if err != nil { return nil, err } - ctx.goNext() mapValue, err := newMappingValueNode(ctx, mapKeyTk, key, value) if err != nil { return nil, err @@ -619,17 +613,7 @@ func (p *parser) mapKeyText(n ast.Node) string { func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token) (ast.Node, error) { tk := ctx.currentToken() if tk == nil { - nullToken := p.createNullToken(colonTk) - ctx.insertToken(nullToken) - nullNode, err := newNullNode(ctx, nullToken) - if err != nil { - return nil, err - } - ctx.goNext() - return nullNode, nil - } else if tk.Type() == token.CollectEntryType { - // implicit null value. - return newNullNode(ctx, tk) + return newNullNode(ctx, ctx.insertNullToken(colonTk)) } if ctx.isComment() { @@ -641,15 +625,7 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token) // ---- // key: // next - - nullToken := p.createNullToken(colonTk) - ctx.insertToken(nullToken) - nullNode, err := newNullNode(ctx, nullToken) - if err != nil { - return nil, err - } - ctx.goNext() - return nullNode, nil + return newNullNode(ctx, ctx.insertNullToken(colonTk)) } if tk.Column() < key.GetToken().Position.Column { @@ -657,15 +633,7 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token) // ---- // key: // next - nullToken := p.createNullToken(colonTk) - ctx.insertToken(nullToken) - nullNode, err := newNullNode(ctx, nullToken) - if err != nil { - return nil, err - } - - ctx.goNext() - return nullNode, nil + return newNullNode(ctx, ctx.insertNullToken(colonTk)) } value, err := p.parseToken(ctx, ctx.currentToken()) @@ -776,7 +744,7 @@ func (p *parser) parseTag(ctx *context) (*ast.TagNode, error) { func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) (ast.Node, error) { if tk == nil { - return newNullNode(ctx, p.createNullToken(&Token{Token: tagRawTk})) + return newNullNode(ctx, ctx.createNullToken(&Token{Token: tagRawTk})) } switch token.ReservedTagKeyword(tagRawTk.Value) { case token.MappingTag, token.OrderedMapTag: @@ -863,7 +831,7 @@ func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) { valueTk := ctx.currentToken() if valueTk == nil { - node, err := newNullNode(ctx, p.createNullToken(seqTk)) + node, err := newNullNode(ctx, ctx.createNullToken(seqTk)) if err != nil { return nil, err } @@ -945,9 +913,3 @@ func (p *parser) parseFootComment(ctx *context, col int) *ast.CommentGroupNode { } return ast.CommentGroup(tks) } - -func (p *parser) createNullToken(base *Token) *Token { - pos := *(base.RawToken().Position) - pos.Column++ - return &Token{Token: token.New("null", "null", &pos)} -} diff --git a/parser/token.go b/parser/token.go index fab77f3..be00c92 100644 --- a/parser/token.go +++ b/parser/token.go @@ -12,7 +12,6 @@ type TokenGroupType int const ( TokenGroupNone TokenGroupType = iota - TokenGroupComment TokenGroupDirective TokenGroupDocument TokenGroupDocumentBody @@ -30,8 +29,6 @@ func (t TokenGroupType) String() string { switch t { case TokenGroupNone: return "none" - case TokenGroupComment: - return "comment" case TokenGroupDirective: return "directive" case TokenGroupDocument: