From 45889c98b0a0967240eb595a1bd6896e2f575106 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 30 Nov 2024 14:47:52 +0900 Subject: [PATCH] Fix parser (#557) * fix document separator with directive * fix number value with local tag * fix decoding string for null value * fix single pair flow mapping * fix test case * fix plain lines with tab indent * fix test case * fix sequence with null * fix test case * fix invalid map-value --- decode.go | 9 +++- parser/parser.go | 110 ++++++++++++++++++++++++++++++++++------ parser/token.go | 4 ++ scanner/context.go | 15 ++++++ scanner/scanner.go | 33 +++++++++--- yaml_test_suite_test.go | 12 +---- 6 files changed, 149 insertions(+), 34 deletions(-) diff --git a/decode.go b/decode.go index f02fc05..faac222 100644 --- a/decode.go +++ b/decode.go @@ -405,7 +405,14 @@ func (d *Decoder) nodeToValue(node ast.Node) (any, error) { } return nil, errors.ErrSyntax(fmt.Sprintf("cannot convert %q to boolean", fmt.Sprint(v)), n.Value.GetToken()) case token.StringTag: - return d.nodeToValue(n.Value) + v, err := d.nodeToValue(n.Value) + if err != nil { + return nil, err + } + if v == nil { + return "", nil + } + return fmt.Sprint(v), nil case token.MappingTag: return d.nodeToValue(n.Value) default: diff --git a/parser/parser.go b/parser/parser.go index 20908f7..5173393 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -442,11 +442,18 @@ func (p *parser) parseMap(ctx *context) (*ast.MappingNode, error) { tk = ctx.currentToken() } for tk.Column() == keyTk.Column() { + typ := tk.Type() + if ctx.isFlow && typ == token.SequenceEndType { + // [ + // key: value + // ] <= + break + } if !p.isMapToken(tk) { return nil, errors.ErrSyntax("non-map value is specified", tk.RawToken()) } cm := p.parseHeadComment(ctx) - if tk.Type() == token.MappingEndType { + if typ == token.MappingEndType { // a: { // b: c // } <= @@ -644,6 +651,15 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token) keyCol := key.GetToken().Position.Column keyLine := key.GetToken().Position.Line + if tk.Column() != keyCol && tk.Line() == keyLine && (tk.GroupType() == TokenGroupMapKey || tk.GroupType() == TokenGroupMapKeyValue) { + // a: b: + // ^ + // + // a: b: c + // ^ + return nil, errors.ErrSyntax("mapping value is not allowed in this context", tk.RawToken()) + } + if tk.Column() == keyCol && p.isMapToken(tk) { // in this case, // ---- @@ -673,9 +689,6 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token) if tk.Column() <= keyCol && tk.GroupType() == TokenGroupAnchorName { // key: // &anchor - // - // key: - // &anchor return nil, errors.ErrSyntax("anchor is not allowed in this context", tk.RawToken()) } @@ -932,17 +945,7 @@ func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) { comment := p.parseHeadComment(ctx) ctx.goNext() // skip sequence entry token - valueTk := ctx.currentToken() - if valueTk == nil { - node, err := newNullNode(ctx, ctx.createNullToken(seqTk)) - if err != nil { - return nil, err - } - seqNode.Values = append(seqNode.Values, node) - break - } - - value, err := p.parseToken(ctx.withIndex(uint(len(seqNode.Values))), valueTk) + value, err := p.parseSequenceValue(ctx.withIndex(uint(len(seqNode.Values))), seqTk) if err != nil { return nil, err } @@ -968,6 +971,83 @@ func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) { return seqNode, nil } +func (p *parser) parseSequenceValue(ctx *context, seqTk *Token) (ast.Node, error) { + tk := ctx.currentToken() + if tk == nil { + return newNullNode(ctx, ctx.insertNullToken(seqTk)) + } + + if ctx.isComment() { + tk = ctx.nextNotCommentToken() + } + seqCol := seqTk.Column() + seqLine := seqTk.Line() + + if tk.Column() == seqCol && tk.Type() == token.SequenceEntryType { + // in this case, + // ---- + // - + // - + return newNullNode(ctx, ctx.insertNullToken(seqTk)) + } + + if tk.Line() == seqLine && tk.GroupType() == TokenGroupAnchorName && + ctx.nextToken().Column() == seqCol && ctx.nextToken().Type() == token.SequenceEntryType { + // in this case, + // ---- + // - &anchor + // - + group := &TokenGroup{ + Type: TokenGroupAnchor, + Tokens: []*Token{tk, ctx.createNullToken(tk)}, + } + anchor, err := p.parseAnchor(ctx.withGroup(group), group) + if err != nil { + return nil, err + } + ctx.goNext() + return anchor, nil + } + + if tk.Column() <= seqCol && tk.GroupType() == TokenGroupAnchorName { + // - + // &anchor + return nil, errors.ErrSyntax("anchor is not allowed in this sequence context", tk.RawToken()) + } + + if tk.Column() < seqCol { + // in this case, + // ---- + // - + // next + return newNullNode(ctx, ctx.insertNullToken(seqTk)) + } + + if tk.Line() == seqLine && tk.GroupType() == TokenGroupAnchorName && + ctx.nextToken().Column() < seqCol { + // in this case, + // ---- + // - &anchor + // next + group := &TokenGroup{ + Type: TokenGroupAnchor, + Tokens: []*Token{tk, ctx.createNullToken(tk)}, + } + anchor, err := p.parseAnchor(ctx.withGroup(group), group) + if err != nil { + return nil, err + } + ctx.goNext() + return anchor, nil + } + + value, err := p.parseToken(ctx, ctx.currentToken()) + if err != nil { + return nil, err + } + return value, nil +} + func (p *parser) parseDirective(ctx *context, g *TokenGroup) (*ast.DirectiveNode, error) { node, err := newDirectiveNode(ctx, g.First()) if err != nil { diff --git a/parser/token.go b/parser/token.go index 2fd3127..c7bc5e8 100644 --- a/parser/token.go +++ b/parser/token.go @@ -630,6 +630,10 @@ func createDocumentTokens(tokens []*Token) ([]*Token, error) { } func isScalarType(tk *Token) bool { + switch tk.GroupType() { + case TokenGroupMapKey, TokenGroupMapKeyValue: + return false + } typ := tk.Type() return typ == token.AnchorType || typ == token.AliasType || diff --git a/scanner/context.go b/scanner/context.go index 6d5e7eb..2529713 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -352,10 +352,25 @@ func (c *Context) bufferedToken(pos *token.Position) *token.Token { } else { tk = token.New(string(source), string(c.obuf), pos) } + c.setTokenTypeByPrevTag(tk) c.resetBuffer() return tk } +func (c *Context) setTokenTypeByPrevTag(tk *token.Token) { + lastTk := c.lastToken() + if lastTk == nil { + return + } + if lastTk.Type != token.TagType { + return + } + tag := token.ReservedTagKeyword(lastTk.Value) + if _, exists := token.ReservedTagKeywordMap[tag]; !exists { + tk.Type = token.StringType + } +} + func (c *Context) lastToken() *token.Token { if len(c.tokens) != 0 { return c.tokens[len(c.tokens)-1] diff --git a/scanner/scanner.go b/scanner/scanner.go index 428854a..bc2710f 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -515,8 +515,17 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { } func (s *Scanner) validateDocumentSeparatorMarker(ctx *Context, src []rune) error { + if s.foundDocumentSeparatorMarker(src) { + return ErrInvalidToken( + token.Invalid("found unexpected document separator", string(ctx.obuf), s.pos()), + ) + } + return nil +} + +func (s *Scanner) foundDocumentSeparatorMarker(src []rune) bool { if len(src) < 3 { - return nil + return false } var marker string if len(src) == 3 { @@ -526,12 +535,7 @@ func (s *Scanner) validateDocumentSeparatorMarker(ctx *Context, src []rune) erro return r == ' ' || r == '\t' || r == '\n' || r == '\r' }) } - if marker == "---" || marker == "..." { - return ErrInvalidToken( - token.Invalid("found unexpected document separator", string(ctx.obuf), s.pos()), - ) - } - return nil + return marker == "---" || marker == "..." } func (s *Scanner) scanQuote(ctx *Context, ch rune) (bool, error) { @@ -701,6 +705,14 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error { ctx.addBuf(c) ctx.updateDocumentNewLineState() s.progressLine(ctx) + if ctx.next() { + if s.foundDocumentSeparatorMarker(ctx.src[ctx.idx:]) { + value := ctx.bufferedSrc() + ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos())) + ctx.clear() + s.breakDocument(ctx) + } + } } else if s.isFirstCharAtLine && c == ' ' { ctx.addDocumentIndent(s.column) s.progressColumn(ctx, 1) @@ -1319,6 +1331,13 @@ func (s *Scanner) scan(ctx *Context) error { return err } case '\t': + if ctx.existsBuffer() && s.lastDelimColumn == 0 { + // tab indent for plain text (yaml-test-suite's spec-example-7-12-plain-lines). + s.indentNum++ + ctx.addOriginBuf(c) + s.progressColumn(ctx, 1) + continue + } if err := s.scanTab(ctx, c); err != nil { return err } diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index 27782ba..eae8f16 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -17,9 +17,7 @@ var failureTestNames = []string{ "anchors-on-empty-scalars", // no json. "aliases-in-flow-objects", // no json. "aliases-in-explicit-block-mapping", // no json. - "aliases-in-implicit-block-mapping", - "bare-document-after-document-end-marker", - "block-mapping-with-missing-keys", // no json. + "block-mapping-with-missing-keys", // no json. "block-mapping-with-missing-values", "block-mapping-with-multiline-scalars", "block-scalar-with-more-spaces-than-first-content-line", @@ -86,22 +84,14 @@ var failureTestNames = []string{ "spec-example-8-19-compact-block-mappings", // no json. "spec-example-6-19-secondary-tag-handle", "spec-example-6-24-verbatim-tags", - "spec-example-6-28-non-specific-tags", - "spec-example-6-4-line-prefixes", "spec-example-6-6-line-folding", "spec-example-6-6-line-folding-1-3", "spec-example-6-8-flow-folding", - "spec-example-7-12-plain-lines", - "spec-example-7-19-single-pair-flow-mappings", - "spec-example-7-20-single-pair-explicit-entry", - "spec-example-7-24-flow-nodes", "spec-example-8-10-folded-lines-8-13-final-empty-lines", - "spec-example-8-15-block-sequence-entry-types", "spec-example-8-17-explicit-block-mapping-entries", "spec-example-8-2-block-indentation-indicator", "spec-example-9-3-bare-documents", "spec-example-9-4-explicit-documents", - "spec-example-9-5-directives-documents", "spec-example-9-6-stream", "spec-example-9-6-stream-1-3", "syntax-character-edge-cases/00", // no json.