Fix parser (#552)
* fix set tag

* fix seq tag

* fix empty content test

* fix parsing of tab character before end delimiter of double-quote

* fix tab indent in double-quote

* fix tab indent in single-quote
goccy authored Nov 28, 2024
1 parent 44b8854 commit 01f4bac
Showing 4 changed files with 116 additions and 26 deletions.
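
The fixes can be exercised end to end through the library's public yaml.Unmarshal API. The following standalone sketch is not part of this commit and its expected outputs are assumptions based on the changes below: it feeds the parser a `!!set`-tagged mapping (previously rejected with "sorry, currently not supported !!set tag") and a double-quoted scalar whose continuation line is indented with a tab.

package main

import (
	"fmt"

	"github.com/goccy/go-yaml"
)

func main() {
	// A !!set-tagged mapping (cf. spec example 2.25) used to hit the
	// "currently not supported" branch; after this commit it should parse
	// as a mapping whose values are null.
	set := map[string]interface{}{}
	if err := yaml.Unmarshal([]byte("!!set\n? apple\n? banana\n"), &set); err != nil {
		fmt.Println("set:", err)
	} else {
		fmt.Println("set:", set) // expected: map[apple:<nil> banana:<nil>]
	}

	// A double-quoted scalar whose continuation line starts with a tab
	// (cf. spec example 5.12) should now fold the tab away like a space.
	var doc map[string]string
	if err := yaml.Unmarshal([]byte("key: \"first\n\tsecond\"\n"), &doc); err != nil {
		fmt.Println("quoted:", err)
	} else {
		fmt.Printf("quoted: %q\n", doc["key"]) // expected: "first second"
	}
}
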
55 changes: 55 additions & 0 deletions parser/node.go
@@ -1,7 +1,10 @@
package parser

import (
	"fmt"

	"github.com/goccy/go-yaml/ast"
	"github.com/goccy/go-yaml/internal/errors"
	"github.com/goccy/go-yaml/token"
)

@@ -162,6 +165,58 @@ func newSequenceNode(ctx *context, tk *Token, isFlow bool) (*ast.SequenceNode, error) {
	return node, nil
}

func newTagDefaultScalarValueNode(ctx *context, tag *token.Token) (ast.ScalarNode, error) {
	pos := *(tag.Position)
	pos.Column++

	var (
		tk   *Token
		node ast.ScalarNode
	)
	switch token.ReservedTagKeyword(tag.Value) {
	case token.IntegerTag:
		tk = &Token{Token: token.New("0", "0", &pos)}
		n, err := newIntegerNode(ctx, tk)
		if err != nil {
			return nil, err
		}
		node = n
	case token.FloatTag:
		tk = &Token{Token: token.New("0", "0", &pos)}
		n, err := newFloatNode(ctx, tk)
		if err != nil {
			return nil, err
		}
		node = n
	case token.StringTag, token.BinaryTag, token.TimestampTag:
		tk = &Token{Token: token.New("", "", &pos)}
		n, err := newStringNode(ctx, tk)
		if err != nil {
			return nil, err
		}
		node = n
	case token.BooleanTag:
		tk = &Token{Token: token.New("false", "false", &pos)}
		n, err := newBoolNode(ctx, tk)
		if err != nil {
			return nil, err
		}
		node = n
	case token.NullTag:
		tk = &Token{Token: token.New("null", "null", &pos)}
		n, err := newNullNode(ctx, tk)
		if err != nil {
			return nil, err
		}
		node = n
	default:
		return nil, errors.ErrSyntax(fmt.Sprintf("cannot assign default value for %q tag", tag.Value), tag)
	}
	ctx.insertToken(tk)
	ctx.goNext()
	return node, nil
}

func setLineComment(ctx *context, node ast.Node, tk *Token) error {
	if tk.LineComment == nil {
		return nil
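
newTagDefaultScalarValueNode supplies a default scalar when a tag carries no explicit value: 0 for !!int and !!float, an empty string for !!str, !!binary and !!timestamp, false for !!bool, and null for !!null. A minimal sketch, not from this commit, assuming the public yaml.Unmarshal API and the two inputs named in the parseScalarValue comment below; the decoded Go types are assumptions.

package main

import (
	"fmt"

	"github.com/goccy/go-yaml"
)

func main() {
	for _, src := range []string{
		"{foo: !!str, bar: 1}\n", // foo is expected to decode to ""
		"!!str : value\n",        // the key is expected to decode to ""
	} {
		var v map[string]interface{}
		if err := yaml.Unmarshal([]byte(src), &v); err != nil {
			fmt.Println(err)
			continue
		}
		fmt.Printf("%#v\n", v)
	}
}
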
40 changes: 31 additions & 9 deletions parser/parser.go
@@ -283,6 +283,10 @@ func (p *parser) parseScalarValue(ctx *context, tk *Token) (ast.ScalarNode, error) {
		return newNanNode(ctx, tk)
	case token.StringType, token.SingleQuoteType, token.DoubleQuoteType:
		return newStringNode(ctx, tk)
	case token.TagType:
		// this case applies when it is a scalar tag and its value does not exist.
		// Examples of cases where the value does not exist include cases like `key: !!str,` or `!!str : value`.
		return p.parseScalarTag(ctx)
	}
	return nil, errors.ErrSyntax("unexpected scalar value type", tk.RawToken())
}
@@ -526,15 +530,11 @@ func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error) {
	if g.Type != TokenGroupMapKey {
		return nil, errors.ErrSyntax("unexpected map key", g.RawToken())
	}
	if g.Last().Type() != token.MappingValueType {
		return nil, errors.ErrSyntax("expected map key-value delimiter ':'", g.Last().RawToken())
	}
	if g.First().Type() == token.MappingKeyType {
		mapKeyTk := g.First()
		if mapKeyTk.Group == nil {
			return nil, errors.ErrSyntax("could not find value for mapping key", mapKeyTk.RawToken())
		if mapKeyTk.Group != nil {
			ctx = ctx.withGroup(mapKeyTk.Group)
		}
		ctx := ctx.withGroup(mapKeyTk.Group)
		key, err := newMappingKeyNode(ctx, mapKeyTk)
		if err != nil {
			return nil, err
@@ -558,6 +558,9 @@ func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error) {
		p.pathMap[keyPath] = key
		return key, nil
	}
	if g.Last().Type() != token.MappingValueType {
		return nil, errors.ErrSyntax("expected map key-value delimiter ':'", g.Last().RawToken())
	}

	scalar, err := p.parseScalarValue(ctx, g.First())
	if err != nil {
@@ -789,6 +792,20 @@ func (p *parser) parseLiteral(ctx *context) (*ast.LiteralNode, error) {
	return node, nil
}

func (p *parser) parseScalarTag(ctx *context) (*ast.TagNode, error) {
	tag, err := p.parseTag(ctx)
	if err != nil {
		return nil, err
	}
	if tag.Value == nil {
		return nil, errors.ErrSyntax("specified not scalar tag", tag.GetToken())
	}
	if _, ok := tag.Value.(ast.ScalarNode); !ok {
		return nil, errors.ErrSyntax("specified not scalar tag", tag.GetToken())
	}
	return tag, nil
}

func (p *parser) parseTag(ctx *context) (*ast.TagNode, error) {
	tagTk := ctx.currentToken()
	tagRawTk := tagTk.RawToken()
@@ -815,7 +832,7 @@ func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) (ast.Node, error) {
		return newNullNode(ctx, ctx.createNullToken(&Token{Token: tagRawTk}))
	}
	switch token.ReservedTagKeyword(tagRawTk.Value) {
	case token.MappingTag, token.OrderedMapTag:
	case token.MappingTag, token.SetTag:
		if !p.isMapToken(tk) {
			return nil, errors.ErrSyntax("could not find map", tk.RawToken())
		}
@@ -826,15 +843,20 @@ func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) (ast.Node, error) {
	case token.IntegerTag, token.FloatTag, token.StringTag, token.BinaryTag, token.TimestampTag, token.BooleanTag, token.NullTag:
		if tk.GroupType() == TokenGroupLiteral || tk.GroupType() == TokenGroupFolded {
			return p.parseLiteral(ctx.withGroup(tk.Group))
		} else if tk.Type() == token.CollectEntryType || tk.Type() == token.MappingValueType {
			return newTagDefaultScalarValueNode(ctx, tagRawTk)
		}
		scalar, err := p.parseScalarValue(ctx, tk)
		if err != nil {
			return nil, err
		}
		ctx.goNext()
		return scalar, nil
	case token.SequenceTag, token.SetTag:
		return nil, errors.ErrSyntax(fmt.Sprintf("sorry, currently not supported %s tag", tagRawTk.Value), tagRawTk)
	case token.SequenceTag, token.OrderedMapTag:
		if tk.Type() == token.SequenceStartType {
			return p.parseFlowSequence(ctx)
		}
		return p.parseSequence(ctx)
	}
	if strings.HasPrefix(tagRawTk.Value, "!!") {
		return nil, errors.ErrSyntax(fmt.Sprintf("unknown secondary tag name %q specified", tagRawTk.Value), tagRawTk)
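
With parseTagValue now routing !!seq and !!omap through sequence parsing (flow or block) instead of returning the "currently not supported" error, documents such as spec examples 2.26 and 8.22 should parse. A hedged sketch, not part of the commit; the decoded Go shapes are assumptions.

package main

import (
	"fmt"

	"github.com/goccy/go-yaml"
)

func main() {
	// Block form: !!omap (cf. spec example 2.26) is a sequence of single-pair maps.
	var omap []map[string]int
	if err := yaml.Unmarshal([]byte("!!omap\n- one: 1\n- two: 2\n"), &omap); err != nil {
		fmt.Println("omap:", err)
	} else {
		fmt.Println("omap:", omap) // expected: [map[one:1] map[two:2]]
	}

	// Flow form: an explicitly tagged flow sequence goes through parseFlowSequence.
	var seq []string
	if err := yaml.Unmarshal([]byte("!!seq [a, b, c]\n"), &seq); err != nil {
		fmt.Println("seq:", err)
	} else {
		fmt.Println("seq:", seq) // expected: [a b c]
	}
}
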
36 changes: 29 additions & 7 deletions scanner/scanner.go
@@ -256,7 +256,7 @@ func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) {
			isNewLine = true
			s.progressLine(ctx)
			continue
		} else if c == ' ' && isFirstLineChar {
		} else if isFirstLineChar && (c == ' ' || c == '\t') {
			continue
		} else if c != '\'' {
			value = append(value, c)
@@ -340,7 +340,7 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
			isNewLine = true
			s.progressLine(ctx)
			continue
		} else if c == ' ' && isFirstLineChar {
		} else if isFirstLineChar && (c == ' ' || c == '\t') {
			continue
		} else if c == '\\' {
			isFirstLineChar = false
@@ -463,14 +463,16 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
					progress++
					continue
				}
				if src[i] == '\n' {
				if s.isNewLineChar(src[i]) {
					break
				}
				foundNotSpaceChar = true
			}
			if foundNotSpaceChar {
				value = append(value, c)
				s.progressColumn(ctx, 1)
				if src[idx+1] != '"' {
					s.progressColumn(ctx, 1)
				}
			} else {
				idx += progress
				s.progressColumn(ctx, progress)
@@ -569,20 +571,33 @@ func (s *Scanner) scanTag(ctx *Context) bool {
	var progress int
	for idx, c := range ctx.src[ctx.idx:] {
		progress = idx + 1
		ctx.addOriginBuf(c)
		switch c {
		case ' ':
			ctx.addOriginBuf(c)
			value := ctx.source(ctx.idx-1, ctx.idx+idx)
			ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos()))
			s.progressColumn(ctx, len([]rune(value)))
			ctx.clear()
			return true
		case ',':
			if s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0 {
				value := ctx.source(ctx.idx-1, ctx.idx+idx)
				ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos()))
				s.progressColumn(ctx, len([]rune(value))-1) // progress column before collect-entry for scanning it at scanFlowEntry function.
				ctx.clear()
				return true
			} else {
				ctx.addOriginBuf(c)
			}
		case '\n', '\r':
			ctx.addOriginBuf(c)
			value := ctx.source(ctx.idx-1, ctx.idx+idx)
			ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos()))
			s.progressColumn(ctx, len([]rune(value))-1) // progress column before new-line-char for scanning new-line-char at scanNewLine function.
			ctx.clear()
			return true
		default:
			ctx.addOriginBuf(c)
		}
	}
	s.progressColumn(ctx, progress)
@@ -853,6 +868,7 @@ func (s *Scanner) scanDocumentStart(ctx *Context) bool {
	ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos()))
	s.progressColumn(ctx, 3)
	ctx.clear()
	s.clearState()
	return true
}

@@ -1031,7 +1047,9 @@ func (s *Scanner) scanMapKey(ctx *Context) bool {
		return false
	}

	ctx.addToken(token.MappingKey(s.pos()))
	tk := token.MappingKey(s.pos())
	s.lastDelimColumn = tk.Position.Column
	ctx.addToken(tk)
	s.progressColumn(ctx, 1)
	ctx.clear()
	return true
@@ -1282,11 +1300,15 @@ func (s *Scanner) Init(text string) {
	s.line = 1
	s.column = 1
	s.offset = 1
	s.isFirstCharAtLine = true
	s.clearState()
}

func (s *Scanner) clearState() {
	s.prevLineIndentNum = 0
	s.lastDelimColumn = 0
	s.indentLevel = 0
	s.indentNum = 0
	s.isFirstCharAtLine = true
}

// Scan scans the next token and returns the token collection. The source end is indicated by io.EOF.
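
The scanner changes can be checked the same way. This sketch is not from the commit and its expected values are assumptions: the first case relies on the new tab handling in scanSingleQuote, the second on the new ',' case in scanTag terminating a tag token inside a flow collection.

package main

import (
	"fmt"

	"github.com/goccy/go-yaml"
)

func main() {
	// Tab-indented continuation line inside a single-quoted scalar: the tab is
	// now skipped like a leading space, so the line break folds to one space.
	var doc map[string]string
	if err := yaml.Unmarshal([]byte("key: 'first\n\tsecond'\n"), &doc); err != nil {
		fmt.Println("single-quote:", err)
	} else {
		fmt.Printf("single-quote: %q\n", doc["key"]) // expected: "first second"
	}

	// Inside a flow collection a ',' now ends the tag token, so a value-less
	// tag entry is scanned as a tag followed by a collect-entry.
	var seq []interface{}
	if err := yaml.Unmarshal([]byte("[!!str, b]\n"), &seq); err != nil {
		fmt.Println("flow tag:", err)
	} else {
		fmt.Printf("flow tag: %#v\n", seq) // expected: []interface {}{"", "b"}
	}
}
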
11 changes: 1 addition & 10 deletions yaml_test_suite_test.go
@@ -79,7 +79,7 @@ var failureTestNames = []string{
	"node-anchor-not-indented",
	"plain-dashes-in-flow-sequence",
	"plain-url-in-flow-mapping",
	"question-mark-edge-cases/00", // panic
	"question-mark-edge-cases/00",
	"question-mark-edge-cases/01",
	"scalar-doc-with-in-content/01",
	"scalar-value-with-two-anchors",
@@ -91,9 +91,6 @@ var failureTestNames = []string{
	"spec-example-7-3-completely-empty-flow-nodes",
	"spec-example-8-18-implicit-block-mapping-entries",
	"spec-example-8-19-compact-block-mappings",
	"spec-example-2-25-unordered-sets",
	"spec-example-2-26-ordered-mappings",
	"spec-example-5-12-tabs-and-spaces",
	"spec-example-6-19-secondary-tag-handle",
	"spec-example-6-21-local-tag-prefix",
	"spec-example-6-24-verbatim-tags",
@@ -104,16 +101,12 @@ var failureTestNames = []string{
	"spec-example-6-8-flow-folding",
	"spec-example-7-12-plain-lines",
	"spec-example-7-19-single-pair-flow-mappings",
	"spec-example-7-2-empty-content",
	"spec-example-7-20-single-pair-explicit-entry",
	"spec-example-7-24-flow-nodes",
	"spec-example-7-6-double-quoted-lines",
	"spec-example-7-9-single-quoted-lines",
	"spec-example-8-10-folded-lines-8-13-final-empty-lines",
	"spec-example-8-15-block-sequence-entry-types",
	"spec-example-8-17-explicit-block-mapping-entries",
	"spec-example-8-2-block-indentation-indicator",
	"spec-example-8-22-block-collection-nodes",
	"spec-example-9-3-bare-documents",
	"spec-example-9-4-explicit-documents",
	"spec-example-9-5-directives-documents",
@@ -133,8 +126,6 @@ var failureTestNames = []string{
	"tabs-that-look-like-indentation/08",
	"tag-shorthand-used-in-documents-but-only-defined-in-the-first",
	"tags-for-block-objects",
	"tags-for-flow-objects",
	"tags-for-root-objects",
	"tags-on-empty-scalars",
	"trailing-line-of-spaces/01", // last '\n' character is needed ?
	"various-combinations-of-explicit-block-mappings", // no json
