diff --git a/parser/node.go b/parser/node.go index 7e872b4..c9ebefc 100644 --- a/parser/node.go +++ b/parser/node.go @@ -1,7 +1,10 @@ package parser import ( + "fmt" + "github.com/goccy/go-yaml/ast" + "github.com/goccy/go-yaml/internal/errors" "github.com/goccy/go-yaml/token" ) @@ -162,6 +165,58 @@ func newSequenceNode(ctx *context, tk *Token, isFlow bool) (*ast.SequenceNode, e return node, nil } +func newTagDefaultScalarValueNode(ctx *context, tag *token.Token) (ast.ScalarNode, error) { + pos := *(tag.Position) + pos.Column++ + + var ( + tk *Token + node ast.ScalarNode + ) + switch token.ReservedTagKeyword(tag.Value) { + case token.IntegerTag: + tk = &Token{Token: token.New("0", "0", &pos)} + n, err := newIntegerNode(ctx, tk) + if err != nil { + return nil, err + } + node = n + case token.FloatTag: + tk = &Token{Token: token.New("0", "0", &pos)} + n, err := newFloatNode(ctx, tk) + if err != nil { + return nil, err + } + node = n + case token.StringTag, token.BinaryTag, token.TimestampTag: + tk = &Token{Token: token.New("", "", &pos)} + n, err := newStringNode(ctx, tk) + if err != nil { + return nil, err + } + node = n + case token.BooleanTag: + tk = &Token{Token: token.New("false", "false", &pos)} + n, err := newBoolNode(ctx, tk) + if err != nil { + return nil, err + } + node = n + case token.NullTag: + tk = &Token{Token: token.New("null", "null", &pos)} + n, err := newNullNode(ctx, tk) + if err != nil { + return nil, err + } + node = n + default: + return nil, errors.ErrSyntax(fmt.Sprintf("cannot assign default value for %q tag", tag.Value), tag) + } + ctx.insertToken(tk) + ctx.goNext() + return node, nil +} + func setLineComment(ctx *context, node ast.Node, tk *Token) error { if tk.LineComment == nil { return nil diff --git a/parser/parser.go b/parser/parser.go index eab0fd8..eb96408 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -283,6 +283,10 @@ func (p *parser) parseScalarValue(ctx *context, tk *Token) (ast.ScalarNode, erro return newNanNode(ctx, tk) case token.StringType, token.SingleQuoteType, token.DoubleQuoteType: return newStringNode(ctx, tk) + case token.TagType: + // this case applies when it is a scalar tag and its value does not exist. + // Examples of cases where the value does not exist include cases like `key: !!str,` or `!!str : value`. + return p.parseScalarTag(ctx) } return nil, errors.ErrSyntax("unexpected scalar value type", tk.RawToken()) } @@ -526,15 +530,11 @@ func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error if g.Type != TokenGroupMapKey { return nil, errors.ErrSyntax("unexpected map key", g.RawToken()) } - if g.Last().Type() != token.MappingValueType { - return nil, errors.ErrSyntax("expected map key-value delimiter ':'", g.Last().RawToken()) - } if g.First().Type() == token.MappingKeyType { mapKeyTk := g.First() - if mapKeyTk.Group == nil { - return nil, errors.ErrSyntax("could not find value for mapping key", mapKeyTk.RawToken()) + if mapKeyTk.Group != nil { + ctx = ctx.withGroup(mapKeyTk.Group) } - ctx := ctx.withGroup(mapKeyTk.Group) key, err := newMappingKeyNode(ctx, mapKeyTk) if err != nil { return nil, err @@ -558,6 +558,9 @@ func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error p.pathMap[keyPath] = key return key, nil } + if g.Last().Type() != token.MappingValueType { + return nil, errors.ErrSyntax("expected map key-value delimiter ':'", g.Last().RawToken()) + } scalar, err := p.parseScalarValue(ctx, g.First()) if err != nil { @@ -789,6 +792,20 @@ func (p *parser) parseLiteral(ctx *context) (*ast.LiteralNode, error) { return node, nil } +func (p *parser) parseScalarTag(ctx *context) (*ast.TagNode, error) { + tag, err := p.parseTag(ctx) + if err != nil { + return nil, err + } + if tag.Value == nil { + return nil, errors.ErrSyntax("specified not scalar tag", tag.GetToken()) + } + if _, ok := tag.Value.(ast.ScalarNode); !ok { + return nil, errors.ErrSyntax("specified not scalar tag", tag.GetToken()) + } + return tag, nil +} + func (p *parser) parseTag(ctx *context) (*ast.TagNode, error) { tagTk := ctx.currentToken() tagRawTk := tagTk.RawToken() @@ -815,7 +832,7 @@ func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) ( return newNullNode(ctx, ctx.createNullToken(&Token{Token: tagRawTk})) } switch token.ReservedTagKeyword(tagRawTk.Value) { - case token.MappingTag, token.OrderedMapTag: + case token.MappingTag, token.SetTag: if !p.isMapToken(tk) { return nil, errors.ErrSyntax("could not find map", tk.RawToken()) } @@ -826,6 +843,8 @@ func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) ( case token.IntegerTag, token.FloatTag, token.StringTag, token.BinaryTag, token.TimestampTag, token.BooleanTag, token.NullTag: if tk.GroupType() == TokenGroupLiteral || tk.GroupType() == TokenGroupFolded { return p.parseLiteral(ctx.withGroup(tk.Group)) + } else if tk.Type() == token.CollectEntryType || tk.Type() == token.MappingValueType { + return newTagDefaultScalarValueNode(ctx, tagRawTk) } scalar, err := p.parseScalarValue(ctx, tk) if err != nil { @@ -833,8 +852,11 @@ func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) ( } ctx.goNext() return scalar, nil - case token.SequenceTag, token.SetTag: - return nil, errors.ErrSyntax(fmt.Sprintf("sorry, currently not supported %s tag", tagRawTk.Value), tagRawTk) + case token.SequenceTag, token.OrderedMapTag: + if tk.Type() == token.SequenceStartType { + return p.parseFlowSequence(ctx) + } + return p.parseSequence(ctx) } if strings.HasPrefix(tagRawTk.Value, "!!") { return nil, errors.ErrSyntax(fmt.Sprintf("unknown secondary tag name %q specified", tagRawTk.Value), tagRawTk) diff --git a/scanner/scanner.go b/scanner/scanner.go index f702f6d..501534e 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -256,7 +256,7 @@ func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) { isNewLine = true s.progressLine(ctx) continue - } else if c == ' ' && isFirstLineChar { + } else if isFirstLineChar && (c == ' ' || c == '\t') { continue } else if c != '\'' { value = append(value, c) @@ -340,7 +340,7 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { isNewLine = true s.progressLine(ctx) continue - } else if c == ' ' && isFirstLineChar { + } else if isFirstLineChar && (c == ' ' || c == '\t') { continue } else if c == '\\' { isFirstLineChar = false @@ -463,14 +463,16 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { progress++ continue } - if src[i] == '\n' { + if s.isNewLineChar(src[i]) { break } foundNotSpaceChar = true } if foundNotSpaceChar { value = append(value, c) - s.progressColumn(ctx, 1) + if src[idx+1] != '"' { + s.progressColumn(ctx, 1) + } } else { idx += progress s.progressColumn(ctx, progress) @@ -569,20 +571,33 @@ func (s *Scanner) scanTag(ctx *Context) bool { var progress int for idx, c := range ctx.src[ctx.idx:] { progress = idx + 1 - ctx.addOriginBuf(c) switch c { case ' ': + ctx.addOriginBuf(c) value := ctx.source(ctx.idx-1, ctx.idx+idx) ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos())) s.progressColumn(ctx, len([]rune(value))) ctx.clear() return true + case ',': + if s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0 { + value := ctx.source(ctx.idx-1, ctx.idx+idx) + ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos())) + s.progressColumn(ctx, len([]rune(value))-1) // progress column before collect-entry for scanning it at scanFlowEntry function. + ctx.clear() + return true + } else { + ctx.addOriginBuf(c) + } case '\n', '\r': + ctx.addOriginBuf(c) value := ctx.source(ctx.idx-1, ctx.idx+idx) ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos())) s.progressColumn(ctx, len([]rune(value))-1) // progress column before new-line-char for scanning new-line-char at scanNewLine function. ctx.clear() return true + default: + ctx.addOriginBuf(c) } } s.progressColumn(ctx, progress) @@ -853,6 +868,7 @@ func (s *Scanner) scanDocumentStart(ctx *Context) bool { ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos())) s.progressColumn(ctx, 3) ctx.clear() + s.clearState() return true } @@ -1031,7 +1047,9 @@ func (s *Scanner) scanMapKey(ctx *Context) bool { return false } - ctx.addToken(token.MappingKey(s.pos())) + tk := token.MappingKey(s.pos()) + s.lastDelimColumn = tk.Position.Column + ctx.addToken(tk) s.progressColumn(ctx, 1) ctx.clear() return true @@ -1282,11 +1300,15 @@ func (s *Scanner) Init(text string) { s.line = 1 s.column = 1 s.offset = 1 + s.isFirstCharAtLine = true + s.clearState() +} + +func (s *Scanner) clearState() { s.prevLineIndentNum = 0 s.lastDelimColumn = 0 s.indentLevel = 0 s.indentNum = 0 - s.isFirstCharAtLine = true } // Scan scans the next token and returns the token collection. The source end is indicated by io.EOF. diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index bd35255..5a54112 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -79,7 +79,7 @@ var failureTestNames = []string{ "node-anchor-not-indented", "plain-dashes-in-flow-sequence", "plain-url-in-flow-mapping", - "question-mark-edge-cases/00", // panic + "question-mark-edge-cases/00", "question-mark-edge-cases/01", "scalar-doc-with-in-content/01", "scalar-value-with-two-anchors", @@ -91,9 +91,6 @@ var failureTestNames = []string{ "spec-example-7-3-completely-empty-flow-nodes", "spec-example-8-18-implicit-block-mapping-entries", "spec-example-8-19-compact-block-mappings", - "spec-example-2-25-unordered-sets", - "spec-example-2-26-ordered-mappings", - "spec-example-5-12-tabs-and-spaces", "spec-example-6-19-secondary-tag-handle", "spec-example-6-21-local-tag-prefix", "spec-example-6-24-verbatim-tags", @@ -104,16 +101,12 @@ var failureTestNames = []string{ "spec-example-6-8-flow-folding", "spec-example-7-12-plain-lines", "spec-example-7-19-single-pair-flow-mappings", - "spec-example-7-2-empty-content", "spec-example-7-20-single-pair-explicit-entry", "spec-example-7-24-flow-nodes", - "spec-example-7-6-double-quoted-lines", - "spec-example-7-9-single-quoted-lines", "spec-example-8-10-folded-lines-8-13-final-empty-lines", "spec-example-8-15-block-sequence-entry-types", "spec-example-8-17-explicit-block-mapping-entries", "spec-example-8-2-block-indentation-indicator", - "spec-example-8-22-block-collection-nodes", "spec-example-9-3-bare-documents", "spec-example-9-4-explicit-documents", "spec-example-9-5-directives-documents", @@ -133,8 +126,6 @@ var failureTestNames = []string{ "tabs-that-look-like-indentation/08", "tag-shorthand-used-in-documents-but-only-defined-in-the-first", "tags-for-block-objects", - "tags-for-flow-objects", - "tags-for-root-objects", "tags-on-empty-scalars", "trailing-line-of-spaces/01", // last '\n' character is needed ? "various-combinations-of-explicit-block-mappings", // no json