Skip to content

Commit

Permalink
fix parsing of invalid yaml
Browse files: browse the repository at this point in the history
  • Loading branch information
goccy committed Nov 1, 2024
1 parent 3d452b5 commit 4d5be04
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 2 deletions.
4 changes: 4 additions & 0 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,10 @@ func (p *parser) parseLiteral(ctx *context) (*ast.LiteralNode, error) {
p.progress(1) // skip literal/folded token

tk := p.currentToken()
if tk == nil {
node.Value = ast.String(token.New("", "", node.Start.Position))
return node, nil
}
var comment *ast.CommentGroupNode
if tk.Type == token.CommentType {
comment = p.parseCommentOnly(ctx)
Expand Down
20 changes: 20 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ func TestParser(t *testing.T) {
"a_mk: \n bd: 3\n",
"a: :a",
"{a: , b: c}",
"value: >\n",
"value: >\n\n",
"value: >\nother:",
"value: >\n\nother:",
}
for _, src := range sources {
if _, err := parser.Parse(lexer.Tokenize(src), 0); err != nil {
Expand Down Expand Up @@ -811,6 +815,22 @@ b: - 2
[1:4] found invalid token
> 1 | a: "\"key\": \"value:\"
^
`,
},
{
`foo: [${should not be allowed}]`,
`
[1:8] ',' or ']' must be specified
> 1 | foo: [${should not be allowed}]
^
`,
},
{
`foo: [$[should not be allowed]]`,
`
[1:8] ',' or ']' must be specified
> 1 | foo: [$[should not be allowed]]
^
`,
},
}
Expand Down
23 changes: 21 additions & 2 deletions scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -620,11 +620,22 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) {
s.progressLine(ctx)
}

// isFlowMode reports whether the scanner has recorded at least one started
// flow collection: it returns true when either the flow-sequence counter or
// the flow-mapping counter is positive, and false otherwise.
func (s *Scanner) isFlowMode() bool {
	return s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0
}

func (s *Scanner) scanFlowMapStart(ctx *Context) bool {
if ctx.existsBuffer() {
if ctx.existsBuffer() && !s.isFlowMode() {
return false
}

s.addBufferedTokenIfExists(ctx)
ctx.addOriginBuf('{')
ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos()))
s.startedFlowMapNum++
Expand All @@ -648,10 +659,11 @@ func (s *Scanner) scanFlowMapEnd(ctx *Context) bool {
}

func (s *Scanner) scanFlowArrayStart(ctx *Context) bool {
if ctx.existsBuffer() {
if ctx.existsBuffer() && !s.isFlowMode() {
return false
}

s.addBufferedTokenIfExists(ctx)
ctx.addOriginBuf('[')
ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos()))
s.startedFlowSequenceNum++
Expand Down Expand Up @@ -946,6 +958,13 @@ func (s *Scanner) scan(ctx *Context) error {
}
if ctx.isDocument() {
if s.isChangedToIndentStateDown() {
if tk := ctx.lastToken(); tk != nil {
// If literal/folded content is empty, no string token is added.
// Therefore, add an empty string token.
if tk.Type != token.StringType {
ctx.addToken(token.String("", "", s.pos()))
}
}
s.breakLiteral(ctx)
} else {
s.scanLiteral(ctx, c)
Expand Down

0 comments on commit 4d5be04

Please sign in to comment.