Skip to content

Commit

Permalink
Fix parsing of multiline mapping key (#579)
Browse files Browse the repository at this point in the history
* fix multiline mapping key

* remove passed test cases

* add boundary checking
  • Loading branch information
goccy authored Dec 9, 2024
1 parent 438f2d0 commit 0e95011
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 30 deletions.
64 changes: 46 additions & 18 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ func (p *parser) parseToken(ctx *context, tk *Token) (ast.Node, error) {
case token.TagType:
return p.parseTag(ctx)
case token.MappingStartType:
return p.parseFlowMap(ctx)
return p.parseFlowMap(ctx.withFlow(true))
case token.SequenceStartType:
return p.parseFlowSequence(ctx)
return p.parseFlowSequence(ctx.withFlow(true))
case token.SequenceEntryType:
return p.parseSequence(ctx)
case token.SequenceEndType:
Expand Down Expand Up @@ -588,7 +588,7 @@ func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error
keyText := p.mapKeyText(scalar)
keyPath := ctx.withChild(keyText).path
key.SetPath(keyPath)
if err := p.validateMapKey(key.GetToken(), keyPath); err != nil {
if err := p.validateMapKey(ctx, key.GetToken(), keyPath, g.Last()); err != nil {
return nil, err
}
p.pathMap[keyPath] = key
Expand All @@ -609,14 +609,14 @@ func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error
keyText := p.mapKeyText(key)
keyPath := ctx.withChild(keyText).path
key.SetPath(keyPath)
if err := p.validateMapKey(key.GetToken(), keyPath); err != nil {
if err := p.validateMapKey(ctx, key.GetToken(), keyPath, g.Last()); err != nil {
return nil, err
}
p.pathMap[keyPath] = key
return key, nil
}

func (p *parser) validateMapKey(tk *token.Token, keyPath string) error {
func (p *parser) validateMapKey(ctx *context, tk *token.Token, keyPath string, colonTk *Token) error {
if !p.allowDuplicateMapKey {
if n, exists := p.pathMap[keyPath]; exists {
pos := n.GetToken().Position
Expand All @@ -626,29 +626,57 @@ func (p *parser) validateMapKey(tk *token.Token, keyPath string) error {
)
}
}
if tk.Type != token.StringType {
origin := p.removeLeftWhiteSpace(tk.Origin)
if ctx.isFlow {
if tk.Type == token.StringType {
origin = p.removeRightWhiteSpace(origin)
if tk.Position.Line+p.newLineCharacterNum(origin) != colonTk.Line() {
return errors.ErrSyntax("map key definition includes an implicit line break", tk)
}
}
return nil
}
if tk.Type != token.StringType && tk.Type != token.SingleQuoteType && tk.Type != token.DoubleQuoteType {
return nil
}
origin := p.removeLeftSideNewLineCharacter(tk.Origin)
if p.existsNewLineCharacter(origin) {
return errors.ErrSyntax("unexpected key name", tk)
}
return nil
}

func (p *parser) removeLeftSideNewLineCharacter(src string) string {
func (p *parser) removeLeftWhiteSpace(src string) string {
// Trims leading spaces and line breaks (CR, LF, or CRLF).
return strings.TrimLeftFunc(src, func(r rune) bool {
return r == ' ' || r == '\r' || r == '\n'
})
}

func (p *parser) removeRightWhiteSpace(src string) string {
// Trims trailing spaces and line breaks (CR, LF, or CRLF).
return strings.TrimLeft(strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n"), "\r\n")
return strings.TrimRightFunc(src, func(r rune) bool {
return r == ' ' || r == '\r' || r == '\n'
})
}

func (p *parser) existsNewLineCharacter(src string) bool {
if strings.Index(src, "\n") > 0 {
return true
}
if strings.Index(src, "\r") > 0 {
return true
return p.newLineCharacterNum(src) > 0
}

func (p *parser) newLineCharacterNum(src string) int {
var num int
for i := 0; i < len(src); i++ {
switch src[i] {
case '\r':
if len(src) > i+1 && src[i+1] == '\n' {
i++
}
num++
case '\n':
num++
}
}
return false
return num
}

func (p *parser) mapKeyText(n ast.Node) string {
Expand Down Expand Up @@ -900,7 +928,7 @@ func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) (
return nil, errors.ErrSyntax("could not find map", tk.RawToken())
}
if tk.Type() == token.MappingStartType {
return p.parseFlowMap(ctx)
return p.parseFlowMap(ctx.withFlow(true))
}
return p.parseMap(ctx)
case token.IntegerTag, token.FloatTag, token.StringTag, token.BinaryTag, token.TimestampTag, token.BooleanTag, token.NullTag:
Expand All @@ -917,7 +945,7 @@ func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) (
return scalar, nil
case token.SequenceTag, token.OrderedMapTag:
if tk.Type() == token.SequenceStartType {
return p.parseFlowSequence(ctx)
return p.parseFlowSequence(ctx.withFlow(true))
}
return p.parseSequence(ctx)
}
Expand Down Expand Up @@ -956,7 +984,7 @@ func (p *parser) parseFlowSequence(ctx *context) (*ast.SequenceNode, error) {
break
}

value, err := p.parseToken(ctx.withIndex(uint(len(node.Values))).withFlow(true), ctx.currentToken())
value, err := p.parseToken(ctx.withIndex(uint(len(node.Values))), ctx.currentToken())
if err != nil {
return nil, err
}
Expand Down
20 changes: 8 additions & 12 deletions yaml_test_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,17 @@ var failureTestNames = []string{
"invalid-comment-after-comma",
"invalid-comment-after-end-of-flow-sequence",
"invalid-comma-in-tag",
"invalid-tag", // pass yamlv3.
"legal-tab-after-indentation", // pass yamlv3.
"literal-modifers/00", // pass yamlv3.
"literal-modifers/01", // pass yamlv3.
"literal-modifers/02", // pass yamlv3.
"literal-modifers/03", // pass yamlv3.
"literal-scalars", // pass yamlv3.
"mapping-key-and-flow-sequence-item-anchors", // no json.
"multiline-double-quoted-implicit-keys", // pass yamlv3.
"multiline-plain-flow-mapping-key",
"invalid-tag", // pass yamlv3.
"legal-tab-after-indentation", // pass yamlv3.
"literal-modifers/00", // pass yamlv3.
"literal-modifers/01", // pass yamlv3.
"literal-modifers/02", // pass yamlv3.
"literal-modifers/03", // pass yamlv3.
"literal-scalars", // pass yamlv3.
"mapping-key-and-flow-sequence-item-anchors", // no json.
"multiline-plain-value-with-tabs-on-empty-lines", // pass yamlv3.
"multiline-scalar-at-top-level", // pass yamlv3.
"multiline-scalar-at-top-level-1-3", // pass yamlv3.
"multiline-single-quoted-implicit-keys", // pass yamlv3.
"multiline-unidented-double-quoted-block-key", // pass yamlv3.
"nested-implicit-complex-keys", // no json.
"node-anchor-not-indented", // pass yamlv3.
"plain-dashes-in-flow-sequence",
Expand Down

0 comments on commit 0e95011

Please sign in to comment.