From 0e95011a5dc7a437c31706dcf7840e0faa225ff0 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Mon, 9 Dec 2024 22:14:28 +0900 Subject: [PATCH] Fix parsing of multiline mapping key (#579) * fix multiline mapping key * remove passed test cases * add boundary checking --- parser/parser.go | 64 +++++++++++++++++++++++++++++------------ yaml_test_suite_test.go | 20 ++++++------- 2 files changed, 54 insertions(+), 30 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index b2367eb..959e366 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -241,9 +241,9 @@ func (p *parser) parseToken(ctx *context, tk *Token) (ast.Node, error) { case token.TagType: return p.parseTag(ctx) case token.MappingStartType: - return p.parseFlowMap(ctx) + return p.parseFlowMap(ctx.withFlow(true)) case token.SequenceStartType: - return p.parseFlowSequence(ctx) + return p.parseFlowSequence(ctx.withFlow(true)) case token.SequenceEntryType: return p.parseSequence(ctx) case token.SequenceEndType: @@ -588,7 +588,7 @@ func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error keyText := p.mapKeyText(scalar) keyPath := ctx.withChild(keyText).path key.SetPath(keyPath) - if err := p.validateMapKey(key.GetToken(), keyPath); err != nil { + if err := p.validateMapKey(ctx, key.GetToken(), keyPath, g.Last()); err != nil { return nil, err } p.pathMap[keyPath] = key @@ -609,14 +609,14 @@ func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error keyText := p.mapKeyText(key) keyPath := ctx.withChild(keyText).path key.SetPath(keyPath) - if err := p.validateMapKey(key.GetToken(), keyPath); err != nil { + if err := p.validateMapKey(ctx, key.GetToken(), keyPath, g.Last()); err != nil { return nil, err } p.pathMap[keyPath] = key return key, nil } -func (p *parser) validateMapKey(tk *token.Token, keyPath string) error { +func (p *parser) validateMapKey(ctx *context, tk *token.Token, keyPath string, colonTk *Token) error { if !p.allowDuplicateMapKey { if n, exists := p.pathMap[keyPath]; exists { pos := n.GetToken().Position @@ -626,29 +626,57 @@ func (p *parser) validateMapKey(tk *token.Token, keyPath string) error { ) } } - if tk.Type != token.StringType { + origin := p.removeLeftWhiteSpace(tk.Origin) + if ctx.isFlow { + if tk.Type == token.StringType { + origin = p.removeRightWhiteSpace(origin) + if tk.Position.Line+p.newLineCharacterNum(origin) != colonTk.Line() { + return errors.ErrSyntax("map key definition includes an implicit line break", tk) + } + } + return nil + } + if tk.Type != token.StringType && tk.Type != token.SingleQuoteType && tk.Type != token.DoubleQuoteType { return nil } - origin := p.removeLeftSideNewLineCharacter(tk.Origin) if p.existsNewLineCharacter(origin) { return errors.ErrSyntax("unexpected key name", tk) } return nil } -func (p *parser) removeLeftSideNewLineCharacter(src string) string { +func (p *parser) removeLeftWhiteSpace(src string) string { + // CR or LF or CRLF + return strings.TrimLeftFunc(src, func(r rune) bool { + return r == ' ' || r == '\r' || r == '\n' + }) +} + +func (p *parser) removeRightWhiteSpace(src string) string { // CR or LF or CRLF - return strings.TrimLeft(strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n"), "\r\n") + return strings.TrimRightFunc(src, func(r rune) bool { + return r == ' ' || r == '\r' || r == '\n' + }) } func (p *parser) existsNewLineCharacter(src string) bool { - if strings.Index(src, "\n") > 0 { - return true - } - if strings.Index(src, "\r") > 0 { - return true + return p.newLineCharacterNum(src) > 0 +} + +func (p *parser) newLineCharacterNum(src string) int { + var num int + for i := 0; i < len(src); i++ { + switch src[i] { + case '\r': + if len(src) > i+1 && src[i+1] == '\n' { + i++ + } + num++ + case '\n': + num++ + } } - return false + return num } func (p *parser) mapKeyText(n ast.Node) string { @@ -900,7 +928,7 @@ func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) ( return nil, errors.ErrSyntax("could not find map", tk.RawToken()) } if tk.Type() == token.MappingStartType { - return p.parseFlowMap(ctx) + return p.parseFlowMap(ctx.withFlow(true)) } return p.parseMap(ctx) case token.IntegerTag, token.FloatTag, token.StringTag, token.BinaryTag, token.TimestampTag, token.BooleanTag, token.NullTag: @@ -917,7 +945,7 @@ func (p *parser) parseTagValue(ctx *context, tagRawTk *token.Token, tk *Token) ( return scalar, nil case token.SequenceTag, token.OrderedMapTag: if tk.Type() == token.SequenceStartType { - return p.parseFlowSequence(ctx) + return p.parseFlowSequence(ctx.withFlow(true)) } return p.parseSequence(ctx) } @@ -956,7 +984,7 @@ func (p *parser) parseFlowSequence(ctx *context) (*ast.SequenceNode, error) { break } - value, err := p.parseToken(ctx.withIndex(uint(len(node.Values))).withFlow(true), ctx.currentToken()) + value, err := p.parseToken(ctx.withIndex(uint(len(node.Values))), ctx.currentToken()) if err != nil { return nil, err } diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index 16cdeab..187b39f 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -37,21 +37,17 @@ var failureTestNames = []string{ "invalid-comment-after-comma", "invalid-comment-after-end-of-flow-sequence", "invalid-comma-in-tag", - "invalid-tag", // pass yamlv3. - "legal-tab-after-indentation", // pass yamlv3. - "literal-modifers/00", // pass yamlv3. - "literal-modifers/01", // pass yamlv3. - "literal-modifers/02", // pass yamlv3. - "literal-modifers/03", // pass yamlv3. - "literal-scalars", // pass yamlv3. - "mapping-key-and-flow-sequence-item-anchors", // no json. - "multiline-double-quoted-implicit-keys", // pass yamlv3. - "multiline-plain-flow-mapping-key", + "invalid-tag", // pass yamlv3. + "legal-tab-after-indentation", // pass yamlv3. + "literal-modifers/00", // pass yamlv3. + "literal-modifers/01", // pass yamlv3. + "literal-modifers/02", // pass yamlv3. + "literal-modifers/03", // pass yamlv3. + "literal-scalars", // pass yamlv3. + "mapping-key-and-flow-sequence-item-anchors", // no json. "multiline-plain-value-with-tabs-on-empty-lines", // pass yamlv3. "multiline-scalar-at-top-level", // pass yamlv3. "multiline-scalar-at-top-level-1-3", // pass yamlv3. - "multiline-single-quoted-implicit-keys", // pass yamlv3. - "multiline-unidented-double-quoted-block-key", // pass yamlv3. "nested-implicit-complex-keys", // no json. "node-anchor-not-indented", // pass yamlv3. "plain-dashes-in-flow-sequence",