From bf7faf19b83d0f2be66f394ef4b03323de61466a Mon Sep 17 00:00:00 2001 From: Matthew Warman Date: Thu, 14 Dec 2023 18:11:19 +0000 Subject: [PATCH 1/2] feat: preserve line breaks from original input --- ast/ast.go | 61 ++++++++- parser/parser_test.go | 292 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 339 insertions(+), 14 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index b4d5ec41..b04a41a2 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -278,6 +278,49 @@ func readNode(p []byte, node Node) (int, error) { return size, nil } +func checkLineBreak(t *token.Token) bool { + if t.Prev != nil { + lbc := "\n" + prev := t.Prev + var adjustment int + // if the previous type is sequence entry user the previous type for that + if prev.Type == token.SequenceEntryType { + // as well as switching to previous type count any new lines in origin to account for: + // - + // b: c + adjustment = strings.Count(strings.TrimRight(t.Origin, lbc), lbc) + if prev.Prev != nil { + prev = prev.Prev + } + } + lineDiff := t.Position.Line - prev.Position.Line - 1 + if lineDiff > 0 { + if prev.Type == token.StringType { + // Remove any line breaks included in multiline string + adjustment += strings.Count(strings.TrimRight(strings.TrimSpace(prev.Origin), lbc), lbc) + } + // Due to the way that comment parsing works its assumed that when a null value does not have new line in origin + // it was squashed therefore difference is ignored. + //foo: + // bar: + // # comment + // baz: 1 + //becomes + //foo: + // bar: null # comment + // + // baz: 1 + if prev.Type == token.NullType { + return strings.Count(prev.Origin, lbc) > 0 + } + if lineDiff-adjustment > 0 { + return true + } + } + } + return false +} + // Null create node for null value func Null(tk *token.Token) *NullNode { return &NullNode{ @@ -1390,6 +1433,9 @@ func (n *MappingValueNode) String() string { func (n *MappingValueNode) toString() string { space := strings.Repeat(" ", n.Key.GetToken().Position.Column-1) + if checkLineBreak(n.Key.GetToken()) { + space = fmt.Sprintf("%s%s", "\n", space) + } keyIndentLevel := n.Key.GetToken().Position.IndentLevel valueIndentLevel := n.Value.GetToken().Position.IndentLevel keyComment := n.Key.GetComment() @@ -1563,6 +1609,11 @@ func (n *SequenceNode) blockStyleString() string { for idx, value := range n.Values { valueStr := value.String() + newLinePrefix := "" + if strings.HasPrefix(valueStr, "\n") { + valueStr = valueStr[1:] + newLinePrefix = "\n" + } splittedValues := strings.Split(valueStr, "\n") trimmedFirstValue := strings.TrimLeft(splittedValues[0], " ") diffLength := len(splittedValues[0]) - len(trimmedFirstValue) @@ -1585,9 +1636,10 @@ func (n *SequenceNode) blockStyleString() string { } newValue := strings.Join(newValues, "\n") if len(n.ValueHeadComments) == len(n.Values) && n.ValueHeadComments[idx] != nil { - values = append(values, n.ValueHeadComments[idx].StringWithSpace(n.Start.Position.Column-1)) + values = append(values, fmt.Sprintf("%s%s", newLinePrefix, n.ValueHeadComments[idx].StringWithSpace(n.Start.Position.Column-1))) + newLinePrefix = "" } - values = append(values, fmt.Sprintf("%s- %s", space, newValue)) + values = append(values, fmt.Sprintf("%s%s- %s", newLinePrefix, space, newValue)) } if n.FootComment != nil { values = append(values, n.FootComment.StringWithSpace(n.Start.Position.Column-1)) @@ -1880,10 +1932,13 @@ func (n *CommentGroupNode) StringWithSpace(col int) string { values := []string{} space := strings.Repeat(" ", col) for _, comment := range n.Comments { + space := space + if checkLineBreak(comment.Token) { + space = fmt.Sprintf("%s%s", "\n", space) + } values = append(values, space+comment.String()) } return strings.Join(values, "\n") - } // MarshalYAML encodes to a YAML text diff --git a/parser/parser_test.go b/parser/parser_test.go index 8d697e8c..db280935 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -592,6 +592,10 @@ i: 'j' if err != nil { t.Fatalf("%+v", err) } + got := f.String() + if got != strings.TrimPrefix(test.expect, "\n") { + t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(test.expect, "\n"), got) + } var v Visitor for _, doc := range f.Docs { ast.Walk(&v, doc.Body) @@ -605,6 +609,268 @@ i: 'j' } } +func TestParseWhitespace(t *testing.T) { + tests := []struct { + source string + expect string + }{ + { + ` +a: b + +c: d + + +e: f +g: h +`, + ` +a: b + +c: d + +e: f +g: h +`, + }, + { + ` +a: + - b: c + d: e + + - f: g + h: i +`, + ` +a: + - b: c + d: e + + - f: g + h: i +`, + }, + { + ` +a: + - b: c + d: e + + - f: g + h: i +`, + ` +a: + - b: c + d: e + + - f: g + h: i +`, + }, + { + ` +a: +- b: c + d: e + +- f: g + h: i +`, + ` +a: +- b: c + d: e + +- f: g + h: i +`, + }, + { + ` +a: +# comment 1 +- b: c + d: e + +# comment 2 +- f: g + h: i +`, + ` +a: +# comment 1 +- b: c + d: e + +# comment 2 +- f: g + h: i +`, + }, + { + ` +a: + # comment 1 + - b: c + # comment 2 + d: e + + # comment 3 + # comment 4 + - f: g + h: i # comment 5 +`, + ` +a: + # comment 1 + - b: c + # comment 2 + d: e + + # comment 3 + # comment 4 + - f: g + h: i # comment 5 +`, + }, + { + ` +a: + # comment 1 + - b: c + # comment 2 + d: e + + # comment 3 + # comment 4 + - f: | + g + g + h: i # comment 5 +`, + ` +a: + # comment 1 + - b: c + # comment 2 + d: e + + # comment 3 + # comment 4 + - f: | + g + g + h: i # comment 5 +`, + }, + { + ` +a: + # comment 1 + - b: c + # comment 2 + d: e + + # comment 3 + # comment 4 + - f: | + asd + def + + h: i # comment 5 +`, + ` +a: + # comment 1 + - b: c + # comment 2 + d: e + + # comment 3 + # comment 4 + - f: | + asd + def + + h: i # comment 5 +`, + }, + { + ` +- b: c + d: e + +- f: g + h: i # comment 4 + `, + ` +- b: c + d: e + +- f: g + h: i # comment 4 +`, + }, + { + ` +a: null +b: null + +d: e +`, + ` +a: null +b: null + +d: e +`, + }, + { + ` +foo: + bar: null # comment + + baz: 1 +`, + ` +foo: + bar: null # comment + + baz: 1 +`, + }, + { + ` +foo: + bar: null # comment + +baz: 1 +`, + ` +foo: + bar: null # comment + +baz: 1 +`, + }, + } + + for _, test := range tests { + t.Run(test.source, func(t *testing.T) { + f, err := parser.ParseBytes([]byte(test.source), parser.ParseComments) + if err != nil { + t.Fatal(err) + } + got := f.String() + if got != strings.TrimPrefix(test.expect, "\n") { + t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(test.expect, "\n"), got) + } + }) + } +} + func TestNewLineChar(t *testing.T) { for _, f := range []string{ "lf.yml", @@ -617,10 +883,11 @@ func TestNewLineChar(t *testing.T) { } actual := fmt.Sprintf("%v", ast) expect := `a: "a" + b: 1 ` if expect != actual { - t.Fatal("unexpected result") + t.Fatalf("unexpected result\nexpected:\n%s\ngot:\n%s", expect, actual) } } } @@ -827,8 +1094,9 @@ foo: if len(f.Docs) != 1 { t.Fatal("failed to parse content with same line comment") } - if f.Docs[0].String() != strings.TrimPrefix(expected, "\n") { - t.Fatal("failed to parse comment") + got := f.Docs[0].String() + if got != strings.TrimPrefix(expected, "\n") { + t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(expected, "\n"), got) } }) t.Run("next line", func(t *testing.T) { @@ -849,8 +1117,9 @@ foo: if len(f.Docs) != 1 { t.Fatal("failed to parse content with next line comment") } - if f.Docs[0].String() != strings.TrimPrefix(expected, "\n") { - t.Fatal("failed to parse comment") + got := f.Docs[0].String() + if got != strings.TrimPrefix(expected, "\n") { + t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(expected, "\n"), got) } }) t.Run("next line and different indent", func(t *testing.T) { @@ -870,8 +1139,9 @@ baz: 1` foo: bar: null # comment baz: 1` - if f.Docs[0].String() != strings.TrimPrefix(expected, "\n") { - t.Fatal("failed to parse comment") + got := f.Docs[0].String() + if got != strings.TrimPrefix(expected, "\n") { + t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(expected, "\n"), got) } }) } @@ -897,8 +1167,9 @@ foo: - bar: 1 baz: - xxx` - if f.Docs[0].String() != strings.TrimPrefix(expected, "\n") { - t.Fatal("failed to parse comment") + got := f.Docs[0].String() + if got != strings.TrimPrefix(expected, "\n") { + t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(expected, "\n"), got) } t.Run("foo[0].bar", func(t *testing.T) { path, err := yaml.PathString("$.foo[0].bar") @@ -999,8 +1270,7 @@ func (c *pathCapturer) Visit(node ast.Node) ast.Visitor { return c } -type Visitor struct { -} +type Visitor struct{} func (v *Visitor) Visit(node ast.Node) ast.Visitor { tk := node.GetToken() From 7e17e11db6c46e974cd6540af30564c17be505e1 Mon Sep 17 00:00:00 2001 From: Matthew Warman Date: Fri, 1 Nov 2024 11:58:37 +0000 Subject: [PATCH 2/2] docs: fix spelling error in comment Co-authored-by: Hilmar Falkenberg --- ast/ast.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ast/ast.go b/ast/ast.go index b04a41a2..1f3e86a8 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -283,7 +283,7 @@ func checkLineBreak(t *token.Token) bool { lbc := "\n" prev := t.Prev var adjustment int - // if the previous type is sequence entry user the previous type for that + // if the previous type is sequence entry use the previous type for that if prev.Type == token.SequenceEntryType { // as well as switching to previous type count any new lines in origin to account for: // -