From bf7faf19b83d0f2be66f394ef4b03323de61466a Mon Sep 17 00:00:00 2001
From: Matthew Warman <mcwarman@gmail.com>
Date: Thu, 14 Dec 2023 18:11:19 +0000
Subject: [PATCH 1/2] feat: preserve line breaks from original input

---
 ast/ast.go            |  61 ++++++++-
 parser/parser_test.go | 292 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 339 insertions(+), 14 deletions(-)

diff --git a/ast/ast.go b/ast/ast.go
index b4d5ec41..b04a41a2 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -278,6 +278,49 @@ func readNode(p []byte, node Node) (int, error) {
 	return size, nil
 }
 
+func checkLineBreak(t *token.Token) bool {
+	if t.Prev != nil {
+		lbc := "\n"
+		prev := t.Prev
+		var adjustment int
+		// if the previous type is sequence entry user the previous type for that
+		if prev.Type == token.SequenceEntryType {
+			// as well as switching to previous type count any new lines in origin to account for:
+			// -
+			//   b: c
+			adjustment = strings.Count(strings.TrimRight(t.Origin, lbc), lbc)
+			if prev.Prev != nil {
+				prev = prev.Prev
+			}
+		}
+		lineDiff := t.Position.Line - prev.Position.Line - 1
+		if lineDiff > 0 {
+			if prev.Type == token.StringType {
+				// Remove any line breaks included in multiline string
+				adjustment += strings.Count(strings.TrimRight(strings.TrimSpace(prev.Origin), lbc), lbc)
+			}
+			// Due to the way that comment parsing works, it's assumed that when a null value does not have a new line in origin
+			// it was squashed, therefore the difference is ignored.
+			//foo:
+			//  bar:
+			//  # comment
+			//  baz: 1
+			//becomes
+			//foo:
+			//  bar: null # comment
+			//
+			//  baz: 1
+			if prev.Type == token.NullType {
+				return strings.Count(prev.Origin, lbc) > 0
+			}
+			if lineDiff-adjustment > 0 {
+				return true
+			}
+		}
+	}
+	return false
+}
+
 // Null create node for null value
 func Null(tk *token.Token) *NullNode {
 	return &NullNode{
@@ -1390,6 +1433,9 @@ func (n *MappingValueNode) String() string {
 
 func (n *MappingValueNode) toString() string {
 	space := strings.Repeat(" ", n.Key.GetToken().Position.Column-1)
+	if checkLineBreak(n.Key.GetToken()) {
+		space = fmt.Sprintf("%s%s", "\n", space)
+	}
 	keyIndentLevel := n.Key.GetToken().Position.IndentLevel
 	valueIndentLevel := n.Value.GetToken().Position.IndentLevel
 	keyComment := n.Key.GetComment()
@@ -1563,6 +1609,11 @@ func (n *SequenceNode) blockStyleString() string {
 
 	for idx, value := range n.Values {
 		valueStr := value.String()
+		newLinePrefix := ""
+		if strings.HasPrefix(valueStr, "\n") {
+			valueStr = valueStr[1:]
+			newLinePrefix = "\n"
+		}
 		splittedValues := strings.Split(valueStr, "\n")
 		trimmedFirstValue := strings.TrimLeft(splittedValues[0], " ")
 		diffLength := len(splittedValues[0]) - len(trimmedFirstValue)
@@ -1585,9 +1636,10 @@ func (n *SequenceNode) blockStyleString() string {
 		}
 		newValue := strings.Join(newValues, "\n")
 		if len(n.ValueHeadComments) == len(n.Values) && n.ValueHeadComments[idx] != nil {
-			values = append(values, n.ValueHeadComments[idx].StringWithSpace(n.Start.Position.Column-1))
+			values = append(values, fmt.Sprintf("%s%s", newLinePrefix, n.ValueHeadComments[idx].StringWithSpace(n.Start.Position.Column-1)))
+			newLinePrefix = ""
 		}
-		values = append(values, fmt.Sprintf("%s- %s", space, newValue))
+		values = append(values, fmt.Sprintf("%s%s- %s", newLinePrefix, space, newValue))
 	}
 	if n.FootComment != nil {
 		values = append(values, n.FootComment.StringWithSpace(n.Start.Position.Column-1))
@@ -1880,10 +1932,13 @@ func (n *CommentGroupNode) StringWithSpace(col int) string {
 	values := []string{}
 	space := strings.Repeat(" ", col)
 	for _, comment := range n.Comments {
+		space := space
+		if checkLineBreak(comment.Token) {
+			space = fmt.Sprintf("%s%s", "\n", space)
+		}
 		values = append(values, space+comment.String())
 	}
 	return strings.Join(values, "\n")
-
 }
 
 // MarshalYAML encodes to a YAML text
diff --git a/parser/parser_test.go b/parser/parser_test.go
index 8d697e8c..db280935 100644
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@@ -592,6 +592,10 @@ i: 'j'
 			if err != nil {
 				t.Fatalf("%+v", err)
 			}
+			got := f.String()
+			if got != strings.TrimPrefix(test.expect, "\n") {
+				t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(test.expect, "\n"), got)
+			}
 			var v Visitor
 			for _, doc := range f.Docs {
 				ast.Walk(&v, doc.Body)
@@ -605,6 +609,268 @@ i: 'j'
 	}
 }
 
+func TestParseWhitespace(t *testing.T) {
+	tests := []struct {
+		source string
+		expect string
+	}{
+		{
+			`
+a: b
+
+c: d
+
+
+e: f
+g: h
+`,
+			`
+a: b
+
+c: d
+
+e: f
+g: h
+`,
+		},
+		{
+			`
+a:
+  - b: c
+    d: e
+
+  - f: g
+    h: i
+`,
+			`
+a:
+  - b: c
+    d: e
+
+  - f: g
+    h: i
+`,
+		},
+		{
+			`
+a:
+  - b: c
+    d: e
+
+  - f: g
+    h: i
+`,
+			`
+a:
+  - b: c
+    d: e
+
+  - f: g
+    h: i
+`,
+		},
+		{
+			`
+a:
+- b: c
+  d: e
+
+- f: g
+  h: i
+`,
+			`
+a:
+- b: c
+  d: e
+
+- f: g
+  h: i
+`,
+		},
+		{
+			`
+a:
+# comment 1
+- b: c
+  d: e
+
+# comment 2
+- f: g
+  h: i
+`,
+			`
+a:
+# comment 1
+- b: c
+  d: e
+
+# comment 2
+- f: g
+  h: i
+`,
+		},
+		{
+			`
+a:
+  # comment 1
+  - b: c
+    # comment 2
+    d: e
+
+  # comment 3
+  # comment 4
+  - f: g
+    h: i # comment 5
+`,
+			`
+a:
+  # comment 1
+  - b: c
+    # comment 2
+    d: e
+
+  # comment 3
+  # comment 4
+  - f: g
+    h: i # comment 5
+`,
+		},
+		{
+			`
+a:
+  # comment 1
+  - b: c
+    # comment 2
+    d: e
+
+  # comment 3
+  # comment 4
+  - f: |
+      g
+      g
+    h: i # comment 5
+`,
+			`
+a:
+  # comment 1
+  - b: c
+    # comment 2
+    d: e
+
+  # comment 3
+  # comment 4
+  - f: |
+      g
+      g
+    h: i # comment 5
+`,
+		},
+		{
+			`
+a:
+  # comment 1
+  - b: c
+    # comment 2
+    d: e
+
+  # comment 3
+  # comment 4
+  - f: |
+      asd
+      def
+
+    h: i # comment 5
+`,
+			`
+a:
+  # comment 1
+  - b: c
+    # comment 2
+    d: e
+
+  # comment 3
+  # comment 4
+  - f: |
+      asd
+      def
+
+    h: i # comment 5
+`,
+		},
+		{
+			`
+- b: c
+  d: e
+
+- f: g
+  h: i # comment 4
+		`,
+			`
+- b: c
+  d: e
+
+- f: g
+  h: i # comment 4
+`,
+		},
+		{
+			`
+a: null
+b: null
+
+d: e
+`,
+			`
+a: null
+b: null
+
+d: e
+`,
+		},
+		{
+			`
+foo:
+  bar: null # comment
+
+  baz: 1
+`,
+			`
+foo:
+  bar: null # comment
+
+  baz: 1
+`,
+		},
+		{
+			`
+foo:
+  bar: null # comment
+
+baz: 1
+`,
+			`
+foo:
+  bar: null # comment
+
+baz: 1
+`,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.source, func(t *testing.T) {
+			f, err := parser.ParseBytes([]byte(test.source), parser.ParseComments)
+			if err != nil {
+				t.Fatal(err)
+			}
+			got := f.String()
+			if got != strings.TrimPrefix(test.expect, "\n") {
+				t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(test.expect, "\n"), got)
+			}
+		})
+	}
+}
+
 func TestNewLineChar(t *testing.T) {
 	for _, f := range []string{
 		"lf.yml",
@@ -617,10 +883,11 @@ func TestNewLineChar(t *testing.T) {
 		}
 		actual := fmt.Sprintf("%v", ast)
 		expect := `a: "a"
+
 b: 1
 `
 		if expect != actual {
-			t.Fatal("unexpected result")
+			t.Fatalf("unexpected result\nexpected:\n%s\ngot:\n%s", expect, actual)
 		}
 	}
 }
@@ -827,8 +1094,9 @@ foo:
 		if len(f.Docs) != 1 {
 			t.Fatal("failed to parse content with same line comment")
 		}
-		if f.Docs[0].String() != strings.TrimPrefix(expected, "\n") {
-			t.Fatal("failed to parse comment")
+		got := f.Docs[0].String()
+		if got != strings.TrimPrefix(expected, "\n") {
+			t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(expected, "\n"), got)
 		}
 	})
 	t.Run("next line", func(t *testing.T) {
@@ -849,8 +1117,9 @@ foo:
 		if len(f.Docs) != 1 {
 			t.Fatal("failed to parse content with next line comment")
 		}
-		if f.Docs[0].String() != strings.TrimPrefix(expected, "\n") {
-			t.Fatal("failed to parse comment")
+		got := f.Docs[0].String()
+		if got != strings.TrimPrefix(expected, "\n") {
+			t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(expected, "\n"), got)
 		}
 	})
 	t.Run("next line and different indent", func(t *testing.T) {
@@ -870,8 +1139,9 @@ baz: 1`
 foo:
   bar: null # comment
 baz: 1`
-		if f.Docs[0].String() != strings.TrimPrefix(expected, "\n") {
-			t.Fatal("failed to parse comment")
+		got := f.Docs[0].String()
+		if got != strings.TrimPrefix(expected, "\n") {
+			t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(expected, "\n"), got)
 		}
 	})
 }
@@ -897,8 +1167,9 @@ foo:
   - bar: 1
 baz:
   - xxx`
-	if f.Docs[0].String() != strings.TrimPrefix(expected, "\n") {
-		t.Fatal("failed to parse comment")
+	got := f.Docs[0].String()
+	if got != strings.TrimPrefix(expected, "\n") {
+		t.Fatalf("failed to parse comment:\nexpected:\n%s\ngot:\n%s", strings.TrimPrefix(expected, "\n"), got)
 	}
 	t.Run("foo[0].bar", func(t *testing.T) {
 		path, err := yaml.PathString("$.foo[0].bar")
@@ -999,8 +1270,7 @@ func (c *pathCapturer) Visit(node ast.Node) ast.Visitor {
 	return c
 }
 
-type Visitor struct {
-}
+type Visitor struct{}
 
 func (v *Visitor) Visit(node ast.Node) ast.Visitor {
 	tk := node.GetToken()

From 7e17e11db6c46e974cd6540af30564c17be505e1 Mon Sep 17 00:00:00 2001
From: Matthew Warman <mcwarman@gmail.com>
Date: Fri, 1 Nov 2024 11:58:37 +0000
Subject: [PATCH 2/2] docs: fix spelling error in comment

Co-authored-by: Hilmar Falkenberg <hilmar.falkenberg@sap.com>
---
 ast/ast.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ast/ast.go b/ast/ast.go
index b04a41a2..1f3e86a8 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -283,7 +283,7 @@ func checkLineBreak(t *token.Token) bool {
 		lbc := "\n"
 		prev := t.Prev
 		var adjustment int
-		// if the previous type is sequence entry user the previous type for that
+		// if the previous type is sequence entry use the previous type for that
 		if prev.Type == token.SequenceEntryType {
 			// as well as switching to previous type count any new lines in origin to account for:
 			// -