From 354153650f3fbd6a3d117f0353bf9ae791041818 Mon Sep 17 00:00:00 2001 From: Charith Ellawala Date: Mon, 29 Apr 2024 13:22:03 +0100 Subject: [PATCH] fix: Parse indented newlines correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the case where an indented newline (a line containing only indentation) causes invalid parse results. Consider the following input, where the second line consists of three spaces followed by a newline. ```yaml a:·b␊ ···␊ c:·d␊ ``` Before this fix, the parser produces the following: ```yaml a: null b c: d ``` With this fix, the output is as expected. ```yaml a: b c: d ``` Signed-off-by: Charith Ellawala --- parser/parser_test.go | 26 ++++++++++++++++++++------ parser/testdata/indented_new_line.yml | 4 ++++ scanner/context.go | 23 +++++++++++++++-------- 3 files changed, 39 insertions(+), 14 deletions(-) create mode 100644 parser/testdata/indented_new_line.yml diff --git a/parser/parser_test.go b/parser/parser_test.go index 8d697e8c..3eebb2ff 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -180,7 +180,8 @@ a: 0 - 1 a: 0 - 1 `, }, - {` + { + ` - a: b: c d: e @@ -456,7 +457,7 @@ d: eeeeeeeeeeeeeeeee }, { ` -a: b +a: b c `, ` @@ -465,7 +466,7 @@ a: b c }, { ` -a: +a: b: c `, ` @@ -475,7 +476,7 @@ a: }, { ` -a: b +a: b c: d `, ` @@ -625,6 +626,20 @@ b: 1 } } +func TestIndentedNewLine(t *testing.T) { + ast, err := parser.ParseFile(filepath.Join("testdata", "indented_new_line.yml"), 0) + if err != nil { + t.Fatalf("%+v", err) + } + actual := fmt.Sprintf("%v", ast) + expect := `a: b +c: d +` + if expect != actual { + t.Fatalf("Expected:\n%s\n\nActual:\n%s\n", expect, actual) + } +} + func TestSyntaxError(t *testing.T) { tests := []struct { source string @@ -999,8 +1014,7 @@ func (c *pathCapturer) Visit(node ast.Node) ast.Visitor { return c } -type Visitor struct { -} +type Visitor struct{} func (v *Visitor) Visit(node ast.Node) ast.Visitor { tk := node.GetToken() diff --git a/parser/testdata/indented_new_line.yml 
b/parser/testdata/indented_new_line.yml new file mode 100644 index 00000000..cf55fc41 --- /dev/null +++ b/parser/testdata/indented_new_line.yml @@ -0,0 +1,4 @@ +# The line following a:b has 3 spaces +a: b + +c: d diff --git a/scanner/context.go b/scanner/context.go index 3aaec561..fa74a96e 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -25,13 +25,11 @@ type Context struct { literalOpt string } -var ( - ctxPool = sync.Pool{ - New: func() interface{} { - return createContext() - }, - } -) +var ctxPool = sync.Pool{ + New: func() interface{} { + return createContext() + }, +} func createContext() *Context { return &Context{ @@ -101,7 +99,7 @@ func (c *Context) addBuf(r rune) { func (c *Context) addOriginBuf(r rune) { c.obuf = append(c.obuf, r) - if r != ' ' && r != '\t' { + if r != ' ' && r != '\t' && r != '\n' { c.notSpaceOrgCharPos = len(c.obuf) } } @@ -110,6 +108,15 @@ func (c *Context) removeRightSpaceFromBuf() int { trimmedBuf := c.obuf[:c.notSpaceOrgCharPos] buflen := len(trimmedBuf) diff := len(c.obuf) - buflen + + // only calculate the space chopped up to the first newline + for i := c.notSpaceOrgCharPos; i < len(c.obuf); i++ { + if c.obuf[i] == '\n' || c.obuf[i] == '\r' { + diff = i - c.notSpaceOrgCharPos + break + } + } + if diff > 0 { c.obuf = c.obuf[:buflen] c.buf = c.bufferedSrc()