From dbcc3e20dc4484c1ce0d9c1b956dd56bc4936d37 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 22 Dec 2019 11:50:07 +0900 Subject: [PATCH 1/4] Add test case of parser for CRLF --- parser/parser_test.go | 18 ++++++++++++++++++ parser/testdata/cr.yml | 1 + parser/testdata/crlf.yml | 2 ++ parser/testdata/lf.yml | 2 ++ 4 files changed, 23 insertions(+) create mode 100644 parser/testdata/cr.yml create mode 100644 parser/testdata/crlf.yml create mode 100644 parser/testdata/lf.yml diff --git a/parser/parser_test.go b/parser/parser_test.go index 272c5601..2deddb25 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -2,6 +2,8 @@ package parser_test import ( "fmt" + "io/ioutil" + "path/filepath" "strings" "testing" @@ -490,6 +492,22 @@ c: d } } +func TestNewLineChar(t *testing.T) { + for _, f := range []string{ + "lf.yml", + "cr.yml", + "crlf.yml", + } { + file, err := ioutil.ReadFile(filepath.Join("testdata", f)) + if err != nil { + t.Fatalf("%+v", err) + } + if _, err := parser.ParseBytes(file, 0); err != nil { + t.Fatalf("%+v", err) + } + } +} + func TestSyntaxError(t *testing.T) { sources := []string{ "a:\n- b\n c: d\n e: f\n g: h", diff --git a/parser/testdata/cr.yml b/parser/testdata/cr.yml new file mode 100644 index 00000000..c6b698a5 --- /dev/null +++ b/parser/testdata/cr.yml @@ -0,0 +1 @@ +a: 1 b: 2 \ No newline at end of file diff --git a/parser/testdata/crlf.yml b/parser/testdata/crlf.yml new file mode 100644 index 00000000..c796cb32 --- /dev/null +++ b/parser/testdata/crlf.yml @@ -0,0 +1,2 @@ +a: 1 +b: 2 diff --git a/parser/testdata/lf.yml b/parser/testdata/lf.yml new file mode 100644 index 00000000..083c5aec --- /dev/null +++ b/parser/testdata/lf.yml @@ -0,0 +1,2 @@ +a: 1 +b: 2 From 90c6152d4bc6a051532cb349f41cb64aba5eb07a Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 22 Dec 2019 11:59:24 +0900 Subject: [PATCH 2/4] Fix test case --- parser/parser_test.go | 10 +++++++++- parser/testdata/cr.yml | 2 +- parser/testdata/crlf.yml | 4 ++-- parser/testdata/lf.yml | 4 ++-- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/parser/parser_test.go b/parser/parser_test.go index 2deddb25..59ea24d7 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -502,9 +502,17 @@ func TestNewLineChar(t *testing.T) { if err != nil { t.Fatalf("%+v", err) } - if _, err := parser.ParseBytes(file, 0); err != nil { + ast, err := parser.ParseBytes(file, 0) + if err != nil { t.Fatalf("%+v", err) } + actual := fmt.Sprintf("%v\n", ast) + expect := `a: "a" +b: 1 +` + if expect != actual { + t.Fatal("unexpected result") + } } } diff --git a/parser/testdata/cr.yml b/parser/testdata/cr.yml index c6b698a5..8e558db1 100644 --- a/parser/testdata/cr.yml +++ b/parser/testdata/cr.yml @@ -1 +1 @@ -a: 1 b: 2 \ No newline at end of file +a: "a" b: 1 \ No newline at end of file diff --git a/parser/testdata/crlf.yml b/parser/testdata/crlf.yml index c796cb32..0f25504e 100644 --- a/parser/testdata/crlf.yml +++ b/parser/testdata/crlf.yml @@ -1,2 +1,2 @@ -a: 1 -b: 2 +a: "a" +b: 1 diff --git a/parser/testdata/lf.yml b/parser/testdata/lf.yml index 083c5aec..416d8ba0 100644 --- a/parser/testdata/lf.yml +++ b/parser/testdata/lf.yml @@ -1,2 +1,2 @@ -a: 1 -b: 2 +a: "a" +b: 1 From 0459fbb51ef96504e42c76b8fc9ac020708ac3bc Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 22 Dec 2019 11:59:49 +0900 Subject: [PATCH 3/4] Fix scanner and parser for CR character --- parser/parser.go | 18 ++++++++++++++++-- scanner/scanner.go | 28 +++++++++++++++++++--------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 83c08877..75734cc7 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -74,12 +74,26 @@ func (p *parser) parseTag(ctx *context) (ast.Node, error) { return node, nil } +func (p *parser) removeLeftSideNewLineCharacter(src string) string { + return strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n") +} + +func (p *parser) existsNewLineCharacter(src string) bool { + if strings.Index(src, "\n") > 0 { + return true + } + if strings.Index(src, "\r") > 0 { + return true + } + return false +} + func (p *parser) validateMapKey(tk *token.Token) error { if tk.Type != token.StringType { return nil } - origin := strings.TrimLeft(tk.Origin, "\n") - if strings.Index(origin, "\n") > 0 { + origin := p.removeLeftSideNewLineCharacter(tk.Origin) + if p.existsNewLineCharacter(origin) { return errors.ErrSyntax("unexpected key name", tk) } return nil diff --git a/scanner/scanner.go b/scanner/scanner.go index d8bb770a..33d47e62 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -101,8 +101,18 @@ func (s *Scanner) isNeededKeepPreviousIndentNum(ctx *Context, c rune) bool { return false } +func (s *Scanner) isNewLineChar(c rune) bool { + if c == '\n' { + return true + } + if c == '\r' { + return true + } + return false +} + func (s *Scanner) updateIndent(ctx *Context, c rune) { - if s.isFirstCharAtLine && c == '\n' && ctx.isDocument() { + if s.isFirstCharAtLine && s.isNewLineChar(c) && ctx.isDocument() { return } if s.isFirstCharAtLine && c == ' ' { @@ -199,7 +209,7 @@ func (s *Scanner) scanTag(ctx *Context) (tk *token.Token, pos int) { pos = idx + 1 ctx.addOriginBuf(c) switch c { - case ' ', '\n': + case ' ', '\n', '\r': value := ctx.source(ctx.idx-1, ctx.idx+idx) tk = token.Tag(value, string(ctx.obuf), s.pos()) pos = len([]rune(value)) @@ -216,7 +226,7 @@ func (s *Scanner) scanComment(ctx *Context) (tk *token.Token, pos int) { pos = idx + 1 ctx.addOriginBuf(c) switch c { - case '\n': + case '\n', '\r': if ctx.previousChar() == '\\' { continue } @@ -237,7 +247,7 @@ func (s *Scanner) scanLiteral(ctx *Context, c rune) { ctx.addToken(token.New(value, string(ctx.obuf), s.pos())) ctx.resetBuffer() s.progressColumn(ctx, 1) - } else if c == '\n' { + } else if s.isNewLineChar(c) { if ctx.isLiteral { ctx.addBuf(c) } else { @@ -266,7 +276,7 @@ func (s *Scanner) scanLiteralHeader(ctx *Context) (pos int, err error) { pos = idx ctx.addOriginBuf(c) switch c { - case '\n': + case '\n', '\r': value := ctx.source(ctx.idx, ctx.idx+idx) opt := strings.TrimRight(value, " ") switch opt { @@ -337,8 +347,8 @@ func (s *Scanner) scan(ctx *Context) (pos int) { } else if s.isChangedToIndentStateDown() { s.addBufferedTokenIfExists(ctx) } else if s.isChangedToIndentStateEqual() { - // if first character is \n, buffer expect to raw folded literal - if len(ctx.obuf) > 0 && ctx.obuf[0] != '\n' { + // if first character is new line character, buffer expect to raw folded literal + if len(ctx.obuf) > 0 && !s.isNewLineChar(ctx.obuf[0]) { // doesn't raw folded literal s.addBufferedTokenIfExists(ctx) } @@ -436,7 +446,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { } case ':': nc := ctx.nextChar() - if nc == ' ' || nc == '\n' || ctx.isNextEOS() { + if nc == ' ' || s.isNewLineChar(nc) || ctx.isNextEOS() { // mapping value tk := s.bufferedToken(ctx) if tk != nil { @@ -463,7 +473,7 @@ func (s *Scanner) scan(ctx *Context) (pos int) { token, progress := s.scanTag(ctx) ctx.addToken(token) s.progressColumn(ctx, progress) - if c := ctx.previousChar(); c == '\n' { + if c := ctx.previousChar(); s.isNewLineChar(c) { s.progressLine(ctx) } pos += progress From 825e932247ea62f9e811761764de49329b6df8e7 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 22 Dec 2019 12:08:45 +0900 Subject: [PATCH 4/4] Fix test case ( use parser.ParseFile ) --- parser/parser_test.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/parser/parser_test.go b/parser/parser_test.go index 59ea24d7..73a89ffa 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -2,7 +2,6 @@ package parser_test import ( "fmt" - "io/ioutil" "path/filepath" "strings" "testing" @@ -498,11 +497,7 @@ func TestNewLineChar(t *testing.T) { "cr.yml", "crlf.yml", } { - file, err := ioutil.ReadFile(filepath.Join("testdata", f)) - if err != nil { - t.Fatalf("%+v", err) - } - ast, err := parser.ParseBytes(file, 0) + ast, err := parser.ParseFile(filepath.Join("testdata", f), 0) if err != nil { t.Fatalf("%+v", err) }