diff --git a/parser/parser_test.go b/parser/parser_test.go index 8d697e8c..3dc94401 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -83,6 +83,9 @@ func TestParser(t *testing.T) { }`, "\"a\": a\n\"b\": b", "'a': a\n'b': b", + "a: \r\n b: 1\r\n", + "a_ok: \r bc: 2\r", + "a_mk: \n bd: 3\n", } for _, src := range sources { if _, err := parser.Parse(lexer.Tokenize(src), 0); err != nil { diff --git a/scanner/scanner.go b/scanner/scanner.go index b0eac48d..444eb4ae 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -621,6 +621,16 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) { } } + // A CR that is followed by LF can safely be ignored and the pair normalized to LF, as the YAML 1.2 spec allows: + // > Line breaks inside scalar content must be normalized by the YAML processor. Each such line break must be parsed into a single line feed character. + // > Outside scalar content, YAML allows any line break to be used to terminate lines. + // > -- https://yaml.org/spec/1.2/spec.html + if c == '\r' && ctx.nextChar() == '\n' { + ctx.addOriginBuf('\r') + ctx.progress(1) + c = '\n' + } + if ctx.isEOS() { s.addBufferedTokenIfExists(ctx) } else if s.isAnchor { @@ -840,15 +850,6 @@ func (s *Scanner) scan(ctx *Context) (pos int) { return } case '\r', '\n': - // There is no problem that we ignore CR which followed by LF and normalize it to LF, because of following YAML1.2 spec. - // > Line breaks inside scalar content must be normalized by the YAML processor. Each such line break must be parsed into a single line feed character. - // > Outside scalar content, YAML allows any line break to be used to terminate lines. - // > -- https://yaml.org/spec/1.2/spec.html - if c == '\r' && ctx.nextChar() == '\n' { - ctx.addOriginBuf('\r') - ctx.progress(1) - c = '\n' - } s.scanNewLine(ctx, c) continue case ' ':