From 9e58f7eeda83f7c5aede71e28205863b61476ff0 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Tue, 12 Nov 2024 00:04:07 +0900 Subject: [PATCH] fix parsing of escaped new-line-character or white-space in double quoted text --- lexer/lexer_test.go | 37 ++++++++++++++++++++++++++++++++++++- scanner/scanner.go | 20 +++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 9cae3f17..b57fff41 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -550,6 +550,41 @@ func TestTokenize(t *testing.T) { }, }, }, + { + YAML: ` +a: + "bbb \ + ccc + + ddd eee\n\ + \ \ fff ggg\nhhh iii\n + jjj kkk + " +`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "a", + Origin: "\na", + }, + { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.DoubleQuoteType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.QuotedScalarIndicator, + Value: "bbb ccc\nddd eee\n fff ggg\nhhh iii\n jjj kkk ", + Origin: "\n \"bbb \\\n ccc\n\n ddd eee\\n\\\n \\ \\ fff ggg\\nhhh iii\\n\n jjj kkk\n \"", + }, + }, + }, { YAML: `v: null `, @@ -2948,7 +2983,7 @@ foo2: 'bar2'`, { line: 1, column: 6, - value: "test bar", + value: "test\n\n\n\nbar", }, { line: 7, diff --git a/scanner/scanner.go b/scanner/scanner.go index 746b8c78..8739bf58 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -294,7 +294,15 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { c := src[idx] ctx.addOriginBuf(c) if s.isNewLineChar(c) { - value = append(value, ' ') + if isFirstLineChar { + if value[len(value)-1] == ' ' { + value[len(value)-1] = '\n' + } else { + value = append(value, '\n') + } + } else { + value = append(value, ' ') + } isFirstLineChar = true isNewLine = true s.progressLine(ctx) @@ -388,6 +396,16 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { progress = 1 ctx.addOriginBuf(nextChar) value = append(value, c) + case '\n': + isFirstLineChar = true + isNewLine = true + ctx.addOriginBuf(nextChar) + s.progressColumn(ctx, 1) + s.progressLine(ctx) + idx++ + continue + case ' ': + // skip escape character. default: value = append(value, c) }