From 5e7288e620be69f47881838103f6c7d683d4082f Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 29 Nov 2024 11:00:49 +0900 Subject: [PATCH 1/4] add "no json" marker --- yaml_test_suite_test.go | 54 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index 5a54112..dc82826 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -14,12 +14,12 @@ import ( ) var failureTestNames = []string{ - "anchors-on-empty-scalars", - "aliases-in-flow-objects", - "aliases-in-explicit-block-mapping", + "anchors-on-empty-scalars", // no json. + "aliases-in-flow-objects", // no json. + "aliases-in-explicit-block-mapping", // no json. "aliases-in-implicit-block-mapping", "bare-document-after-document-end-marker", - "block-mapping-with-missing-keys", + "block-mapping-with-missing-keys", // no json. "block-mapping-with-missing-values", "block-mapping-with-multiline-scalars", "block-scalar-with-more-spaces-than-first-content-line", @@ -36,10 +36,10 @@ var failureTestNames = []string{ "escaped-slash-in-double-quotes", "explicit-key-and-value-seperated-by-comment", //nolint: misspell "extra-words-on-yaml-directive", - "empty-implicit-key-in-single-pair-flow-sequences", - "empty-keys-in-block-and-flow-mapping", - "empty-lines-at-end-of-document", - "flow-mapping-separate-values", + "empty-implicit-key-in-single-pair-flow-sequences", // no json. + "empty-keys-in-block-and-flow-mapping", // no json. + "empty-lines-at-end-of-document", // no json. + "flow-mapping-separate-values", // no json. "flow-sequence-in-flow-mapping", "flow-collections-over-many-lines/01", "flow-mapping-colon-on-line-after-key/02", @@ -47,7 +47,7 @@ var failureTestNames = []string{ "flow-sequence-with-invalid-comma-at-the-beginning", "folded-block-scalar", "folded-block-scalar-1-3", - "implicit-flow-mapping-key-on-one-line", + "implicit-flow-mapping-key-on-one-line", // no json. "invalid-anchor-in-zero-indented-sequence", "invalid-comment-after-comma", "invalid-comment-after-end-of-flow-sequence", @@ -65,7 +65,7 @@ var failureTestNames = []string{ "literal-modifers/02", "literal-modifers/03", "literal-scalars", - "mapping-key-and-flow-sequence-item-anchors", + "mapping-key-and-flow-sequence-item-anchors", // no json. "mapping-starting-at-line", "mapping-with-anchor-on-document-start-line", "multiline-double-quoted-implicit-keys", @@ -75,22 +75,22 @@ var failureTestNames = []string{ "multiline-scalar-at-top-level-1-3", "multiline-single-quoted-implicit-keys", "multiline-unidented-double-quoted-block-key", - "nested-implicit-complex-keys", + "nested-implicit-complex-keys", // no json. "node-anchor-not-indented", "plain-dashes-in-flow-sequence", "plain-url-in-flow-mapping", - "question-mark-edge-cases/00", - "question-mark-edge-cases/01", + "question-mark-edge-cases/00", // no json. + "question-mark-edge-cases/01", // no json. "scalar-doc-with-in-content/01", "scalar-value-with-two-anchors", - "single-character-streams/01", - "single-pair-implicit-entries", - "spec-example-2-11-mapping-between-sequences", - "spec-example-6-12-separation-spaces", - "spec-example-7-16-flow-mapping-entries", - "spec-example-7-3-completely-empty-flow-nodes", - "spec-example-8-18-implicit-block-mapping-entries", - "spec-example-8-19-compact-block-mappings", + "single-character-streams/01", // no json. + "single-pair-implicit-entries", // no json. + "spec-example-2-11-mapping-between-sequences", // no json. + "spec-example-6-12-separation-spaces", // no json. + "spec-example-7-16-flow-mapping-entries", // no json. + "spec-example-7-3-completely-empty-flow-nodes", // no json. + "spec-example-8-18-implicit-block-mapping-entries", // no json. + "spec-example-8-19-compact-block-mappings", // no json. "spec-example-6-19-secondary-tag-handle", "spec-example-6-21-local-tag-prefix", "spec-example-6-24-verbatim-tags", @@ -112,7 +112,7 @@ var failureTestNames = []string{ "spec-example-9-5-directives-documents", "spec-example-9-6-stream", "spec-example-9-6-stream-1-3", - "syntax-character-edge-cases/00", + "syntax-character-edge-cases/00", // no json. "tab-at-beginning-of-line-followed-by-a-flow-mapping", "tab-indented-top-flow", "tabs-in-various-contexts/003", @@ -126,14 +126,14 @@ var failureTestNames = []string{ "tabs-that-look-like-indentation/08", "tag-shorthand-used-in-documents-but-only-defined-in-the-first", "tags-for-block-objects", - "tags-on-empty-scalars", + "tags-on-empty-scalars", // no json. "trailing-line-of-spaces/01", // last '\n' character is needed ? - "various-combinations-of-explicit-block-mappings", // no json - "various-trailing-comments", // no json - "various-trailing-comments-1-3", // no json + "various-combinations-of-explicit-block-mappings", // no json. + "various-trailing-comments", // no json. + "various-trailing-comments-1-3", // no json. "wrong-indented-flow-sequence", // error ? "wrong-indented-multiline-quoted-scalar", // error ? - "zero-indented-sequences-in-explicit-mapping-keys", // no json + "zero-indented-sequences-in-explicit-mapping-keys", // no json. } var failureTestNameMap map[string]struct{} From 76e669a3a222af570c0b8af38c5d5c9e7611315f Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 29 Nov 2024 20:42:47 +0900 Subject: [PATCH 2/4] fix document separator in quote --- parser/parser.go | 9 +++++++++ scanner/scanner.go | 33 +++++++++++++++++++++++++++++++-- yaml_test_suite_test.go | 3 --- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index eb96408..20908f7 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -670,6 +670,15 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token) return anchor, nil } + if tk.Column() <= keyCol && tk.GroupType() == TokenGroupAnchorName { + // key: + // &anchor + // + // key: + // &anchor + return nil, errors.ErrSyntax("anchor is not allowed in this context", tk.RawToken()) + } + if tk.Column() < keyCol { // in this case, // ---- diff --git a/scanner/scanner.go b/scanner/scanner.go index 501534e..bc8701f 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -255,6 +255,11 @@ func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) { isFirstLineChar = true isNewLine = true s.progressLine(ctx) + if idx+1 < size { + if err := s.validateDocumentSeparatorMarker(ctx, src[idx+1:]); err != nil { + return nil, err + } + } continue } else if isFirstLineChar && (c == ' ' || c == '\t') { continue @@ -262,8 +267,7 @@ func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) { value = append(value, c) isFirstLineChar = false continue - } - if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' { + } else if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' { // '' handle as ' character value = append(value, c) ctx.addOriginBuf(c) @@ -339,6 +343,11 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { isFirstLineChar = true isNewLine = true s.progressLine(ctx) + if idx+1 < size { + if err := s.validateDocumentSeparatorMarker(ctx, src[idx+1:]); err != nil { + return nil, err + } + } continue } else if isFirstLineChar && (c == ' ' || c == '\t') { continue @@ -495,6 +504,26 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { ) } +func (s *Scanner) validateDocumentSeparatorMarker(ctx *Context, src []rune) error { + if len(src) < 3 { + return nil + } + var marker string + if len(src) == 3 { + marker = string(src) + } else { + marker = strings.TrimRightFunc(string(src[:4]), func(r rune) bool { + return r == ' ' || r == '\t' || r == '\n' || r == '\r' + }) + } + if marker == "---" || marker == "..." { + return ErrInvalidToken( + token.Invalid("found unexpected document separator", string(ctx.obuf), s.pos()), + ) + } + return nil +} + func (s *Scanner) scanQuote(ctx *Context, ch rune) (bool, error) { if ctx.existsBuffer() { return false, nil diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index dc82826..8be712e 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -48,11 +48,8 @@ var failureTestNames = []string{ "folded-block-scalar", "folded-block-scalar-1-3", "implicit-flow-mapping-key-on-one-line", // no json. - "invalid-anchor-in-zero-indented-sequence", "invalid-comment-after-comma", "invalid-comment-after-end-of-flow-sequence", - "invalid-document-end-marker-in-single-quoted-string", - "invalid-document-start-marker-in-doublequoted-tring", "invalid-escape-in-double-quoted-string", "invalid-tag", "key-with-anchor-after-missing-explicit-mapping-value", From 352b034bdcfa3cc2f54d5cf92796faf0c322143e Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 29 Nov 2024 20:57:53 +0900 Subject: [PATCH 3/4] fix invalid escape character --- decode_test.go | 2 +- lexer/lexer_test.go | 2 +- scanner/scanner.go | 12 +++++++++++- yaml_test_suite_test.go | 1 - 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/decode_test.go b/decode_test.go index 9be730e..85b7712 100644 --- a/decode_test.go +++ b/decode_test.go @@ -810,7 +810,7 @@ func TestDecoder(t *testing.T) { }, { "a: \"\\0\"\n", - map[string]string{"a": "\\0"}, + map[string]string{"a": "\x00"}, }, { "b: 2\na: 1\nd: 4\nc: 3\nsub:\n e: 5\n", diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 7179517..052c00a 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -1347,7 +1347,7 @@ t4: 2098-01-09T10:40:47Z Type: token.DoubleQuoteType, CharacterType: token.CharacterTypeIndicator, Indicator: token.QuotedScalarIndicator, - Value: "\\0", + Value: "\x00", Origin: " \"\\0\"", }, }, diff --git a/scanner/scanner.go b/scanner/scanner.go index bc8701f..897610b 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -454,10 +454,20 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) { progress = 1 ctx.addOriginBuf(nextChar) value = append(value, nextChar) + case '0': + progress = 1 + ctx.addOriginBuf(nextChar) + value = append(value, '\x00') case ' ': // skip escape character. default: - value = append(value, c) + s.progressColumn(ctx, 1) + return nil, ErrInvalidToken( + token.Invalid( + fmt.Sprintf("found unknown escape character %q", nextChar), + string(ctx.obuf), s.pos(), + ), + ) } idx += progress s.progressColumn(ctx, progress) diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index 8be712e..b76420f 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -50,7 +50,6 @@ var failureTestNames = []string{ "implicit-flow-mapping-key-on-one-line", // no json. "invalid-comment-after-comma", "invalid-comment-after-end-of-flow-sequence", - "invalid-escape-in-double-quoted-string", "invalid-tag", "key-with-anchor-after-missing-explicit-mapping-value", "leading-tabs-in-double-quoted/02", From 341f0bd3b1ebd0f77ec1c1e51ca7981b524c6fb7 Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Fri, 29 Nov 2024 23:55:27 +0900 Subject: [PATCH 4/4] fix parser --- parser/token.go | 10 ++++++++++ scanner/scanner.go | 3 ++- yaml_test_suite_test.go | 4 ---- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/parser/token.go b/parser/token.go index 9897492..2fd3127 100644 --- a/parser/token.go +++ b/parser/token.go @@ -573,6 +573,16 @@ func createDocumentTokens(tokens []*Token) ([]*Token, error) { }, }), nil } + if tokens[i].Line() == tokens[i+1].Line() { + switch tokens[i+1].GroupType() { + case TokenGroupMapKey, TokenGroupMapKeyValue: + return nil, errors.ErrSyntax("value cannot be placed after document separator", tokens[i+1].RawToken()) + } + switch tokens[i+1].Type() { + case token.SequenceEntryType: + return nil, errors.ErrSyntax("value cannot be placed after document separator", tokens[i+1].RawToken()) + } + } tks, err := createDocumentTokens(tokens[i+1:]) if err != nil { return nil, err diff --git a/scanner/scanner.go b/scanner/scanner.go index 897610b..428854a 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -758,7 +758,7 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) { if ctx.isEOS() { s.addBufferedTokenIfExists(ctx) - } else if s.isAnchor || s.isAlias { + } else if s.isAnchor || s.isAlias || s.isDirective { s.addBufferedTokenIfExists(ctx) } if ctx.existsBuffer() && s.isFirstCharAtLine { @@ -922,6 +922,7 @@ func (s *Scanner) scanDocumentEnd(ctx *Context) bool { return false } + s.addBufferedTokenIfExists(ctx) ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos())) s.progressColumn(ctx, 3) ctx.clear() diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index b76420f..27782ba 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -51,7 +51,6 @@ var failureTestNames = []string{ "invalid-comment-after-comma", "invalid-comment-after-end-of-flow-sequence", "invalid-tag", - "key-with-anchor-after-missing-explicit-mapping-value", "leading-tabs-in-double-quoted/02", "leading-tabs-in-double-quoted/05", "legal-tab-after-indentation", @@ -62,8 +61,6 @@ var failureTestNames = []string{ "literal-modifers/03", "literal-scalars", "mapping-key-and-flow-sequence-item-anchors", // no json. - "mapping-starting-at-line", - "mapping-with-anchor-on-document-start-line", "multiline-double-quoted-implicit-keys", "multiline-plain-flow-mapping-key", "multiline-plain-value-with-tabs-on-empty-lines", @@ -88,7 +85,6 @@ var failureTestNames = []string{ "spec-example-8-18-implicit-block-mapping-entries", // no json. "spec-example-8-19-compact-block-mappings", // no json. "spec-example-6-19-secondary-tag-handle", - "spec-example-6-21-local-tag-prefix", "spec-example-6-24-verbatim-tags", "spec-example-6-28-non-specific-tags", "spec-example-6-4-line-prefixes",