From 68f0bb59fd9334ec4a51f620eef45aa876275b5f Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sat, 14 Dec 2024 22:57:55 +0900 Subject: [PATCH] Fix parsing of invalid tag character (#591) * remove passed test cases * fix invalid tag * fix error text --- parser/parser_test.go | 8 ++--- scanner/context.go | 2 +- scanner/scanner.go | 25 ++++++++++----- yaml_test_suite_test.go | 67 ++++++++++++++++++++--------------------- 4 files changed, 55 insertions(+), 47 deletions(-) diff --git a/parser/parser_test.go b/parser/parser_test.go index 85c9482..8a80925 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1236,7 +1236,7 @@ b: - 2 { ">\n>", ` -[2:1] could not find multi line content +[2:1] could not find multi-line content 1 | > > 2 | > ^ @@ -1245,7 +1245,7 @@ b: - 2 { ">\n1", ` -[2:1] could not find multi line content +[2:1] could not find multi-line content 1 | > > 2 | 1 ^ @@ -1254,7 +1254,7 @@ b: - 2 { "|\n1", ` -[2:1] could not find multi line content +[2:1] could not find multi-line content 1 | | > 2 | 1 ^ @@ -1263,7 +1263,7 @@ b: - 2 { "a: >3\n 1", ` -[2:3] invalid number of indent is specified in the multi line header +[2:3] invalid number of indent is specified in the multi-line header 1 | a: >3 > 2 | 1 ^ diff --git a/scanner/context.go b/scanner/context.go index 06e1fc4..4f3250b 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -168,7 +168,7 @@ func (s *MultiLineState) validateIndentColumn() error { return nil } if s.firstLineIndentColumn > s.lineIndentColumn { - return errors.New("invalid number of indent is specified in the multi line header") + return errors.New("invalid number of indent is specified in the multi-line header") } return nil } diff --git a/scanner/scanner.go b/scanner/scanner.go index 4cd07e9..4ef05e3 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -650,9 +650,9 @@ func (s *Scanner) isMergeKey(ctx *Context) bool { return false } -func (s *Scanner) scanTag(ctx *Context) bool { +func (s *Scanner) scanTag(ctx *Context) (bool, error) { if ctx.existsBuffer() || s.isDirective { - return false + return false, nil } ctx.addOriginBuf('!') @@ -668,14 +668,14 @@ func (s *Scanner) scanTag(ctx *Context) bool { ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos())) s.progressColumn(ctx, len([]rune(value))) ctx.clear() - return true + return true, nil case ',': if s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0 { value := ctx.source(ctx.idx-1, ctx.idx+idx) ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos())) s.progressColumn(ctx, len([]rune(value))-1) // progress column before collect-entry for scanning it at scanFlowEntry function. ctx.clear() - return true + return true, nil } else { ctx.addOriginBuf(c) } @@ -685,14 +685,19 @@ func (s *Scanner) scanTag(ctx *Context) bool { ctx.addToken(token.Tag(value, string(ctx.obuf), s.pos())) s.progressColumn(ctx, len([]rune(value))-1) // progress column before new-line-char for scanning new-line-char at scanNewLine function. ctx.clear() - return true + return true, nil + case '{', '}': + ctx.addOriginBuf(c) + s.progressColumn(ctx, progress) + invalidTk := token.Invalid(fmt.Sprintf("found invalid tag character %q", c), string(ctx.obuf), s.pos()) + return false, ErrInvalidToken(invalidTk) default: ctx.addOriginBuf(c) } } s.progressColumn(ctx, progress) ctx.clear() - return true + return true, nil } func (s *Scanner) scanComment(ctx *Context) bool { @@ -1277,7 +1282,7 @@ func (s *Scanner) scan(ctx *Context) error { if tk.Position.Column == 1 { return ErrInvalidToken( token.Invalid( - "could not find multi line content", + "could not find multi-line content", string(ctx.obuf), s.pos(), ), ) @@ -1354,7 +1359,11 @@ func (s *Scanner) scan(ctx *Context) error { continue } case '!': - if s.scanTag(ctx) { + scanned, err := s.scanTag(ctx) + if err != nil { + return err + } + if scanned { continue } case '%': diff --git a/yaml_test_suite_test.go b/yaml_test_suite_test.go index 8bccd88..ca52dbc 100644 --- a/yaml_test_suite_test.go +++ b/yaml_test_suite_test.go @@ -14,32 +14,17 @@ import ( ) var failureTestNames = []string{ - "anchors-on-empty-scalars", // no json. - "aliases-in-flow-objects", // no json. - "aliases-in-explicit-block-mapping", // no json. - "block-mapping-with-missing-keys", // no json. - "colon-at-the-beginning-of-adjacent-flow-scalar", - "comment-without-whitespace-after-doublequoted-scalar", - "construct-binary", - "dash-in-flow-sequence", + "anchors-on-empty-scalars", // no json. + "aliases-in-flow-objects", // no json. + "aliases-in-explicit-block-mapping", // no json. + "block-mapping-with-missing-keys", // no json. "empty-implicit-key-in-single-pair-flow-sequences", // no json. "empty-keys-in-block-and-flow-mapping", // no json. "empty-lines-at-end-of-document", // no json. "flow-mapping-separate-values", // no json. - "flow-sequence-in-flow-mapping", - "flow-collections-over-many-lines/01", - "flow-mapping-colon-on-line-after-key/02", - "flow-mapping-edge-cases", - "implicit-flow-mapping-key-on-one-line", // no json. - "invalid-comment-after-comma", - "invalid-comment-after-end-of-flow-sequence", - "invalid-comma-in-tag", - "invalid-tag", // pass yamlv3. - "legal-tab-after-indentation", // pass yamlv3. - "mapping-key-and-flow-sequence-item-anchors", // no json. - "multiline-plain-value-with-tabs-on-empty-lines", // pass yamlv3. - "nested-implicit-complex-keys", // no json. - "plain-dashes-in-flow-sequence", + "implicit-flow-mapping-key-on-one-line", // no json. + "mapping-key-and-flow-sequence-item-anchors", // no json. + "nested-implicit-complex-keys", // no json. "question-mark-edge-cases/00", // no json. "question-mark-edge-cases/01", // no json. "single-character-streams/01", // no json. @@ -50,12 +35,32 @@ var failureTestNames = []string{ "spec-example-7-3-completely-empty-flow-nodes", // no json. "spec-example-8-18-implicit-block-mapping-entries", // no json. "spec-example-8-19-compact-block-mappings", // no json. - "spec-example-8-2-block-indentation-indicator", + "syntax-character-edge-cases/00", // no json. + "tags-on-empty-scalars", // no json. + "various-combinations-of-explicit-block-mappings", // no json. + "various-trailing-comments", // no json. + "various-trailing-comments-1-3", // no json. + "zero-indented-sequences-in-explicit-mapping-keys", // no json. + + "legal-tab-after-indentation", // pass yamlv3. + "multiline-plain-value-with-tabs-on-empty-lines", // pass yamlv3. + "tabs-that-look-like-indentation/05", // pass yamlv3. + + "colon-at-the-beginning-of-adjacent-flow-scalar", + "comment-without-whitespace-after-doublequoted-scalar", + "construct-binary", + "dash-in-flow-sequence", + "flow-sequence-in-flow-mapping", + "flow-collections-over-many-lines/01", + "flow-mapping-colon-on-line-after-key/02", + "flow-mapping-edge-cases", + "invalid-comment-after-comma", + "invalid-comment-after-end-of-flow-sequence", + "invalid-comma-in-tag", + "plain-dashes-in-flow-sequence", "spec-example-9-3-bare-documents", - "spec-example-9-4-explicit-documents", "spec-example-9-6-stream", "spec-example-9-6-stream-1-3", - "syntax-character-edge-cases/00", // no json. "tab-at-beginning-of-line-followed-by-a-flow-mapping", "tab-indented-top-flow", "tabs-in-various-contexts/003", @@ -63,17 +68,11 @@ var failureTestNames = []string{ "tabs-that-look-like-indentation/01", "tabs-that-look-like-indentation/03", "tabs-that-look-like-indentation/04", - "tabs-that-look-like-indentation/05", // pass yamlv3. "tabs-that-look-like-indentation/07", "tag-shorthand-used-in-documents-but-only-defined-in-the-first", - "tags-on-empty-scalars", // no json. - "trailing-line-of-spaces/01", // last '\n' character is needed ? - "various-combinations-of-explicit-block-mappings", // no json. - "various-trailing-comments", // no json. - "various-trailing-comments-1-3", // no json. - "wrong-indented-flow-sequence", // error ? - "wrong-indented-multiline-quoted-scalar", // error ? - "zero-indented-sequences-in-explicit-mapping-keys", // no json. + "trailing-line-of-spaces/01", // last '\n' character is needed ? + "wrong-indented-flow-sequence", // error ? + "wrong-indented-multiline-quoted-scalar", // error ? } var failureTestNameMap map[string]struct{}