Skip to content

Commit

Permalink
Fix parser (#554)
Browse files Browse the repository at this point in the history
* add "no json" marker

* fix document separator in quote

* fix invalid escape character

* fix parser
  • Loading branch information
goccy authored Nov 30, 2024
1 parent 01f4bac commit c5254d7
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 41 deletions.
2 changes: 1 addition & 1 deletion decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ func TestDecoder(t *testing.T) {
},
{
"a: \"\\0\"\n",
map[string]string{"a": "\\0"},
map[string]string{"a": "\x00"},
},
{
"b: 2\na: 1\nd: 4\nc: 3\nsub:\n e: 5\n",
Expand Down
2 changes: 1 addition & 1 deletion lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1347,7 +1347,7 @@ t4: 2098-01-09T10:40:47Z
Type: token.DoubleQuoteType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.QuotedScalarIndicator,
Value: "\\0",
Value: "\x00",
Origin: " \"\\0\"",
},
},
Expand Down
9 changes: 9 additions & 0 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,15 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token)
return anchor, nil
}

if tk.Column() <= keyCol && tk.GroupType() == TokenGroupAnchorName {
// key: <value is not defined>
// &anchor
//
// key: <value is not defined>
// &anchor
return nil, errors.ErrSyntax("anchor is not allowed in this context", tk.RawToken())
}

if tk.Column() < keyCol {
// in this case,
// ----
Expand Down
10 changes: 10 additions & 0 deletions parser/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,16 @@ func createDocumentTokens(tokens []*Token) ([]*Token, error) {
},
}), nil
}
if tokens[i].Line() == tokens[i+1].Line() {
switch tokens[i+1].GroupType() {
case TokenGroupMapKey, TokenGroupMapKeyValue:
return nil, errors.ErrSyntax("value cannot be placed after document separator", tokens[i+1].RawToken())
}
switch tokens[i+1].Type() {
case token.SequenceEntryType:
return nil, errors.ErrSyntax("value cannot be placed after document separator", tokens[i+1].RawToken())
}
}
tks, err := createDocumentTokens(tokens[i+1:])
if err != nil {
return nil, err
Expand Down
48 changes: 44 additions & 4 deletions scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,15 +255,19 @@ func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) {
isFirstLineChar = true
isNewLine = true
s.progressLine(ctx)
if idx+1 < size {
if err := s.validateDocumentSeparatorMarker(ctx, src[idx+1:]); err != nil {
return nil, err
}
}
continue
} else if isFirstLineChar && (c == ' ' || c == '\t') {
continue
} else if c != '\'' {
value = append(value, c)
isFirstLineChar = false
continue
}
if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' {
} else if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' {
// '' handle as ' character
value = append(value, c)
ctx.addOriginBuf(c)
Expand Down Expand Up @@ -339,6 +343,11 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
isFirstLineChar = true
isNewLine = true
s.progressLine(ctx)
if idx+1 < size {
if err := s.validateDocumentSeparatorMarker(ctx, src[idx+1:]); err != nil {
return nil, err
}
}
continue
} else if isFirstLineChar && (c == ' ' || c == '\t') {
continue
Expand Down Expand Up @@ -445,10 +454,20 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
progress = 1
ctx.addOriginBuf(nextChar)
value = append(value, nextChar)
case '0':
progress = 1
ctx.addOriginBuf(nextChar)
value = append(value, '\x00')
case ' ':
// skip escape character.
default:
value = append(value, c)
s.progressColumn(ctx, 1)
return nil, ErrInvalidToken(
token.Invalid(
fmt.Sprintf("found unknown escape character %q", nextChar),
string(ctx.obuf), s.pos(),
),
)
}
idx += progress
s.progressColumn(ctx, progress)
Expand Down Expand Up @@ -495,6 +514,26 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
)
}

// validateDocumentSeparatorMarker reports an invalid-token error when src
// begins with a document separator marker ("---" or "...").
//
// The marker only counts when it is the entire remaining input (exactly three
// runes) or when the rune right after it is a space, tab, line feed or
// carriage return. Inputs shorter than three runes can never hold a marker
// and are accepted as-is.
//
// The error is built from the context's current origin buffer and the
// scanner's current position; nil is returned when no marker is found.
func (s *Scanner) validateDocumentSeparatorMarker(ctx *Context, src []rune) error {
	isBlank := func(r rune) bool {
		switch r {
		case ' ', '\t', '\n', '\r':
			return true
		}
		return false
	}

	var head string
	switch n := len(src); {
	case n < 3:
		// Too short to contain a three-character marker.
		return nil
	case n == 3:
		head = string(src)
	default:
		// Look at one extra rune so that "---x" is not mistaken for a marker,
		// while "--- " and "---\n" still are once the trailing blank is dropped.
		head = strings.TrimRightFunc(string(src[:4]), isBlank)
	}

	switch head {
	case "---", "...":
		return ErrInvalidToken(
			token.Invalid("found unexpected document separator", string(ctx.obuf), s.pos()),
		)
	}
	return nil
}

func (s *Scanner) scanQuote(ctx *Context, ch rune) (bool, error) {
if ctx.existsBuffer() {
return false, nil
Expand Down Expand Up @@ -719,7 +758,7 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) {

if ctx.isEOS() {
s.addBufferedTokenIfExists(ctx)
} else if s.isAnchor || s.isAlias {
} else if s.isAnchor || s.isAlias || s.isDirective {
s.addBufferedTokenIfExists(ctx)
}
if ctx.existsBuffer() && s.isFirstCharAtLine {
Expand Down Expand Up @@ -883,6 +922,7 @@ func (s *Scanner) scanDocumentEnd(ctx *Context) bool {
return false
}

s.addBufferedTokenIfExists(ctx)
ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos()))
s.progressColumn(ctx, 3)
ctx.clear()
Expand Down
62 changes: 27 additions & 35 deletions yaml_test_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ import (
)

var failureTestNames = []string{
"anchors-on-empty-scalars",
"aliases-in-flow-objects",
"aliases-in-explicit-block-mapping",
"anchors-on-empty-scalars", // no json.
"aliases-in-flow-objects", // no json.
"aliases-in-explicit-block-mapping", // no json.
"aliases-in-implicit-block-mapping",
"bare-document-after-document-end-marker",
"block-mapping-with-missing-keys",
"block-mapping-with-missing-keys", // no json.
"block-mapping-with-missing-values",
"block-mapping-with-multiline-scalars",
"block-scalar-with-more-spaces-than-first-content-line",
Expand All @@ -36,26 +36,21 @@ var failureTestNames = []string{
"escaped-slash-in-double-quotes",
"explicit-key-and-value-seperated-by-comment", //nolint: misspell
"extra-words-on-yaml-directive",
"empty-implicit-key-in-single-pair-flow-sequences",
"empty-keys-in-block-and-flow-mapping",
"empty-lines-at-end-of-document",
"flow-mapping-separate-values",
"empty-implicit-key-in-single-pair-flow-sequences", // no json.
"empty-keys-in-block-and-flow-mapping", // no json.
"empty-lines-at-end-of-document", // no json.
"flow-mapping-separate-values", // no json.
"flow-sequence-in-flow-mapping",
"flow-collections-over-many-lines/01",
"flow-mapping-colon-on-line-after-key/02",
"flow-mapping-edge-cases",
"flow-sequence-with-invalid-comma-at-the-beginning",
"folded-block-scalar",
"folded-block-scalar-1-3",
"implicit-flow-mapping-key-on-one-line",
"invalid-anchor-in-zero-indented-sequence",
"implicit-flow-mapping-key-on-one-line", // no json.
"invalid-comment-after-comma",
"invalid-comment-after-end-of-flow-sequence",
"invalid-document-end-marker-in-single-quoted-string",
"invalid-document-start-marker-in-doublequoted-tring",
"invalid-escape-in-double-quoted-string",
"invalid-tag",
"key-with-anchor-after-missing-explicit-mapping-value",
"leading-tabs-in-double-quoted/02",
"leading-tabs-in-double-quoted/05",
"legal-tab-after-indentation",
Expand All @@ -65,34 +60,31 @@ var failureTestNames = []string{
"literal-modifers/02",
"literal-modifers/03",
"literal-scalars",
"mapping-key-and-flow-sequence-item-anchors",
"mapping-starting-at-line",
"mapping-with-anchor-on-document-start-line",
"mapping-key-and-flow-sequence-item-anchors", // no json.
"multiline-double-quoted-implicit-keys",
"multiline-plain-flow-mapping-key",
"multiline-plain-value-with-tabs-on-empty-lines",
"multiline-scalar-at-top-level",
"multiline-scalar-at-top-level-1-3",
"multiline-single-quoted-implicit-keys",
"multiline-unidented-double-quoted-block-key",
"nested-implicit-complex-keys",
"nested-implicit-complex-keys", // no json.
"node-anchor-not-indented",
"plain-dashes-in-flow-sequence",
"plain-url-in-flow-mapping",
"question-mark-edge-cases/00",
"question-mark-edge-cases/01",
"question-mark-edge-cases/00", // no json.
"question-mark-edge-cases/01", // no json.
"scalar-doc-with-in-content/01",
"scalar-value-with-two-anchors",
"single-character-streams/01",
"single-pair-implicit-entries",
"spec-example-2-11-mapping-between-sequences",
"spec-example-6-12-separation-spaces",
"spec-example-7-16-flow-mapping-entries",
"spec-example-7-3-completely-empty-flow-nodes",
"spec-example-8-18-implicit-block-mapping-entries",
"spec-example-8-19-compact-block-mappings",
"single-character-streams/01", // no json.
"single-pair-implicit-entries", // no json.
"spec-example-2-11-mapping-between-sequences", // no json.
"spec-example-6-12-separation-spaces", // no json.
"spec-example-7-16-flow-mapping-entries", // no json.
"spec-example-7-3-completely-empty-flow-nodes", // no json.
"spec-example-8-18-implicit-block-mapping-entries", // no json.
"spec-example-8-19-compact-block-mappings", // no json.
"spec-example-6-19-secondary-tag-handle",
"spec-example-6-21-local-tag-prefix",
"spec-example-6-24-verbatim-tags",
"spec-example-6-28-non-specific-tags",
"spec-example-6-4-line-prefixes",
Expand All @@ -112,7 +104,7 @@ var failureTestNames = []string{
"spec-example-9-5-directives-documents",
"spec-example-9-6-stream",
"spec-example-9-6-stream-1-3",
"syntax-character-edge-cases/00",
"syntax-character-edge-cases/00", // no json.
"tab-at-beginning-of-line-followed-by-a-flow-mapping",
"tab-indented-top-flow",
"tabs-in-various-contexts/003",
Expand All @@ -126,14 +118,14 @@ var failureTestNames = []string{
"tabs-that-look-like-indentation/08",
"tag-shorthand-used-in-documents-but-only-defined-in-the-first",
"tags-for-block-objects",
"tags-on-empty-scalars",
"tags-on-empty-scalars", // no json.
"trailing-line-of-spaces/01", // last '\n' character is needed ?
"various-combinations-of-explicit-block-mappings", // no json
"various-trailing-comments", // no json
"various-trailing-comments-1-3", // no json
"various-combinations-of-explicit-block-mappings", // no json.
"various-trailing-comments", // no json.
"various-trailing-comments-1-3", // no json.
"wrong-indented-flow-sequence", // error ?
"wrong-indented-multiline-quoted-scalar", // error ?
"zero-indented-sequences-in-explicit-mapping-keys", // no json
"zero-indented-sequences-in-explicit-mapping-keys", // no json.
}

var failureTestNameMap map[string]struct{}
Expand Down

0 comments on commit c5254d7

Please sign in to comment.