diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 30c30692..4eb88e8d 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -36,7 +36,7 @@ jobs: - name: test run: | make test - + i386-test: name: Test in i386 strategy: @@ -56,7 +56,21 @@ jobs: - name: test run: | make simple-test - + + fuzz: + name: Fuzzing Test + runs-on: ubuntu-latest + steps: + - name: checkout + uses: actions/checkout@v4 + - name: setup Go + uses: actions/setup-go@v4 + with: + go-version: "1.23" + - name: run + run: | + make fuzz + ycat: name: ycat runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index c8af8e5a..91660623 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,10 @@ simple-test: testmod go test -v ./... go test -v ./testdata -modfile=$(TESTMOD) +.PHONY: fuzz +fuzz: + go test -fuzz=Fuzz -fuzztime 60s + .PHONY: cover cover: testmod go test -coverpkg=.,./ast,./lexer,./parser,./printer,./scanner,./token -coverprofile=cover.out -modfile=$(TESTMOD) ./... ./testdata diff --git a/decode.go b/decode.go index b367ccf8..d07f1f41 100644 --- a/decode.go +++ b/decode.go @@ -1674,6 +1674,12 @@ func (d *Decoder) decodeMap(ctx context.Context, dst reflect.Value, src ast.Node mapValue.SetMapIndex(d.createDecodableValue(keyType), d.castToAssignableValue(dstValue, valueType)) continue } + if keyType.Kind() != k.Kind() { + return errors.ErrSyntax( + fmt.Sprintf("cannot convert %q type to %q type", k.Kind(), keyType.Kind()), + key.GetToken(), + ) + } mapValue.SetMapIndex(k, d.castToAssignableValue(dstValue, valueType)) } dst.Set(mapValue) diff --git a/fuzz_test.go b/fuzz_test.go new file mode 100644 index 00000000..3485197e --- /dev/null +++ b/fuzz_test.go @@ -0,0 +1,52 @@ +package yaml_test + +import ( + "strings" + "testing" + + "github.com/goccy/go-yaml" +) + +func FuzzUnmarshalToMap(f *testing.F) { + const validYAML = ` +id: 1 +message: Hello, World +verified: true +` + + invalidYAML := []string{ + "0::", + "{0", + "*-0", + ">\n>", + "&{0", + "0_", + "0\n:", + "0\n-", + "0\n0", + "0\n0\n", + "0\n0\n0", + "0\n0\n0\n", + "0\n0\n0\n0", + "0\n0\n0\n0\n", + "0\n0\n0\n0\n0", + "0\n0\n0\n0\n0\n", + "0\n0\n0\n0\n0\n0", + "0\n0\n0\n0\n0\n0\n", + "", + } + + f.Add([]byte(validYAML)) + for _, s := range invalidYAML { + f.Add([]byte(s)) + f.Add([]byte(validYAML + s)) + f.Add([]byte(s + validYAML)) + f.Add([]byte(s + validYAML + s)) + f.Add([]byte(strings.Repeat(s, 3))) + } + + f.Fuzz(func(t *testing.T, src []byte) { + v := map[string]any{} + _ = yaml.Unmarshal(src, &v) + }) +} diff --git a/parser/context.go b/parser/context.go index 683a033d..36ceb58a 100644 --- a/parser/context.go +++ b/parser/context.go @@ -69,6 +69,10 @@ func (c *context) nextNotCommentToken() *Token { return nil } +func (c *context) isTokenNotFound() bool { + return c.currentToken() == nil +} + func (c *context) withGroup(g *TokenGroup) *context { ctx := *c ctx.tokenRef = &tokenRef{ diff --git a/parser/parser.go b/parser/parser.go index 803d5842..eab0fd87 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -175,6 +175,9 @@ func (p *parser) parseToken(ctx *context, tk *Token) (ast.Node, error) { return nil, err } ctx.goNext() + if ctx.isTokenNotFound() { + return nil, errors.ErrSyntax("could not find anchor value", tk.RawToken()) + } value, err := p.parseToken(ctx, ctx.currentToken()) if err != nil { return nil, err @@ -244,6 +247,9 @@ func (p *parser) parseScalarValue(ctx *context, tk *Token) (ast.ScalarNode, erro return nil, err } ctx.goNext() + if ctx.isTokenNotFound() { + return nil, errors.ErrSyntax("could not find anchor value", tk.RawToken()) + } value, err := p.parseToken(ctx, ctx.currentToken()) if err != nil { return nil, err @@ -338,6 +344,9 @@ func (p *parser) parseFlowMap(ctx *context) (*ast.MappingNode, error) { ctx.goNext() } else { ctx.goNext() + if ctx.isTokenNotFound() { + return nil, errors.ErrSyntax("could not find map value", colonTk.RawToken()) + } value, err := p.parseToken(ctx, ctx.currentToken()) if err != nil { return nil, err @@ -472,6 +481,9 @@ func (p *parser) parseMap(ctx *context) (*ast.MappingNode, error) { } func (p *parser) validateMapKeyValueNextToken(ctx *context, keyTk, tk *Token) error { + if tk == nil { + return nil + } if tk.Column() <= keyTk.Column() { return nil } @@ -519,12 +531,18 @@ func (p *parser) parseMapKey(ctx *context, g *TokenGroup) (ast.MapKeyNode, error } if g.First().Type() == token.MappingKeyType { mapKeyTk := g.First() + if mapKeyTk.Group == nil { + return nil, errors.ErrSyntax("could not find value for mapping key", mapKeyTk.RawToken()) + } ctx := ctx.withGroup(mapKeyTk.Group) key, err := newMappingKeyNode(ctx, mapKeyTk) if err != nil { return nil, err } ctx.goNext() // skip mapping key token + if ctx.isTokenNotFound() { + return nil, errors.ErrSyntax("could not find value for mapping key", mapKeyTk.RawToken()) + } scalar, err := p.parseScalarValue(ctx, ctx.currentToken()) if err != nil { @@ -689,6 +707,10 @@ func (p *parser) parseAnchor(ctx *context, g *TokenGroup) (*ast.AnchorNode, erro return nil, err } ctx.goNext() + if ctx.isTokenNotFound() { + return nil, errors.ErrSyntax("could not find anchor value", anchor.GetToken()) + } + value, err := p.parseToken(ctx, ctx.currentToken()) if err != nil { return nil, err @@ -703,6 +725,10 @@ func (p *parser) parseAnchorName(ctx *context) (*ast.AnchorNode, error) { return nil, err } ctx.goNext() + if ctx.isTokenNotFound() { + return nil, errors.ErrSyntax("could not find anchor value", anchor.GetToken()) + } + anchorName, err := p.parseScalarValue(ctx, ctx.currentToken()) if err != nil { return nil, err @@ -720,6 +746,9 @@ func (p *parser) parseAlias(ctx *context) (*ast.AliasNode, error) { return nil, err } ctx.goNext() + if ctx.isTokenNotFound() { + return nil, errors.ErrSyntax("could not find alias value", alias.GetToken()) + } aliasName, err := p.parseScalarValue(ctx, ctx.currentToken()) if err != nil { @@ -834,13 +863,17 @@ func (p *parser) parseFlowSequence(ctx *context) (*ast.SequenceNode, error) { return nil, errors.ErrSyntax("',' or ']' must be specified", tk.RawToken()) } - if tk := ctx.currentToken(); tk != nil && tk.Type() == token.SequenceEndType { + if tk := ctx.currentToken(); tk.Type() == token.SequenceEndType { // this case is here: "[ elem, ]". // In this case, ignore the last element and break sequence parsing. node.End = tk.RawToken() break } + if ctx.isTokenNotFound() { + break + } + value, err := p.parseToken(ctx.withIndex(uint(len(node.Values))).withFlow(true), ctx.currentToken()) if err != nil { return nil, err diff --git a/scanner/context.go b/scanner/context.go index 92207786..6d5e7ebf 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -175,7 +175,7 @@ func (c *Context) updateDocumentNewLineInFolded(column int) { return } if c.docLineIndentColumn == c.docPrevLineIndentColumn { - if c.buf[len(c.buf)-1] == '\n' { + if len(c.buf) != 0 && c.buf[len(c.buf)-1] == '\n' { c.buf[len(c.buf)-1] = ' ' } } diff --git a/scanner/scanner.go b/scanner/scanner.go index 9dda5c9f..26777be6 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -1002,6 +1002,11 @@ func (s *Scanner) scanDocumentHeaderOption(ctx *Context) error { if hasComment { commentLen := orgOptLen - len(opt) headerPos := strings.Index(string(ctx.obuf), "|") + if len(ctx.obuf) < commentLen+headerPos { + invalidTk := token.Invalid("found invalid literal header option", string(ctx.obuf), s.pos()) + s.progressColumn(ctx, progress) + return ErrInvalidToken(invalidTk) + } litBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] commentBuf := ctx.obuf[len(litBuf):] ctx.addToken(token.Literal("|"+opt, string(litBuf), s.pos())) @@ -1017,6 +1022,11 @@ func (s *Scanner) scanDocumentHeaderOption(ctx *Context) error { if hasComment { commentLen := orgOptLen - len(opt) headerPos := strings.Index(string(ctx.obuf), ">") + if len(ctx.obuf) < commentLen+headerPos { + invalidTk := token.Invalid("found invalid folded header option", string(ctx.obuf), s.pos()) + s.progressColumn(ctx, progress) + return ErrInvalidToken(invalidTk) + } foldedBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] commentBuf := ctx.obuf[len(foldedBuf):] ctx.addToken(token.Folded(">"+opt, string(foldedBuf), s.pos()))