Skip to content

Commit

Permalink
fix invalid quoted-text handling (#495)
Browse files: browse the repository at this point in the history
  • Loading branch information
goccy authored Oct 31, 2024
1 parent ad223ff commit 975a624
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 21 deletions.
8 changes: 8 additions & 0 deletions lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2410,6 +2410,14 @@ a: |invalid
src: `
a: |invalid`,
},
{
name: "invalid single-quoted",
src: `a: 'foobarbaz`,
},
{
name: "invalid double-quoted",
src: `a: "\"key\": \"value:\"`,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
Expand Down
18 changes: 17 additions & 1 deletion parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ a: |invalidopt
`,
`
[2:4] found invalid token
> 2 | a:|invalidopt
> 2 | a: |invalidopt
^
3 | foo`,
},
Expand Down Expand Up @@ -797,6 +797,22 @@ b: - 2
^
3 | b: - 2`,
},
{
`a: 'foobarbaz`,
`
[1:4] found invalid token
> 1 | a: 'foobarbaz
^
`,
},
{
`a: "\"key\": \"value:\"`,
`
[1:4] found invalid token
> 1 | a: "\"key\": \"value:\"
^
`,
},
}
for _, test := range tests {
t.Run(test.source, func(t *testing.T) {
Expand Down
56 changes: 36 additions & 20 deletions scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ func (s *Scanner) breakLiteral(ctx *Context) {
ctx.breakLiteral()
}

func (s *Scanner) scanSingleQuote(ctx *Context) *token.Token {
func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) {
ctx.addOriginBuf('\'')
srcpos := s.pos()
startIndex := ctx.idx + 1
Expand All @@ -219,7 +219,6 @@ func (s *Scanner) scanSingleQuote(ctx *Context) *token.Token {
isFirstLineChar := false
isNewLine := false

var tk *token.Token
for idx := startIndex; idx < size; idx++ {
if !isNewLine {
s.progressColumn(ctx, 1)
Expand Down Expand Up @@ -250,10 +249,13 @@ func (s *Scanner) scanSingleQuote(ctx *Context) *token.Token {
continue
}
s.progressColumn(ctx, 1)
tk = token.SingleQuote(string(value), string(ctx.obuf), srcpos)
return tk
return token.SingleQuote(string(value), string(ctx.obuf), srcpos), nil
}
return tk
s.progressColumn(ctx, 1)
return nil, ErrInvalidToken(
"could not find end character of single-quotated text",
token.Invalid(string(ctx.obuf), srcpos),
)
}

func hexToInt(b rune) int {
Expand All @@ -274,7 +276,7 @@ func hexRunesToInt(b []rune) int {
return sum
}

func (s *Scanner) scanDoubleQuote(ctx *Context) *token.Token {
func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
ctx.addOriginBuf('"')
srcpos := s.pos()
startIndex := ctx.idx + 1
Expand All @@ -284,7 +286,6 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) *token.Token {
isFirstLineChar := false
isNewLine := false

var tk *token.Token
for idx := startIndex; idx < size; idx++ {
if !isNewLine {
s.progressColumn(ctx, 1)
Expand Down Expand Up @@ -400,23 +401,34 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) *token.Token {
continue
}
s.progressColumn(ctx, 1)
tk = token.DoubleQuote(string(value), string(ctx.obuf), srcpos)
return tk
return token.DoubleQuote(string(value), string(ctx.obuf), srcpos), nil
}
return tk
s.progressColumn(ctx, 1)
return nil, ErrInvalidToken(
"could not find end character of double-quotated text",
token.Invalid(string(ctx.obuf), srcpos),
)
}

func (s *Scanner) scanQuote(ctx *Context, ch rune) bool {
func (s *Scanner) scanQuote(ctx *Context, ch rune) (bool, error) {
if ctx.existsBuffer() {
return false
return false, nil
}
if ch == '\'' {
ctx.addToken(s.scanSingleQuote(ctx))
tk, err := s.scanSingleQuote(ctx)
if err != nil {
return false, err
}
ctx.addToken(tk)
} else {
ctx.addToken(s.scanDoubleQuote(ctx))
tk, err := s.scanDoubleQuote(ctx)
if err != nil {
return false, err
}
ctx.addToken(tk)
}
ctx.clear()
return true
return true, nil
}

func (s *Scanner) scanWhiteSpace(ctx *Context) bool {
Expand Down Expand Up @@ -850,16 +862,16 @@ func (s *Scanner) scanLiteralHeaderOption(ctx *Context) error {
s.progressColumn(ctx, progress)
return nil
default:
tk := token.Invalid(string(header)+opt, s.pos())
invalidTk := token.Invalid(string(ctx.obuf), s.pos())
s.progressColumn(ctx, progress)
return ErrInvalidToken(fmt.Sprintf("invalid literal header: %q", opt), tk)
return ErrInvalidToken(fmt.Sprintf("invalid literal header: %q", opt), invalidTk)
}
}
}
text := string(ctx.src[ctx.idx:])
tk := token.Invalid(string(header)+text, s.pos())
invalidTk := token.Invalid(string(ctx.obuf), s.pos())
s.progressColumn(ctx, len(text))
return ErrInvalidToken(fmt.Sprintf("invalid literal header: %q", text), tk)
return ErrInvalidToken(fmt.Sprintf("invalid literal header: %q", text), invalidTk)
}

func (s *Scanner) scanMapKey(ctx *Context) bool {
Expand Down Expand Up @@ -1016,7 +1028,11 @@ func (s *Scanner) scan(ctx *Context) error {
continue
}
case '\'', '"':
if s.scanQuote(ctx, c) {
scanned, err := s.scanQuote(ctx, c)
if err != nil {
return err
}
if scanned {
continue
}
case '\r', '\n':
Expand Down

0 comments on commit 975a624

Please sign in to comment.