Skip to content

Commit

Permalink
fix invalid token error (#536)
Browse files Browse the repository at this point in the history
  • Loading branch information
goccy authored Nov 14, 2024
1 parent 271213a commit 8b2110b
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 44 deletions.
2 changes: 1 addition & 1 deletion parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,7 @@ func ParseBytes(bytes []byte, mode Mode, opts ...Option) (*ast.File, error) {
// Parse parses the given token instances and returns an ast.File.
func Parse(tokens token.Tokens, mode Mode, opts ...Option) (*ast.File, error) {
if tk := tokens.InvalidToken(); tk != nil {
return nil, errors.ErrSyntax("found invalid token", tk)
return nil, errors.ErrSyntax(tk.Error, tk)
}
f, err := newParser(tokens, mode, opts).parse(newContext())
if err != nil {
Expand Down
18 changes: 9 additions & 9 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1041,7 +1041,7 @@ a: |invalidopt
foo
`,
`
[2:4] found invalid token
[2:4] invalid header option: "invalidopt"
> 2 | a: |invalidopt
^
3 | foo`,
Expand Down Expand Up @@ -1160,15 +1160,15 @@ b: - 2
{
`a: 'foobarbaz`,
`
[1:4] found invalid token
[1:4] could not find end character of single-quoted text
> 1 | a: 'foobarbaz
^
`,
},
{
`a: "\"key\": \"value:\"`,
`
[1:4] found invalid token
[1:4] could not find end character of double-quoted text
> 1 | a: "\"key\": \"value:\"
^
`,
Expand All @@ -1192,7 +1192,7 @@ b: - 2
{
">\n>",
`
[2:1] found invalid token
[2:1] could not find document
1 | >
> 2 | >
^
Expand All @@ -1201,7 +1201,7 @@ b: - 2
{
">\n1",
`
[2:1] found invalid token
[2:1] could not find document
1 | >
> 2 | 1
^
Expand All @@ -1210,7 +1210,7 @@ b: - 2
{
"|\n1",
`
[2:1] found invalid token
[2:1] could not find document
1 | |
> 2 | 1
^
Expand All @@ -1219,7 +1219,7 @@ b: - 2
{
"a: >3\n 1",
`
[2:3] found invalid token
[2:3] invalid number of indent is specified in the document header
1 | a: >3
> 2 | 1
^
Expand Down Expand Up @@ -1261,14 +1261,14 @@ a:
{
"key: [@val]",
`
[1:7] found invalid token
[1:7] '@' is a reserved character
> 1 | key: [@val]
^
`,
},
{
"key: [`val]",
"\n[1:7] found invalid token\n> 1 | key: [`val]\n ^\n",
"\n[1:7] '`' is a reserved character\n> 1 | key: [`val]\n ^\n",
},
{
`{a: b}: v`,
Expand Down
10 changes: 4 additions & 6 deletions scanner/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@ package scanner
import "github.com/goccy/go-yaml/token"

type InvalidTokenError struct {
Message string
Token *token.Token
Token *token.Token
}

func (e *InvalidTokenError) Error() string {
return e.Message
return e.Token.Error
}

func ErrInvalidToken(msg string, tk *token.Token) *InvalidTokenError {
func ErrInvalidToken(tk *token.Token) *InvalidTokenError {
return &InvalidTokenError{
Message: msg,
Token: tk,
Token: tk,
}
}
62 changes: 42 additions & 20 deletions scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,10 @@ func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) {
}
s.progressColumn(ctx, 1)
return nil, ErrInvalidToken(
"could not find end character of single-quotated text",
token.Invalid(string(ctx.obuf), srcpos),
token.Invalid(
"could not find end character of single-quoted text",
string(ctx.obuf), srcpos,
),
)
}

Expand Down Expand Up @@ -427,8 +429,10 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
}
s.progressColumn(ctx, 1)
return nil, ErrInvalidToken(
"could not find end character of double-quotated text",
token.Invalid(string(ctx.obuf), srcpos),
token.Invalid(
"could not find end character of double-quoted text",
string(ctx.obuf), srcpos,
),
)
}

Expand Down Expand Up @@ -570,8 +574,10 @@ func (s *Scanner) trimCommentFromDocumentOpt(text string, header rune) (string,
}
if idx == 0 {
return "", ErrInvalidToken(
fmt.Sprintf("invalid document header %s", text),
token.Invalid(string(header)+text, s.pos()),
token.Invalid(
fmt.Sprintf("invalid document header %s", text),
string(header)+text, s.pos(),
),
)
}
return text[:idx-1], nil
Expand All @@ -582,9 +588,9 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
if ctx.isEOS() {
ctx.updateDocumentLineIndentColumn(s.column)
if err := ctx.validateDocumentLineIndentColumn(); err != nil {
invalidTk := token.Invalid(string(ctx.obuf), s.pos())
invalidTk := token.Invalid(err.Error(), string(ctx.obuf), s.pos())
s.progressColumn(ctx, 1)
return ErrInvalidToken(err.Error(), invalidTk)
return ErrInvalidToken(invalidTk)
}
ctx.addBuf(c)
value := ctx.bufferedSrc()
Expand All @@ -600,8 +606,10 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
s.progressColumn(ctx, 1)
} else if s.isFirstCharAtLine && c == '\t' {
err := ErrInvalidToken(
"found a tab character where an indentation space is expected",
token.Invalid(string(ctx.obuf), s.pos()),
token.Invalid(
"found a tab character where an indentation space is expected",
string(ctx.obuf), s.pos(),
),
)
s.progressColumn(ctx, 1)
return err
Expand All @@ -611,9 +619,9 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
s.lastDelimColumn = ctx.docFirstLineIndentColumn - 1
}
if err := ctx.validateDocumentLineIndentColumn(); err != nil {
invalidTk := token.Invalid(string(ctx.obuf), s.pos())
invalidTk := token.Invalid(err.Error(), string(ctx.obuf), s.pos())
s.progressColumn(ctx, 1)
return ErrInvalidToken(err.Error(), invalidTk)
return ErrInvalidToken(invalidTk)
}
ctx.updateDocumentNewLineInFolded(s.column)
ctx.addBuf(c)
Expand Down Expand Up @@ -899,9 +907,9 @@ func (s *Scanner) scanDocumentHeaderOption(ctx *Context) error {
return err
}
if err := s.validateDocumentHeaderOption(opt); err != nil {
invalidTk := token.Invalid(string(ctx.obuf), s.pos())
invalidTk := token.Invalid(err.Error(), string(ctx.obuf), s.pos())
s.progressColumn(ctx, progress)
return ErrInvalidToken(err.Error(), invalidTk)
return ErrInvalidToken(invalidTk)
}
hasComment := len(opt) < orgOptLen
if s.column == 1 {
Expand Down Expand Up @@ -946,9 +954,12 @@ func (s *Scanner) scanDocumentHeaderOption(ctx *Context) error {
}
}
text := string(ctx.src[ctx.idx:])
invalidTk := token.Invalid(string(ctx.obuf), s.pos())
invalidTk := token.Invalid(
fmt.Sprintf("invalid document header: %q", text),
string(ctx.obuf), s.pos(),
)
s.progressColumn(ctx, len(text))
return ErrInvalidToken(fmt.Sprintf("invalid document header: %q", text), invalidTk)
return ErrInvalidToken(invalidTk)
}

func (s *Scanner) scanMapKey(ctx *Context) bool {
Expand Down Expand Up @@ -1015,7 +1026,12 @@ func (s *Scanner) scanReservedChar(ctx *Context, c rune) error {

ctx.addBuf(c)
ctx.addOriginBuf(c)
err := ErrInvalidToken("%q is a reserved character", token.Invalid(string(ctx.obuf), s.pos()))
err := ErrInvalidToken(
token.Invalid(
fmt.Sprintf("%q is a reserved character", c),
string(ctx.obuf), s.pos(),
),
)
s.progressColumn(ctx, 1)
ctx.clear()
return err
Expand All @@ -1028,7 +1044,11 @@ func (s *Scanner) scanTab(ctx *Context, c rune) error {

ctx.addBuf(c)
ctx.addOriginBuf(c)
err := ErrInvalidToken("found character '\t' that cannot start any token", token.Invalid(string(ctx.obuf), s.pos()))
err := ErrInvalidToken(
token.Invalid("found character '\t' that cannot start any token",
string(ctx.obuf), s.pos(),
),
)
s.progressColumn(ctx, 1)
ctx.clear()
return err
Expand All @@ -1054,8 +1074,10 @@ func (s *Scanner) scan(ctx *Context) error {
// But if literal/folded token column is 1, it is invalid at down state.
if tk.Position.Column == 1 {
return ErrInvalidToken(
"could not find document",
token.Invalid(string(ctx.obuf), s.pos()),
token.Invalid(
"could not find document",
string(ctx.obuf), s.pos(),
),
)
}
if tk.Type != token.StringType {
Expand Down
27 changes: 19 additions & 8 deletions token/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -727,14 +727,24 @@ func (p *Position) String() string {

// Token holds the type, text, position, and neighbor links of a single token.
type Token struct {
Type Type
// Type is a token type.
Type Type
// CharacterType is a character type.
CharacterType CharacterType
Indicator Indicator
Value string
Origin string
Position *Position
Next *Token
Prev *Token
// Indicator is an indicator type.
Indicator Indicator
// Value is a string extracted with only meaningful characters, with spaces and such removed.
Value string
// Origin is a string that stores the original text as-is.
Origin string
// Error keeps error message for InvalidToken.
Error string
// Position is a token position.
Position *Position
// Next is a next token reference.
Next *Token
// Prev is a previous token reference.
Prev *Token
}

// PreviousType previous token type
Expand Down Expand Up @@ -1090,13 +1100,14 @@ func DocumentEnd(org string, pos *Position) *Token {
}
}

func Invalid(org string, pos *Position) *Token {
func Invalid(err string, org string, pos *Position) *Token {
return &Token{
Type: InvalidType,
CharacterType: CharacterTypeInvalid,
Indicator: NotIndicator,
Value: org,
Origin: org,
Error: err,
Position: pos,
}
}
Expand Down

0 comments on commit 8b2110b

Please sign in to comment.