fix invalid token error

goccy · Nov 14, 2024 · 1b3ca30 · 1b3ca30
1 parent 271213a
commit 1b3ca30
Show file tree

Hide file tree

Showing 5 changed files with 75 additions and 44 deletions.
diff --git a/parser/parser.go b/parser/parser.go
@@ -1007,7 +1007,7 @@ func ParseBytes(bytes []byte, mode Mode, opts ...Option) (*ast.File, error) {
 // Parse parse from token instances, and returns ast.File
 func Parse(tokens token.Tokens, mode Mode, opts ...Option) (*ast.File, error) {
 	if tk := tokens.InvalidToken(); tk != nil {
-		return nil, errors.ErrSyntax("found invalid token", tk)
+		return nil, errors.ErrSyntax(tk.Error, tk)
 	}
 	f, err := newParser(tokens, mode, opts).parse(newContext())
 	if err != nil {

diff --git a/parser/parser_test.go b/parser/parser_test.go
@@ -1041,7 +1041,7 @@ a: |invalidopt
   foo
 `,
 			`
-[2:4] found invalid token
+[2:4] invalid header option: "invalidopt"
 >  2 | a: |invalidopt
           ^
    3 |   foo`,
@@ -1160,15 +1160,15 @@ b: - 2
 		{
 			`a: 'foobarbaz`,
 			`
-[1:4] found invalid token
+[1:4] could not find end character of single-quoted text
 >  1 | a: 'foobarbaz
           ^
 `,
 		},
 		{
 			`a: "\"key\": \"value:\"`,
 			`
-[1:4] found invalid token
+[1:4] could not find end character of double-quoted text
 >  1 | a: "\"key\": \"value:\"
           ^
 `,
@@ -1192,7 +1192,7 @@ b: - 2
 		{
 			">\n>",
 			`
-[2:1] found invalid token
+[2:1] could not find document
    1 | >
 >  2 | >
        ^
@@ -1201,7 +1201,7 @@ b: - 2
 		{
 			">\n1",
 			`
-[2:1] found invalid token
+[2:1] could not find document
    1 | >
 >  2 | 1
        ^
@@ -1210,7 +1210,7 @@ b: - 2
 		{
 			"|\n1",
 			`
-[2:1] found invalid token
+[2:1] could not find document
    1 | |
 >  2 | 1
        ^
@@ -1219,7 +1219,7 @@ b: - 2
 		{
 			"a: >3\n  1",
 			`
-[2:3] found invalid token
+[2:3] invalid number of indent is specified in the document header
    1 | a: >3
 >  2 |   1
          ^
@@ -1261,14 +1261,14 @@ a:
 		{
 			"key: [@val]",
 			`
-[1:7] found invalid token
+[1:7] '@' is a reserved character
 >  1 | key: [@val]
              ^
 `,
 		},
 		{
 			"key: [`val]",
-			"\n[1:7] found invalid token\n>  1 | key: [`val]\n             ^\n",
+			"\n[1:7] '`' is a reserved character\n>  1 | key: [`val]\n             ^\n",
 		},
 		{
 			`{a: b}: v`,

diff --git a/scanner/error.go b/scanner/error.go
@@ -3,17 +3,15 @@ package scanner
 import "github.com/goccy/go-yaml/token"
 
 type InvalidTokenError struct {
-	Message string
-	Token   *token.Token
+	Token *token.Token
 }
 
 func (e *InvalidTokenError) Error() string {
-	return e.Message
+	return e.Token.Error
 }
 
-func ErrInvalidToken(msg string, tk *token.Token) *InvalidTokenError {
+func ErrInvalidToken(tk *token.Token) *InvalidTokenError {
 	return &InvalidTokenError{
-		Message: msg,
-		Token:   tk,
+		Token: tk,
 	}
 }
diff --git a/scanner/scanner.go b/scanner/scanner.go
@@ -257,8 +257,10 @@ func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) {
 	}
 	s.progressColumn(ctx, 1)
 	return nil, ErrInvalidToken(
-		"could not find end character of single-quotated text",
-		token.Invalid(string(ctx.obuf), srcpos),
+		token.Invalid(
+			"could not find end character of single-quoted text",
+			string(ctx.obuf), srcpos,
+		),
 	)
 }
 
@@ -427,8 +429,10 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (*token.Token, error) {
 	}
 	s.progressColumn(ctx, 1)
 	return nil, ErrInvalidToken(
-		"could not find end character of double-quotated text",
-		token.Invalid(string(ctx.obuf), srcpos),
+		token.Invalid(
+			"could not find end character of double-quoted text",
+			string(ctx.obuf), srcpos,
+		),
 	)
 }
 
@@ -570,8 +574,10 @@ func (s *Scanner) trimCommentFromDocumentOpt(text string, header rune) (string,
 	}
 	if idx == 0 {
 		return "", ErrInvalidToken(
-			fmt.Sprintf("invalid document header %s", text),
-			token.Invalid(string(header)+text, s.pos()),
+			token.Invalid(
+				fmt.Sprintf("invalid document header %s", text),
+				string(header)+text, s.pos(),
+			),
 		)
 	}
 	return text[:idx-1], nil
@@ -582,9 +588,9 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
 	if ctx.isEOS() {
 		ctx.updateDocumentLineIndentColumn(s.column)
 		if err := ctx.validateDocumentLineIndentColumn(); err != nil {
-			invalidTk := token.Invalid(string(ctx.obuf), s.pos())
+			invalidTk := token.Invalid(err.Error(), string(ctx.obuf), s.pos())
 			s.progressColumn(ctx, 1)
-			return ErrInvalidToken(err.Error(), invalidTk)
+			return ErrInvalidToken(invalidTk)
 		}
 		ctx.addBuf(c)
 		value := ctx.bufferedSrc()
@@ -600,8 +606,10 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
 		s.progressColumn(ctx, 1)
 	} else if s.isFirstCharAtLine && c == '\t' {
 		err := ErrInvalidToken(
-			"found a tab character where an indentation space is expected",
-			token.Invalid(string(ctx.obuf), s.pos()),
+			token.Invalid(
+				"found a tab character where an indentation space is expected",
+				string(ctx.obuf), s.pos(),
+			),
 		)
 		s.progressColumn(ctx, 1)
 		return err
@@ -611,9 +619,9 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
 			s.lastDelimColumn = ctx.docFirstLineIndentColumn - 1
 		}
 		if err := ctx.validateDocumentLineIndentColumn(); err != nil {
-			invalidTk := token.Invalid(string(ctx.obuf), s.pos())
+			invalidTk := token.Invalid(err.Error(), string(ctx.obuf), s.pos())
 			s.progressColumn(ctx, 1)
-			return ErrInvalidToken(err.Error(), invalidTk)
+			return ErrInvalidToken(invalidTk)
 		}
 		ctx.updateDocumentNewLineInFolded(s.column)
 		ctx.addBuf(c)
@@ -899,9 +907,9 @@ func (s *Scanner) scanDocumentHeaderOption(ctx *Context) error {
 				return err
 			}
 			if err := s.validateDocumentHeaderOption(opt); err != nil {
-				invalidTk := token.Invalid(string(ctx.obuf), s.pos())
+				invalidTk := token.Invalid(err.Error(), string(ctx.obuf), s.pos())
 				s.progressColumn(ctx, progress)
-				return ErrInvalidToken(err.Error(), invalidTk)
+				return ErrInvalidToken(invalidTk)
 			}
 			hasComment := len(opt) < orgOptLen
 			if s.column == 1 {
@@ -946,9 +954,12 @@ func (s *Scanner) scanDocumentHeaderOption(ctx *Context) error {
 		}
 	}
 	text := string(ctx.src[ctx.idx:])
-	invalidTk := token.Invalid(string(ctx.obuf), s.pos())
+	invalidTk := token.Invalid(
+		fmt.Sprintf("invalid document header: %q", text),
+		string(ctx.obuf), s.pos(),
+	)
 	s.progressColumn(ctx, len(text))
-	return ErrInvalidToken(fmt.Sprintf("invalid document header: %q", text), invalidTk)
+	return ErrInvalidToken(invalidTk)
 }
 
 func (s *Scanner) scanMapKey(ctx *Context) bool {
@@ -1015,7 +1026,12 @@ func (s *Scanner) scanReservedChar(ctx *Context, c rune) error {
 
 	ctx.addBuf(c)
 	ctx.addOriginBuf(c)
-	err := ErrInvalidToken("%q is a reserved character", token.Invalid(string(ctx.obuf), s.pos()))
+	err := ErrInvalidToken(
+		token.Invalid(
+			fmt.Sprintf("%q is a reserved character", c),
+			string(ctx.obuf), s.pos(),
+		),
+	)
 	s.progressColumn(ctx, 1)
 	ctx.clear()
 	return err
@@ -1028,7 +1044,11 @@ func (s *Scanner) scanTab(ctx *Context, c rune) error {
 
 	ctx.addBuf(c)
 	ctx.addOriginBuf(c)
-	err := ErrInvalidToken("found character '\t' that cannot start any token", token.Invalid(string(ctx.obuf), s.pos()))
+	err := ErrInvalidToken(
+		token.Invalid("found character '\t' that cannot start any token",
+			string(ctx.obuf), s.pos(),
+		),
+	)
 	s.progressColumn(ctx, 1)
 	ctx.clear()
 	return err
@@ -1054,8 +1074,10 @@ func (s *Scanner) scan(ctx *Context) error {
 					// But if literal/folded token column is 1, it is invalid at down state.
 					if tk.Position.Column == 1 {
 						return ErrInvalidToken(
-							"could not find document",
-							token.Invalid(string(ctx.obuf), s.pos()),
+							token.Invalid(
+								"could not find document",
+								string(ctx.obuf), s.pos(),
+							),
 						)
 					}
 					if tk.Type != token.StringType {

diff --git a/token/token.go b/token/token.go
@@ -727,14 +727,24 @@ func (p *Position) String() string {
 
 // Token type for token
 type Token struct {
-	Type          Type
+	// Type is a token type.
+	Type Type
+	// CharacterType is a character type.
 	CharacterType CharacterType
-	Indicator     Indicator
-	Value         string
-	Origin        string
-	Position      *Position
-	Next          *Token
-	Prev          *Token
+	// Indicator is an indicator type.
+	Indicator Indicator
+	// Value is a string extracted with only meaningful characters, with spaces and such removed.
+	Value string
+	// Origin is a string that stores the original text as-is.
+	Origin string
+	// Error keeps the error message for an invalid token (one whose Type is InvalidType).
+	Error string
+	// Position is a token position.
+	Position *Position
+	// Next is a next token reference.
+	Next *Token
+	// Prev is a previous token reference.
+	Prev *Token
 }
 
 // PreviousType previous token type
@@ -1090,13 +1100,14 @@ func DocumentEnd(org string, pos *Position) *Token {
 	}
 }
 
-func Invalid(org string, pos *Position) *Token {
+func Invalid(err string, org string, pos *Position) *Token {
 	return &Token{
 		Type:          InvalidType,
 		CharacterType: CharacterTypeInvalid,
 		Indicator:     NotIndicator,
 		Value:         org,
 		Origin:        org,
+		Error:         err,
 		Position:      pos,
 	}
 }