Skip to content

Commit

Permalink
fix parsing of document header option (#551)
Browse files Browse the repository at this point in the history
  • Loading branch information
goccy authored Nov 28, 2024
1 parent 3584ab7 commit 44b8854
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 67 deletions.
40 changes: 38 additions & 2 deletions lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2201,7 +2201,7 @@ s: >
},
{
YAML: `
s: >1
s: >1 # comment
1s
`,
Tokens: token.Tokens{
Expand All @@ -2224,7 +2224,14 @@ s: >1
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockScalarIndicator,
Value: ">1",
Origin: " >1\n",
Origin: " >1 ",
},
{
Type: token.CommentType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.CommentIndicator,
Value: " comment",
Origin: "# comment\n",
},
{
Type: token.StringType,
Expand Down Expand Up @@ -2510,6 +2517,35 @@ s: >-3
},
},
},
{
YAML: `
| # comment
foo
`,
Tokens: token.Tokens{
{
Type: token.LiteralType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockScalarIndicator,
Value: "|",
Origin: "\n| ", //nolint:gci,gofmt
},
{
Type: token.CommentType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.CommentIndicator,
Value: " comment",
Origin: "# comment\n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "foo\n",
Origin: " foo\n",
},
},
},
{
YAML: `1x0`,
Tokens: token.Tokens{
Expand Down
96 changes: 31 additions & 65 deletions scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -625,22 +625,6 @@ func (s *Scanner) scanComment(ctx *Context) bool {
return true
}

// trimCommentFromDocumentOpt cuts a trailing "#..." comment off a document
// header option string.
//
// text is the option text to examine; header is the header indicator rune and
// is used only to rebuild the original text for the error token.
//
// Results:
//   - no '#' in text: text is returned unchanged.
//   - '#' is the first byte: an invalid-token error is returned, since there
//     is nothing in front of the comment.
//   - otherwise: everything before the byte that immediately precedes '#'.
//     That preceding byte is dropped without being inspected.
//     NOTE(review): presumably it is the space separating option and
//     comment — confirm against the caller.
func (s *Scanner) trimCommentFromDocumentOpt(text string, header rune) (string, error) {
	hashPos := strings.IndexByte(text, '#')
	switch {
	case hashPos < 0:
		// No comment marker at all.
		return text, nil
	case hashPos == 0:
		msg := fmt.Sprintf("invalid document header %s", text)
		origin := string(header) + text
		return "", ErrInvalidToken(token.Invalid(msg, origin, s.pos()))
	}
	return text[:hashPos-1], nil
}

func (s *Scanner) scanDocument(ctx *Context, c rune) error {
ctx.addOriginBuf(c)
if ctx.isEOS() {
Expand Down Expand Up @@ -982,63 +966,45 @@ func (s *Scanner) scanDocumentHeaderOption(ctx *Context) error {
ctx.addOriginBuf(c)
switch c {
case '\n', '\r':
value := ctx.source(ctx.idx, ctx.idx+idx)
opt := strings.TrimRight(value, " ")
orgOptLen := len(opt)
opt, err := s.trimCommentFromDocumentOpt(opt, header)
if err != nil {
return err
value := strings.TrimRight(ctx.source(ctx.idx, ctx.idx+idx), " ")
commentValueIndex := strings.Index(value, "#")
opt := value
if commentValueIndex > 0 {
opt = value[:commentValueIndex]
}
if err := s.validateDocumentHeaderOption(opt); err != nil {
invalidTk := token.Invalid(err.Error(), string(ctx.obuf), s.pos())
s.progressColumn(ctx, progress)
return ErrInvalidToken(invalidTk)
opt = strings.TrimRightFunc(opt, func(r rune) bool {
return r == ' ' || r == '\t'
})
if len(opt) != 0 {
if err := s.validateDocumentHeaderOption(opt); err != nil {
invalidTk := token.Invalid(err.Error(), string(ctx.obuf), s.pos())
s.progressColumn(ctx, progress)
return ErrInvalidToken(invalidTk)
}
}
hasComment := len(opt) < orgOptLen
if s.column == 1 {
s.lastDelimColumn = 1
}
if header == '|' {
if hasComment {
commentLen := orgOptLen - len(opt)
headerPos := strings.Index(string(ctx.obuf), "|")
if len(ctx.obuf) < commentLen+headerPos {
invalidTk := token.Invalid("found invalid literal header option", string(ctx.obuf), s.pos())
s.progressColumn(ctx, progress)
return ErrInvalidToken(invalidTk)
}
litBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos]
commentBuf := ctx.obuf[len(litBuf):]
ctx.addToken(token.Literal("|"+opt, string(litBuf), s.pos()))
s.column += len(litBuf)
s.offset += len(litBuf)
commentHeader := strings.Index(value, "#")
ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos()))
} else {
ctx.addToken(token.Literal("|"+opt, string(ctx.obuf), s.pos()))
}

commentIndex := strings.Index(string(ctx.obuf), "#")
headerBuf := string(ctx.obuf)
if commentIndex > 0 {
headerBuf = headerBuf[:commentIndex]
}
switch header {
case '|':
ctx.addToken(token.Literal("|"+opt, headerBuf, s.pos()))
ctx.isLiteral = true
} else if header == '>' {
if hasComment {
commentLen := orgOptLen - len(opt)
headerPos := strings.Index(string(ctx.obuf), ">")
if len(ctx.obuf) < commentLen+headerPos {
invalidTk := token.Invalid("found invalid folded header option", string(ctx.obuf), s.pos())
s.progressColumn(ctx, progress)
return ErrInvalidToken(invalidTk)
}
foldedBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos]
commentBuf := ctx.obuf[len(foldedBuf):]
ctx.addToken(token.Folded(">"+opt, string(foldedBuf), s.pos()))
s.column += len(foldedBuf)
s.offset += len(foldedBuf)
commentHeader := strings.Index(value, "#")
ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos()))
} else {
ctx.addToken(token.Folded(">"+opt, string(ctx.obuf), s.pos()))
}
case '>':
ctx.addToken(token.Folded(">"+opt, headerBuf, s.pos()))
ctx.isFolded = true
}
if commentIndex > 0 {
comment := string(value[commentValueIndex+1:])
s.offset += len(headerBuf)
s.column += len(headerBuf)
ctx.addToken(token.Comment(comment, string(ctx.obuf[len(headerBuf):]), s.pos()))
}
s.indentState = IndentStateKeep
ctx.resetBuffer()
ctx.docOpt = opt
Expand Down

0 comments on commit 44b8854

Please sign in to comment.