Skip to content

Commit

Permalink
Fix parser (#546)
Browse files Browse the repository at this point in the history
* fix parser

* fix anchor-for-empty-node

* add test case
  • Loading branch information
goccy authored Nov 26, 2024
1 parent deb129a commit 3399084
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 30 deletions.
43 changes: 41 additions & 2 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ func (p *parser) parseFlowMap(ctx *context) (*ast.MappingNode, error) {
if err != nil {
return nil, err
}
ctx := ctx.withChild(p.mapKeyText(key))
colonTk := mapKeyTk.Group.Last()
if p.isFlowMapDelim(ctx.nextToken()) {
value, err := newNullNode(ctx, ctx.insertNullToken(colonTk))
Expand Down Expand Up @@ -619,23 +620,61 @@ func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonTk *Token)
if ctx.isComment() {
tk = ctx.nextNotCommentToken()
}
keyCol := key.GetToken().Position.Column
keyLine := key.GetToken().Position.Line

if tk.Column() == key.GetToken().Position.Column && p.isMapToken(tk) {
if tk.Column() == keyCol && p.isMapToken(tk) {
// in this case,
// ----
// key: <value is not defined>
// next
return newNullNode(ctx, ctx.insertNullToken(colonTk))
}

if tk.Column() < key.GetToken().Position.Column {
if tk.Line() == keyLine && tk.GroupType() == TokenGroupAnchorName &&
ctx.nextToken().Column() == keyCol && p.isMapToken(ctx.nextToken()) {
// in this case,
// ----
// key: &anchor
// next
group := &TokenGroup{
Type: TokenGroupAnchor,
Tokens: []*Token{tk, ctx.createNullToken(tk)},
}
anchor, err := p.parseAnchor(ctx.withGroup(group), group)
if err != nil {
return nil, err
}
ctx.goNext()
return anchor, nil
}

if tk.Column() < keyCol {
// in this case,
// ----
// key: <value is not defined>
// next
return newNullNode(ctx, ctx.insertNullToken(colonTk))
}

if tk.Line() == keyLine && tk.GroupType() == TokenGroupAnchorName &&
ctx.nextToken().Column() < keyCol {
// in this case,
// ----
// key: &anchor
// next
group := &TokenGroup{
Type: TokenGroupAnchor,
Tokens: []*Token{tk, ctx.createNullToken(tk)},
}
anchor, err := p.parseAnchor(ctx.withGroup(group), group)
if err != nil {
return nil, err
}
ctx.goNext()
return anchor, nil
}

value, err := p.parseToken(ctx, ctx.currentToken())
if err != nil {
return nil, err
Expand Down
46 changes: 46 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,20 @@ a:
- |2
b
c: d
`,
},
{
`
a:
b: &anchor
c: &anchor2
d: e
`,
`
a:
b: &anchor null
c: &anchor2 null
d: e
`,
},
}
Expand Down Expand Up @@ -939,6 +953,38 @@ foo:
bar: null # comment
baz: 1
`,
},
{
`
{
"apiVersion": "apps/v1",
"kind": "Deployment",
"metadata": {
"name": "foo",
"labels": {
"app": "bar"
}
},
"spec": {
"replicas": 3,
"selector": {
"matchLabels": {
"app": "bar"
}
},
"template": {
"metadata": {
"labels": {
"app": "bar"
}
}
}
}
}
`,
`
{"apiVersion": "apps/v1", "kind": "Deployment", "metadata": {"name": "foo", "labels": {"app": "bar"}}, "spec": {"replicas": 3, "selector": {"matchLabels": {"app": "bar"}}, "template": {"metadata": {"labels": {"app": "bar"}}}}}
`,
},
}
Expand Down
3 changes: 3 additions & 0 deletions parser/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,9 @@ func createAnchorAndAliasTokenGroups(tokens []*Token) ([]*Token, error) {
},
}
valueTk := tokens[i+2]
if tk.Line() == valueTk.Line() && valueTk.Type() == token.SequenceEntryType {
return nil, errors.ErrSyntax("sequence entries are not allowed after anchor on the same line", valueTk.RawToken())
}
if tk.Line() == valueTk.Line() && isScalarType(valueTk) {
ret = append(ret, &Token{
Group: &TokenGroup{
Expand Down
17 changes: 17 additions & 0 deletions scanner/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ func (c *Context) updateDocumentNewLineState() {
c.docLineIndentColumn = 0
}

// isIndentColumn reports whether column counts as an indentation column.
//
// When docFirstLineIndentColumn is zero, only column 1 qualifies. Otherwise
// every column strictly less than docFirstLineIndentColumn qualifies.
// NOTE(review): zero presumably means the first-line indent has not been
// recorded yet — confirm against the code that sets the field.
func (c *Context) isIndentColumn(column int) bool {
	firstIndent := c.docFirstLineIndentColumn
	switch firstIndent {
	case 0:
		return column == 1
	default:
		return column < firstIndent
	}
}

func (c *Context) addDocumentIndent(column int) {
if c.docFirstLineIndentColumn == 0 {
return
Expand Down Expand Up @@ -192,6 +199,16 @@ func (c *Context) addBuf(r rune) {
}
}

// addBufWithTab appends r to the buffer.
//
// A space is dropped while the buffer is still empty, so the buffer never
// starts with a space; any other rune, including a tab, is kept. After
// appending a non-space rune, notSpaceCharPos is set to the new buffer length.
func (c *Context) addBufWithTab(r rune) {
	isSpace := r == ' '
	if isSpace && len(c.buf) == 0 {
		// Leading spaces are never buffered.
		return
	}
	c.buf = append(c.buf, r)
	if isSpace {
		return
	}
	c.notSpaceCharPos = len(c.buf)
}

func (c *Context) addOriginBuf(r rune) {
c.obuf = append(c.obuf, r)
if r != ' ' && r != '\t' {
Expand Down
63 changes: 46 additions & 17 deletions scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type Scanner struct {
indentLevel int
isFirstCharAtLine bool
isAnchor bool
isAlias bool
isDirective bool
startedFlowSequenceNum int
startedFlowMapNum int
Expand Down Expand Up @@ -105,6 +106,7 @@ func (s *Scanner) progressLine(ctx *Context) {
s.indentNum = 0
s.isFirstCharAtLine = true
s.isAnchor = false
s.isAlias = false
s.isDirective = false
s.progress(ctx, 1)
}
Expand Down Expand Up @@ -516,7 +518,7 @@ func (s *Scanner) scanWhiteSpace(ctx *Context) bool {
if ctx.isDocument() {
return false
}
if !s.isAnchor && !s.isFirstCharAtLine {
if !s.isAnchor && !s.isAlias && !s.isFirstCharAtLine {
return false
}

Expand All @@ -528,6 +530,7 @@ func (s *Scanner) scanWhiteSpace(ctx *Context) bool {

s.addBufferedTokenIfExists(ctx)
s.isAnchor = false
s.isAlias = false
return true
}

Expand Down Expand Up @@ -663,7 +666,7 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
} else if s.isFirstCharAtLine && c == ' ' {
ctx.addDocumentIndent(s.column)
s.progressColumn(ctx, 1)
} else if s.isFirstCharAtLine && c == '\t' {
} else if s.isFirstCharAtLine && c == '\t' && ctx.isIndentColumn(s.column) {
err := ErrInvalidToken(
token.Invalid(
"found a tab character where an indentation space is expected",
Expand All @@ -683,7 +686,7 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
return ErrInvalidToken(invalidTk)
}
ctx.updateDocumentNewLineInFolded(s.column)
ctx.addBuf(c)
ctx.addBufWithTab(c)
s.progressColumn(ctx, 1)
}
return nil
Expand Down Expand Up @@ -717,7 +720,7 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) {

if ctx.isEOS() {
s.addBufferedTokenIfExists(ctx)
} else if s.isAnchor {
} else if s.isAnchor || s.isAlias {
s.addBufferedTokenIfExists(ctx)
}
if ctx.existsBuffer() && s.isFirstCharAtLine {
Expand Down Expand Up @@ -812,13 +815,19 @@ func (s *Scanner) scanFlowEntry(ctx *Context, c rune) bool {
return true
}

func (s *Scanner) scanMapDelim(ctx *Context) bool {
func (s *Scanner) scanMapDelim(ctx *Context) (bool, error) {
nc := ctx.nextChar()
if s.isDirective {
return false
if s.isDirective || s.isAnchor || s.isAlias {
return false, nil
}
if s.startedFlowMapNum <= 0 && nc != ' ' && nc != '\t' && !s.isNewLineChar(nc) && !ctx.isNextEOS() {
return false
return false, nil
}

if strings.HasPrefix(strings.TrimPrefix(string(ctx.obuf), " "), "\t") && !strings.HasPrefix(string(ctx.buf), "\t") {
invalidTk := token.Invalid("tab character cannot use as a map key directly", string(ctx.obuf), s.pos())
s.progressColumn(ctx, 1)
return false, ErrInvalidToken(invalidTk)
}

// mapping value
Expand All @@ -836,7 +845,7 @@ func (s *Scanner) scanMapDelim(ctx *Context) bool {
ctx.addToken(token.MappingValue(s.pos()))
s.progressColumn(ctx, 1)
ctx.clear()
return true
return true, nil
}

func (s *Scanner) scanDocumentStart(ctx *Context) bool {
Expand Down Expand Up @@ -908,14 +917,20 @@ func (s *Scanner) scanRawFoldedChar(ctx *Context) bool {
return true
}

func (s *Scanner) scanSequence(ctx *Context) bool {
func (s *Scanner) scanSequence(ctx *Context) (bool, error) {
if ctx.existsBuffer() {
return false
return false, nil
}

nc := ctx.nextChar()
if nc != 0 && nc != ' ' && !s.isNewLineChar(nc) {
return false
if nc != 0 && nc != ' ' && nc != '\t' && !s.isNewLineChar(nc) {
return false, nil
}

if strings.HasPrefix(strings.TrimPrefix(string(ctx.obuf), " "), "\t") {
invalidTk := token.Invalid("tab character cannot use as a sequence delimiter", string(ctx.obuf), s.pos())
s.progressColumn(ctx, 1)
return false, ErrInvalidToken(invalidTk)
}

s.addBufferedTokenIfExists(ctx)
Expand All @@ -925,7 +940,7 @@ func (s *Scanner) scanSequence(ctx *Context) bool {
ctx.addToken(tk)
s.progressColumn(ctx, 1)
ctx.clear()
return true
return true, nil
}

func (s *Scanner) scanDocumentHeader(ctx *Context) (bool, error) {
Expand Down Expand Up @@ -1036,7 +1051,7 @@ func (s *Scanner) scanMapKey(ctx *Context) bool {
}

nc := ctx.nextChar()
if nc != ' ' {
if nc != ' ' && nc != '\t' {
return false
}

Expand Down Expand Up @@ -1084,6 +1099,7 @@ func (s *Scanner) scanAlias(ctx *Context) bool {
ctx.addOriginBuf('*')
ctx.addToken(token.Alias(string(ctx.obuf), s.pos()))
s.progressColumn(ctx, 1)
s.isAlias = true
ctx.clear()
return true
}
Expand All @@ -1107,6 +1123,11 @@ func (s *Scanner) scanReservedChar(ctx *Context, c rune) error {
}

func (s *Scanner) scanTab(ctx *Context, c rune) error {
if s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0 {
// tab characters are allowed in flow mode.
return nil
}

if !s.isFirstCharAtLine {
return nil
}
Expand Down Expand Up @@ -1185,7 +1206,11 @@ func (s *Scanner) scan(ctx *Context) error {
if s.scanRawFoldedChar(ctx) {
continue
}
if s.scanSequence(ctx) {
scanned, err := s.scanSequence(ctx)
if err != nil {
return err
}
if scanned {
continue
}
case '[':
Expand All @@ -1201,7 +1226,11 @@ func (s *Scanner) scan(ctx *Context) error {
continue
}
case ':':
if s.scanMapDelim(ctx) {
scanned, err := s.scanMapDelim(ctx)
if err != nil {
return err
}
if scanned {
continue
}
case '|', '>':
Expand Down
12 changes: 1 addition & 11 deletions yaml_test_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ var failureTestNames = []string{
"aliases-in-flow-objects",
"aliases-in-explicit-block-mapping",
"aliases-in-implicit-block-mapping",
"allowed-characters-in-alias",
"anchor-before-sequence-entry-on-same-line",
"anchor-for-empty-node",
"anchor-plus-alias",
"anchors-in-mapping",
"anchors-with-colon-in-name",
Expand Down Expand Up @@ -156,13 +153,7 @@ var failureTestNames = []string{
"syntax-character-edge-cases/00",
"tab-at-beginning-of-line-followed-by-a-flow-mapping",
"tab-indented-top-flow",
"tabs-in-various-contexts/001",
"tabs-in-various-contexts/002",
"tabs-in-various-contexts/004",
"tabs-in-various-contexts/005",
"tabs-in-various-contexts/006",
"tabs-in-various-contexts/008",
"tabs-in-various-contexts/010",
"tabs-in-various-contexts/003",
"tabs-that-look-like-indentation/00",
"tabs-that-look-like-indentation/01",
"tabs-that-look-like-indentation/02",
Expand All @@ -178,7 +169,6 @@ var failureTestNames = []string{
"tags-in-explicit-mapping",
"tags-in-implicit-mapping",
"tags-on-empty-scalars",
"three-dashes-and-content-without-space",
"trailing-line-of-spaces/01", // last '\n' character is needed ?
"various-combinations-of-explicit-block-mappings", // no json
"various-trailing-comments", // no json
Expand Down

0 comments on commit 3399084

Please sign in to comment.