Skip to content

Commit

Permalink
fix number parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
goccy committed Nov 6, 2024
1 parent 29b57b4 commit 624c4fc
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 59 deletions.
62 changes: 61 additions & 1 deletion lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,62 @@ func TestTokenize(t *testing.T) {
YAML: `0_`,
Tokens: token.Tokens{
{
Type: token.OctetIntegerType,
Type: token.IntegerType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "0_",
Origin: "0_",
},
},
},
{
YAML: `0x_1A_2B_3C`,
Tokens: token.Tokens{
{
Type: token.HexIntegerType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "0x_1A_2B_3C",
Origin: "0x_1A_2B_3C",
},
},
},
{
YAML: `+0b1010`,
Tokens: token.Tokens{
{
Type: token.BinaryIntegerType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "+0b1010",
Origin: "+0b1010",
},
},
},
{
YAML: `0100`,
Tokens: token.Tokens{
{
Type: token.OctetIntegerType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "0100",
Origin: "0100",
},
},
},
{
YAML: `0o10`,
Tokens: token.Tokens{
{
Type: token.OctetIntegerType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "0o10",
Origin: "0o10",
},
},
},
{
YAML: `{}
`,
Expand Down Expand Up @@ -2197,6 +2245,18 @@ s: >-3
},
},
},
{
YAML: `1x0`,
Tokens: token.Tokens{
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "1x0",
Origin: "1x0",
},
},
},
}
for _, test := range tests {
t.Run(test.YAML, func(t *testing.T) {
Expand Down
121 changes: 63 additions & 58 deletions token/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -523,86 +523,91 @@ type numStat struct {
typ numType
}

func getNumberStat(str string) *numStat {
func getNumberStat(value string) *numStat {
stat := &numStat{}
if str == "" {
if value == "" {
return stat
}
if str == "-" || str == "." || str == "+" || str == "_" {
dotCount := strings.Count(value, ".")
if dotCount > 1 {
return stat
}
if str[0] == '_' {

trimmed := strings.TrimPrefix(strings.TrimPrefix(value, "+"), "-")

var typ numType
switch {
case strings.HasPrefix(trimmed, "0x"):
trimmed = strings.TrimPrefix(trimmed, "0x")
typ = numTypeHex
case strings.HasPrefix(trimmed, "0o"):
trimmed = strings.TrimPrefix(trimmed, "0o")
typ = numTypeOctet
case strings.HasPrefix(trimmed, "0b"):
trimmed = strings.TrimPrefix(trimmed, "0b")
typ = numTypeBinary
case dotCount == 1:
typ = numTypeFloat
}

if trimmed == "" {
return stat
}
dotFound := false
isNegative := false
isExponent := false
if str[0] == '-' {
isNegative = true
}
for idx, c := range str {

var numCount int
for idx, c := range trimmed {
if isNumber(c) {
numCount++
continue
}
switch c {
case 'x':
if (isNegative && idx == 2) || (!isNegative && idx == 1) {
continue
}
case 'o':
if (isNegative && idx == 2) || (!isNegative && idx == 1) {
continue
}
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
case '_', '.':
continue
case 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F':
if (len(str) > 2 && str[0] == '0' && str[1] == 'x') ||
(len(str) > 3 && isNegative && str[1] == '0' && str[2] == 'x') {
// hex number
continue
case 'a', 'b', 'c', 'd', 'f', 'A', 'B', 'C', 'D', 'F':
if typ != numTypeHex && typ != numTypeBinary {
return stat
}
if c == 'b' && ((isNegative && idx == 2) || (!isNegative && idx == 1)) {
// binary number
case 'e', 'E':
if typ == numTypeHex || typ == numTypeBinary {
continue
}
if (c == 'e' || c == 'E') && dotFound {
// exponent
isExponent = true
continue
if typ != numTypeFloat {
return stat
}
case '.':
if dotFound {
// multiple dot

// looks like exponent number.
if len(trimmed) <= idx+2 {
return stat
}
dotFound = true
continue
case '-':
if idx == 0 || isExponent {
continue
sign := trimmed[idx+1]
if sign != '+' && sign != '-' {
return stat
}
case '+':
if idx == 0 || isExponent {
continue
for _, c := range trimmed[idx+2:] {
if !isNumber(c) {
return stat
}
}
case '_':
continue
stat.isNum = true
stat.typ = typ
return stat
default:
return stat
}
return stat
}
stat.isNum = true
switch {
case dotFound:
stat.typ = numTypeFloat
case strings.HasPrefix(str, "0b") || strings.HasPrefix(str, "-0b"):
stat.typ = numTypeBinary
case strings.HasPrefix(str, "0x") || strings.HasPrefix(str, "-0x"):
stat.typ = numTypeHex
case strings.HasPrefix(str, "0o") || strings.HasPrefix(str, "-0o"):
stat.typ = numTypeOctet
case (len(str) > 1 && str[0] == '0') || (len(str) > 1 && str[0] == '-' && str[1] == '0'):
stat.typ = numTypeOctet
if numCount > 1 && trimmed[0] == '0' && typ == numTypeNone {
// YAML 1.1 Spec ?
typ = numTypeOctet
}
stat.isNum = true
stat.typ = typ
return stat
}

// isNumber reports whether c is one of the ASCII decimal digits '0' through '9'.
// Any other rune, including non-ASCII digit characters, yields false.
func isNumber(c rune) bool {
	return '0' <= c && c <= '9'
}

func looksLikeTimeValue(value string) bool {
for i, c := range value {
switch c {
Expand Down Expand Up @@ -672,7 +677,7 @@ func LiteralBlockHeader(value string) string {
}
}

// New create reserved keyword token or number token and other string token
// New create reserved keyword token or number token and other string token.
func New(value string, org string, pos *Position) *Token {
fn := reservedKeywordMap[value]
if fn != nil {
Expand Down

0 comments on commit 624c4fc

Please sign in to comment.