Skip to content

Commit

Permalink
builtin: implement a JS version of string.split_any (#21612)
Browse files Browse the repository at this point in the history
  • Loading branch information
juan-db authored Jun 8, 2024
1 parent ff865ea commit da4afef
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 1 deletion.
8 changes: 7 additions & 1 deletion vlib/builtin/js/jsfns.js.v
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ pub interface JS.Number {
JS.Any
}

// JS.RegExp is the V-side type for JavaScript `RegExp` objects.
// It embeds `JS.Any` and declares no methods of its own.
pub interface JS.RegExp {
JS.Any
}

// SplitSeparator is the separator type taken by `JS.String.split`:
// either a `JS.RegExp` or a plain `JS.String`.
pub type SplitSeparator = JS.RegExp | JS.String

@[single_impl]
pub interface JS.String {
JS.Any
Expand All @@ -32,7 +38,7 @@ pub interface JS.String {
endsWith(substr JS.String) JS.Boolean
startsWith(substr JS.String) JS.Boolean
slice(a JS.Number, b JS.Number) JS.String
split(dot JS.String) JS.Array
split(delim SplitSeparator) JS.Array
indexOf(needle JS.String) JS.Number
lastIndexOf(needle JS.String) JS.Number
}
Expand Down
40 changes: 40 additions & 0 deletions vlib/builtin/js/string.js.v
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,46 @@ pub fn (s string) split(dot string) []string {
return arr
}

// split_any splits `s` at every character that occurs in `delim` and returns
// the pieces between those characters.
//
// - An empty `delim` defers to `s.split(delim)`.
// - An empty `s` yields an empty array.
// - A delimiter in the last position of `s` does not produce a trailing empty
//   element; empty elements elsewhere (leading, or between two adjacent
//   delimiters) are kept.
//
// Every character of `delim` is matched literally, including the ones that
// are special inside a regex bracket expression (`\`, `]`, `^` and `-`).
pub fn (s string) split_any(delim string) []string {
	if delim.len == 0 {
		return s.split(delim)
	}
	if s.len == 0 {
		// JS `''.split(re)` returns `['']`; that single piece is the same
		// trailing empty element that is dropped below for non-empty input.
		return []string{}
	}

	// The delimiters are matched with a regex bracket expression, so the
	// caller must not be able to break out of it or change its meaning.
	// Backslash-escape the four characters that are special inside `[...]`:
	// `\` (escape), `]` (terminator), `^` (negation) and `-` (range).
	mut regexp := JS.RegExp{}
	#regexp = new RegExp('[' + delim.str.replace(/[\\\]^-]/g, '\\$&') + ']', 'g')

	// wrap every JS string piece into a V string
	tmparr := s.str.split(regexp).map(fn (it JS.Any) JS.Any {
		res := ''
		#res.str = it

		return res
	})
	// tmparr is only referenced from the inline JS below
	_ := tmparr

	mut arr := []string{}
	#arr = new array(new array_buffer({arr: tmparr,index_start: new int(0),len: new int(tmparr.length)}))

	// FIXME: ugly hack to handle the edge case where the last character in the
	// string is one of the delimiters, to match V behavior. The check is made
	// against the raw `delim`, not the escaped pattern, so the escaping
	// backslashes are never mistaken for delimiters.
	#if (delim.str.includes(s.str[s.str.length - 1])) {
	arr.pop()
	#}

	return arr
}

pub fn (s string) bytes() []u8 {
sep := ''
tmparr := s.str.split(sep.str).map(fn (it JS.Any) JS.Any {
Expand Down
45 changes: 45 additions & 0 deletions vlib/builtin/js/string_test.js.v
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,51 @@ fn test_split() {
assert vals[1] == ''
}

// test_split_any exercises `string.split_any` with an empty delimiter, an
// empty source, single and multiple delimiter characters, and delimiters that
// are special inside a regex bracket expression (`]` and `\`).
fn test_split_any() {
	// an empty delimiter splits the string into its characters
	chars := 'aaa'.split_any('')
	assert chars.len == 3
	assert chars[0] == 'a'
	assert chars[1] == 'a'
	assert chars[2] == 'a'

	// empty source with an empty delimiter gives no elements
	nothing := ''.split_any('')
	assert nothing.len == 0

	// a leading delimiter keeps an empty first element
	digits := '12131415'
	by_one := digits.split_any('1')
	assert by_one.len == 5
	assert by_one[0] == ''
	assert by_one[1] == '2'
	assert by_one[2] == '3'
	assert by_one[3] == '4'
	assert by_one[4] == '5'

	// several delimiter characters; the trailing one adds no empty element
	by_rest := digits.split_any('2345')
	assert by_rest.len == 4
	assert by_rest[0] == '1'
	assert by_rest[1] == '1'
	assert by_rest[2] == '1'
	assert by_rest[3] == '1'

	// `]` in the delimiter set, source separated by `,`
	by_comma := 'a,b,c'.split_any('],')
	assert by_comma.len == 3
	assert by_comma[0] == 'a'
	assert by_comma[1] == 'b'
	assert by_comma[2] == 'c'

	// `]` in the delimiter set, source separated by `]`
	by_bracket := 'a]b]c'.split_any('],')
	assert by_bracket.len == 3
	assert by_bracket[0] == 'a'
	assert by_bracket[1] == 'b'
	assert by_bracket[2] == 'c'

	// a backslash in the delimiter set does not disturb the other delimiters
	with_backslash := 'a]b]c'.split_any('],\\')
	assert with_backslash.len == 3
	assert with_backslash[0] == 'a'
	assert with_backslash[1] == 'b'
	assert with_backslash[2] == 'c'
}

/*
fn test_trim_space() {
a := ' a '
Expand Down

0 comments on commit da4afef

Please sign in to comment.