Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion core/engine/src/builtins/intl/segmenter/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ impl SegmentIterator {
.segmenter
.downcast_ref::<Segmenter>()
.expect("segment iterator object should contain a segmenter");
let mut segments = segmenter.native.segment(string);
let mut segments = segmenter.native.segment(string.variant());
// the first elem is always 0.
segments.next();
segments
Expand Down
29 changes: 16 additions & 13 deletions core/engine/src/builtins/intl/segmenter/mod.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,7 @@
use std::ops::Range;

use boa_gc::{Finalize, Trace};
use icu_collator::provider::CollationDiacriticsV1;
use icu_locale::Locale;
use icu_segmenter::{
GraphemeClusterSegmenter, SentenceSegmenter, WordSegmenter,
options::{SentenceBreakOptions, WordBreakOptions},
};

use crate::{
Context, JsArgs, JsData, JsNativeError, JsResult, JsStr, JsString, JsSymbol, JsValue,
Context, JsArgs, JsData, JsNativeError, JsResult, JsString, JsSymbol, JsValue,
builtins::{
BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject,
options::{get_option, get_options_object},
Expand All @@ -21,6 +13,14 @@ use crate::{
realm::Realm,
string::StaticJsStrings,
};
use boa_gc::{Finalize, Trace};
use boa_string::JsStrVariant;
use icu_collator::provider::CollationDiacriticsV1;
use icu_locale::Locale;
use icu_segmenter::{
GraphemeClusterSegmenter, SentenceSegmenter, WordSegmenter,
options::{SentenceBreakOptions, WordBreakOptions},
};

mod iterator;
mod options;
Expand Down Expand Up @@ -62,9 +62,12 @@ impl NativeSegmenter {

/// Segment the passed string, returning an iterator with the index boundaries
/// of the segments.
pub(crate) fn segment<'l, 's>(&'l self, input: JsStr<'s>) -> NativeSegmentIterator<'l, 's> {
match input.variant() {
crate::string::JsStrVariant::Latin1(input) => match self {
pub(crate) fn segment<'l, 's>(
&'l self,
input: JsStrVariant<'s>,
) -> NativeSegmentIterator<'l, 's> {
match input {
JsStrVariant::Latin1(input) => match self {
Self::Grapheme(g) => {
NativeSegmentIterator::GraphemeLatin1(g.as_borrowed().segment_latin1(input))
}
Expand All @@ -75,7 +78,7 @@ impl NativeSegmenter {
NativeSegmentIterator::SentenceLatin1(s.as_borrowed().segment_latin1(input))
}
},
crate::string::JsStrVariant::Utf16(input) => match self {
JsStrVariant::Utf16(input) => match self {
Self::Grapheme(g) => {
NativeSegmentIterator::GraphemeUtf16(g.as_borrowed().segment_utf16(input))
}
Expand Down
2 changes: 1 addition & 1 deletion core/engine/src/builtins/intl/segmenter/segments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl Segments {
// 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
// 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
let (range, is_word_like) = {
let mut segments = segmenter.native.segment(segments.string.as_str());
let mut segments = segmenter.native.segment(segments.string.variant());
std::iter::from_fn(|| segments.next().map(|i| (i, segments.is_word_like())))
.tuple_windows()
.find(|((i, _), (j, _))| (*i..*j).contains(&n))
Expand Down
2 changes: 1 addition & 1 deletion core/engine/src/builtins/json/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ impl Json {
// 7. Else if Type(space) is String, then
} else if let Some(s) = space.as_string() {
// a. If the length of space is 10 or less, let gap be space; otherwise let gap be the substring of space from 0 to 10.
js_string!(s.get(..10).unwrap_or(s.as_str()))
s.get(..10).unwrap_or(s)
// 8. Else,
} else {
// a. Let gap be the empty String.
Expand Down
33 changes: 22 additions & 11 deletions core/engine/src/builtins/number/globals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ pub(crate) fn parse_int(_: &JsValue, args: &[JsValue], context: &mut Context) ->
// 0 digit, at the option of the implementation; and if R is not 2, 4, 8, 10, 16, or 32, then
// mathInt may be an implementation-approximated value representing the integer value that is
// represented by Z in radix-R notation.)
let math_int = from_js_str_radix(z, r).expect("Already checked");
let math_int = from_js_str_radix(z.as_str(), r).expect("Already checked");

// 15. If mathInt = 0, then
// a. If sign = -1, return -0𝔽.
Expand Down Expand Up @@ -303,11 +303,6 @@ pub(crate) fn parse_float(
args: &[JsValue],
context: &mut Context,
) -> JsResult<JsValue> {
const PLUS_CHAR: u16 = b'+' as u16;
const MINUS_CHAR: u16 = b'-' as u16;
const LOWER_CASE_I_CHAR: u16 = b'i' as u16;
const UPPER_CASE_I_CHAR: u16 = b'I' as u16;

let Some(string) = args.first() else {
return Ok(JsValue::nan());
};
Expand All @@ -333,18 +328,34 @@ pub(crate) fn parse_float(
// 5. Let parsedNumber be ParseText(trimmedPrefix, StrDecimalLiteral).
// 6. Assert: parsedNumber is a Parse Node.
// 7. Return the StringNumericValue of parsedNumber.
let (positive, prefix) = match trimmed_string.get(0) {
Some(PLUS_CHAR) => (true, trimmed_string.get(1..).unwrap_or(JsStr::latin1(&[]))),
Some(MINUS_CHAR) => (false, trimmed_string.get(1..).unwrap_or(JsStr::latin1(&[]))),
_ => (true, trimmed_string),
let (positive, prefix) = match trimmed_string
.code_unit_at(0)
.and_then(|c| char::from_u32(u32::from(c)))
{
Some('+') => (
true,
trimmed_string
.get(1..)
.unwrap_or(StaticJsStrings::EMPTY_STRING),
),
Some('-') => (
false,
trimmed_string
.get(1..)
.unwrap_or(StaticJsStrings::EMPTY_STRING),
),
_ => (true, trimmed_string.clone()),
};

if prefix.starts_with(js_str!("Infinity")) {
if positive {
return Ok(JsValue::positive_infinity());
}
return Ok(JsValue::negative_infinity());
} else if let Some(LOWER_CASE_I_CHAR | UPPER_CASE_I_CHAR) = prefix.get(0) {
} else if let Some('I' | 'i') = prefix
.code_unit_at(0)
.and_then(|c| char::from_u32(u32::from(c)))
{
return Ok(JsValue::nan());
}

Expand Down
2 changes: 1 addition & 1 deletion core/engine/src/builtins/regexp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1788,7 +1788,7 @@ impl RegExp {
// 17. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition.
Ok(js_string!(
&JsString::from(&accumulated_result[..]),
s.get_expect(next_source_position..)
&s.get_expect(next_source_position..)
)
.into())
}
Expand Down
18 changes: 8 additions & 10 deletions core/engine/src/builtins/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -620,11 +620,10 @@ impl String {

match position {
// 4. Let size be the length of S.
IntegerOrInfinity::Integer(i) if i >= 0 => {
IntegerOrInfinity::Integer(i) if i >= 0 && i < string.len() as i64 => {
// 6. Return the Number value for the numeric value of the code unit at index position within the String S.
Ok(string
.get(i as usize)
.map_or_else(JsValue::nan, JsValue::from))
// SAFETY: already validated the index.
Ok(unsafe { string.code_unit_at(i as usize).unwrap_unchecked() }.into())
}
// 5. If position < 0 or position ≥ size, return NaN.
_ => Ok(JsValue::nan()),
Expand Down Expand Up @@ -1043,7 +1042,7 @@ impl String {
};

// 10. Let preserved be the substring of string from 0 to position.
let preserved = JsString::from(string.get_expect(..position));
let preserved = string.get_expect(..position);

let replacement = match replace_value {
// 11. If functionalReplace is true, then
Expand Down Expand Up @@ -1080,7 +1079,7 @@ impl String {
Ok(js_string!(
&preserved,
&replacement,
&JsString::from(string.get_expect(position + search_length..))
&string.get_expect(position + search_length..)
)
.into())
}
Expand Down Expand Up @@ -1675,7 +1674,7 @@ impl String {
// 2. Return ? TrimString(S, end).
let object = this.require_object_coercible()?;
let string = object.to_string(context)?;
Ok(js_string!(string.trim_end()).into())
Ok(string.trim_end().into())
}

/// [`String.prototype.toUpperCase()`][upper] and [`String.prototype.toLowerCase()`][lower]
Expand Down Expand Up @@ -1957,9 +1956,8 @@ impl String {
if separator_length == 0 {
// a. Let head be the substring of S from 0 to lim.
// b. Let codeUnits be a List consisting of the sequence of code units that are the elements of head.
let head = this_str
.get(..lim)
.unwrap_or(this_str.as_str())
let head_str = this_str.get(..lim).unwrap_or(this_str);
let head = head_str
.iter()
.map(|code| js_string!(std::slice::from_ref(&code)).into());

Expand Down
2 changes: 1 addition & 1 deletion core/engine/src/builtins/string/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ fn char_at() {
#[test]
fn char_code_at() {
run_test_actions([
TestAction::assert_eq("'abc'.charCodeAt-1", f64::NAN),
TestAction::assert_eq("'abc'.charCodeAt(-1)", f64::NAN),
TestAction::assert_eq("'abc'.charCodeAt(1)", 98),
TestAction::assert_eq("'abc'.charCodeAt(9)", f64::NAN),
TestAction::assert_eq("'abc'.charCodeAt()", 97),
Expand Down
35 changes: 24 additions & 11 deletions core/engine/src/builtins/uri/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ where
}

// b. Let C be the code unit at index k within string.
let c = string.get_expect(k);
let c = string.code_unit_at(k).expect("Bounds were verified");

// c. If C is in unescapedSet, then
if unescaped_set(c) {
Expand Down Expand Up @@ -384,7 +384,7 @@ where
}

// b. Let C be the code unit at index k within string.
let c = string.get_expect(k);
let c = string.code_point_at(k).as_u32() as u16;

// c. If C is not the code unit 0x0025 (PERCENT SIGN), then
#[allow(clippy::if_not_else)]
Expand All @@ -406,10 +406,17 @@ where
// iii. If the code units at index (k + 1) and (k + 2) within string do not represent
// hexadecimal digits, throw a URIError exception.
// iv. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2).
let b = decode_hex_byte(string.get_expect(k + 1), string.get_expect(k + 2))
.ok_or_else(|| {
JsNativeError::uri().with_message("invalid hexadecimal digit found")
})?;

// SAFETY: the indices have been verified as valid already.
let (high, low) = unsafe {
(
string.code_unit_at(k + 1).unwrap_unchecked(),
string.code_unit_at(k + 2).unwrap_unchecked(),
)
};
let b = decode_hex_byte(high, low).ok_or_else(|| {
JsNativeError::uri().with_message("invalid hexadecimal digit found")
})?;

// v. Set k to k + 2.
k += 2;
Expand Down Expand Up @@ -457,18 +464,24 @@ where
k += 1;

// b. If the code unit at index k within string is not the code unit 0x0025 (PERCENT SIGN), throw a URIError exception.
if string.get_expect(k) != 0x0025 {
if string.code_unit_at(k) != Some(0x0025) {
return Err(JsNativeError::uri()
.with_message("escape characters must be preceded with a % sign")
.into());
}

// c. If the code units at index (k + 1) and (k + 2) within string do not represent hexadecimal digits, throw a URIError exception.
// d. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2).
let b = decode_hex_byte(string.get_expect(k + 1), string.get_expect(k + 2))
.ok_or_else(|| {
JsNativeError::uri().with_message("invalid hexadecimal digit found")
})?;
// SAFETY: the indices have been verified as valid already.
let (high, low) = unsafe {
(
string.code_unit_at(k + 1).unwrap_unchecked(),
string.code_unit_at(k + 2).unwrap_unchecked(),
)
};
let b = decode_hex_byte(high, low).ok_or_else(|| {
JsNativeError::uri().with_message("invalid hexadecimal digit found")
})?;

// e. Set k to k + 2.
k += 2;
Expand Down
2 changes: 1 addition & 1 deletion core/engine/src/vm/opcode/get/property.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ fn get_by_value<const PUSH_KEY: bool>(
}
} else if let Some(string) = base.as_string() {
let value = string
.get(index.get() as usize)
.code_unit_at(index.get() as usize)
.map_or_else(JsValue::undefined, |char| {
js_string!([char].as_slice()).into()
});
Expand Down
13 changes: 13 additions & 0 deletions core/string/src/code_point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,16 @@ impl std::fmt::Display for CodePoint {
}
}
}

impl From<char> for CodePoint {
fn from(value: char) -> Self {
Self::Unicode(value)
}
}

impl From<u16> for CodePoint {
fn from(value: u16) -> Self {
char::from_u32(u32::from(value))
.map_or_else(|| CodePoint::UnpairedSurrogate(value), CodePoint::Unicode)
}
}
34 changes: 18 additions & 16 deletions core/string/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ use std::cell::RefCell;
use std::fmt;
use std::fmt::Write;

/// Display implementation for [`JsString`] that escapes unicode characters.
#[derive(Debug)]
/// `Display` implementation for [`JsString`] that escapes unpaired surrogates as `\uXXXX`.
// This should not implement debug, only be shown as a standard display.
#[allow(missing_debug_implementations)]
pub struct JsStrDisplayEscaped<'a> {
inner: JsStr<'a>,
inner: &'a JsString,
}

impl fmt::Display for JsStrDisplayEscaped<'_> {
Expand All @@ -30,14 +31,15 @@ impl fmt::Display for JsStrDisplayEscaped<'_> {
}
}

impl<'a> From<JsStr<'a>> for JsStrDisplayEscaped<'a> {
fn from(inner: JsStr<'a>) -> Self {
impl<'a> From<&'a JsString> for JsStrDisplayEscaped<'a> {
fn from(inner: &'a JsString) -> Self {
Self { inner }
}
}

/// Display implementation for [`JsString`] that escapes unicode characters.
#[derive(Debug)]
/// `Display` implementation for [`JsString`] that lossily replaces unpaired surrogates with `U+FFFD`.
// This should not implement debug, only be shown as a standard display.
#[allow(missing_debug_implementations)]
pub struct JsStrDisplayLossy<'a> {
inner: JsStr<'a>,
}
Expand Down Expand Up @@ -117,35 +119,35 @@ impl<'a> From<&'a JsString> for JsStringDebugInfo<'a> {
#[test]
fn latin1() {
// 0xE9 is `é` in ISO-8859-1 (see https://www.ascii-code.com/ISO-8859-1).
let s = JsStr::latin1(b"Hello \xE9 world!");
let s = JsString::from("Hello \u{E9} world!");

let rust_str = format!("{}", JsStrDisplayEscaped { inner: s });
let rust_str = format!("{}", JsStrDisplayEscaped { inner: &s });
assert_eq!(rust_str, "Hello é world!");

let rust_str = format!("{}", JsStrDisplayLossy { inner: s });
let rust_str = format!("{}", JsStrDisplayLossy { inner: s.as_str() });
assert_eq!(rust_str, "Hello é world!");
}

#[test]
fn emoji() {
// 0x1F600 is `😀` (see https://www.fileformat.info/info/unicode/char/1f600/index.htm).
let s = JsStr::utf16(&[0xD83D, 0xDE00]);
let s = JsString::from(&[0xD83D, 0xDE00]);

let rust_str = format!("{}", JsStrDisplayEscaped { inner: s });
let rust_str = format!("{}", JsStrDisplayEscaped { inner: &s });
assert_eq!(rust_str, "😀");

let rust_str = format!("{}", JsStrDisplayLossy { inner: s });
let rust_str = format!("{}", JsStrDisplayLossy { inner: s.as_str() });
assert_eq!(rust_str, "😀");
}

#[test]
fn unpaired_surrogates() {
// 0xD800 is an unpaired surrogate (see https://www.fileformat.info/info/unicode/char/d800/index.htm).
let s = JsStr::utf16(&[0xD800]);
let s = JsString::from(&[0xD800]);

let rust_str = format!("{}", JsStrDisplayEscaped { inner: s });
let rust_str = format!("{}", JsStrDisplayEscaped { inner: &s });
assert_eq!(rust_str, "\\uD800");

let rust_str = format!("{}", JsStrDisplayLossy { inner: s });
let rust_str = format!("{}", JsStrDisplayLossy { inner: s.as_str() });
assert_eq!(rust_str, "�");
}
Loading
Loading