Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(node/util): use string.split #15889

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 0 additions & 88 deletions src/bun.js/node/node_util_binding.zig
Original file line number Diff line number Diff line change
Expand Up @@ -106,91 +106,3 @@ pub fn internalErrorName(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFr
var fmtstring = bun.String.createFormat("Unknown system error {d}", .{err_int}) catch bun.outOfMemory();
return fmtstring.transferToJS(globalThis);
}

/// `extractedSplitNewLines` for ASCII/Latin1 strings. Panics if passed a non-string.
/// Returns `undefined` if param is utf8 or utf16 and not fully ascii.
///
/// ```js
/// // util.js
/// const extractedNewLineRe = new RegExp("(?<=\\n)");
/// extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
/// ```
pub fn extractedSplitNewLinesFastPathStringsOnly(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue {
bun.assert(callframe.argumentsCount() == 1);
const value = callframe.argument(0);
bun.assert(value.isString());

const str = try value.toBunString2(globalThis);
defer str.deref();

return switch (str.encoding()) {
inline .utf16, .latin1 => |encoding| split(encoding, globalThis, bun.default_allocator, &str),
.utf8 => if (bun.strings.isAllASCII(str.byteSlice()))
return split(.utf8, globalThis, bun.default_allocator, &str)
else
return JSC.JSValue.jsUndefined(),
};
}

fn split(
comptime encoding: bun.strings.EncodingNonAscii,
globalThis: *JSC.JSGlobalObject,
allocator: Allocator,
str: *const bun.String,
) bun.JSError!JSC.JSValue {
var fallback = std.heap.stackFallback(1024, allocator);
const alloc = fallback.get();
const Char = switch (encoding) {
.utf8, .latin1 => u8,
.utf16 => u16,
};

var lines: std.ArrayListUnmanaged(bun.String) = .{};
defer {
for (lines.items) |out| {
out.deref();
}
lines.deinit(alloc);
}

const buffer: []const Char = if (encoding == .utf16)
str.utf16()
else
str.byteSlice();
var it: SplitNewlineIterator(Char) = .{ .buffer = buffer, .index = 0 };
while (it.next()) |line| {
const encoded_line = switch (encoding) {
inline .utf8 => bun.String.fromUTF8(line),
inline .latin1 => bun.String.createLatin1(line),
inline .utf16 => bun.String.fromUTF16(line),
};
errdefer encoded_line.deref();
try lines.append(alloc, encoded_line);
}

return bun.String.toJSArray(globalThis, lines.items);
}

pub fn SplitNewlineIterator(comptime T: type) type {
return struct {
buffer: []const T,
index: ?usize,

const Self = @This();

/// Returns a slice of the next field, or null if splitting is complete.
pub fn next(self: *Self) ?[]const T {
const start = self.index orelse return null;

if (std.mem.indexOfScalarPos(T, self.buffer, start, '\n')) |delim_start| {
const end = delim_start + 1;
const slice = self.buffer[start..end];
self.index = end;
return slice;
} else {
self.index = null;
return self.buffer[start..];
}
}
};
}
30 changes: 9 additions & 21 deletions src/js/internal/util/inspect.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,21 +141,6 @@ const kRejected = Symbol("kRejected"); // state ID 2
const ALL_PROPERTIES = 0;
const ONLY_ENUMERABLE = 2;

/**
* Fast path for {@link extractedSplitNewLines} for ASCII/Latin1 strings.
* @returns `value` split on newlines (newline included at end), or `undefined`
* if non-ascii UTF8/UTF16.
*
* Passing this a non-string will cause a panic.
*
* @type {(value: string) => string[] | undefined}
*/
const extractedSplitNewLinesFastPathStringsOnly = $newZigFunction(
"node_util_binding.zig",
"extractedSplitNewLinesFastPathStringsOnly",
1,
);

const isAsyncFunction = v =>
typeof v === "function" && StringPrototypeStartsWith(FunctionPrototypeToString(v), "async");
const isGeneratorFunction = v =>
Expand Down Expand Up @@ -430,8 +415,6 @@ try {
"[\\x00-\\x1f\\x5c\\x7f-\\x9f]|[\\ud800-\\udbff](?![\\udc00-\\udfff])|(?<![\\ud800-\\udbff])[\\udc00-\\udfff]",
"g",
);
const extractedNewLineRe = new RegExp("(?<=\\n)");
extractedSplitNewLinesSlow = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
// CI doesn't run in an elderly runtime
} catch {
// These are from a previous version of node,
Expand All @@ -453,11 +436,16 @@ try {
}

const extractedSplitNewLines = value => {
if (typeof value === "string") {
return extractedSplitNewLinesFastPathStringsOnly(value) || extractedSplitNewLinesSlow(value);
const lines = StringPrototypeSplit(value, "\n");
DonIsaac marked this conversation as resolved.
Show resolved Hide resolved
for (let i = 0; i < lines.length; i++) {
if (i !== lines.length - 1) {
lines[i] += "\n";
continue;
}
if (lines[i] === "") lines.pop();
}
return extractedSplitNewLinesSlow(value);
}
return lines;
};

const keyStrRegExp = /^[a-zA-Z_][a-zA-Z_0-9]*$/;
const numberRegExp = /^(0|[1-9][0-9]*)$/;
Expand Down
Loading