Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WebNN EP] Automatically move input CPU tensors to ml-tensor #23073

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions js/web/lib/wasm/jsep/backend-webnn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,19 @@ export class WebNNBackend {
* Current session id.
*/
private activeSessionId?: number;
/**
* Maps from session id to list of graph inputs.
*/
private sessionGraphInputs: Map<number, string[]> = new Map();
/**
* Temporary graph inputs for the current session.
* These inputs will be registered when the session is created.
*/
private temporaryGraphInputs: string[] = [];
/**
* Maps from session id to the temporary tensors created for that session; they are released in onRunEnd.
*/
private temporarySessionTensors: Map<number, TensorId[]> = new Map();
egalli marked this conversation as resolved.
Show resolved Hide resolved

constructor(env: Env) {
configureLogger(env.logLevel!, !!env.debug);
Expand All @@ -91,6 +104,19 @@ export class WebNNBackend {
this.activeSessionId = sessionId;
}

/**
 * Releases the temporary tensors that were recorded for a session.
 *
 * Looks up the tensor ids stored in `temporarySessionTensors` under `sessionId` (entries are added by
 * `createTemporaryTensor`), hands each one to `tensorManager.releaseTensorId`, and then removes the
 * session's entry from the map.
 *
 * @param sessionId - the session whose temporary tensors should be released.
 */
public onRunEnd(sessionId: number): void {
LOG_DEBUG('verbose', () => `[WebNN] onRunEnd {sessionId: ${sessionId}}`);
const tensors = this.temporarySessionTensors.get(sessionId);
egalli marked this conversation as resolved.
Show resolved Hide resolved
if (!tensors) {
// No temporary tensors were recorded for this session, so there is nothing to release.
return;
}
for (const tensor of tensors) {
LOG_DEBUG('verbose', () => `[WebNN] releasing temporary tensor {tensorId: ${tensor}}`);
this.tensorManager.releaseTensorId(tensor);
}
// Drop the bookkeeping entry so the same ids are not released again on a later call.
this.temporarySessionTensors.delete(sessionId);
}

public async createMLContext(optionsOrDevice?: MLContextOptions | GPUDevice): Promise<MLContext> {
if (optionsOrDevice instanceof GPUDevice) {
const mlContextIndex = this.mlContextCache.findIndex((entry) => entry.gpuDevice === optionsOrDevice);
Expand Down Expand Up @@ -142,9 +168,15 @@ export class WebNNBackend {
this.sessionIdsByMLContext.set(mlContext, sessionIds);
}
sessionIds.add(sessionId);

if (this.temporaryGraphInputs.length > 0) {
this.sessionGraphInputs.set(sessionId, this.temporaryGraphInputs);
this.temporaryGraphInputs = [];
}
}

public onReleaseSession(sessionId: number): void {
this.sessionGraphInputs.delete(sessionId);
const mlContext = this.mlContextBySessionId.get(sessionId)!;
if (!mlContext) {
// Current session is not a WebNN session.
Expand Down Expand Up @@ -189,6 +221,23 @@ export class WebNNBackend {
return this.tensorManager.ensureTensor(tensorId, webnnDataType, dimensions, copyOld);
}

/**
 * Creates a temporary tensor and records it against the session that is current at call time, so that
 * `onRunEnd` can release it later.
 *
 * @param onnxDataType - the ONNX data type of the tensor; it is translated through
 *   `onnxDataTypeToWebnnDataType`.
 * @param shape - the dimensions passed to `tensorManager.ensureTensor`.
 * @returns the id reserved from `tensorManager` for the new tensor.
 * @throws Error if `onnxDataType` has no entry in `onnxDataTypeToWebnnDataType`.
 */
public async createTemporaryTensor(onnxDataType: DataType, shape: readonly number[]): Promise<TensorId> {
  LOG_DEBUG('verbose', () => `[WebNN] createTemporaryTensor {onnxDataType: ${onnxDataType}, shape: ${shape}}`);
  const dataType = onnxDataTypeToWebnnDataType.get(onnxDataType);
  if (!dataType) {
    throw new Error(`Unsupported ONNX data type: ${onnxDataType}`);
  }
  // Read the session id exactly once, BEFORE awaiting. Other work may run while `ensureTensor` is pending
  // and change which session is current; reading `currentSessionId` afterwards could file this tensor
  // under a different session, so it would be released at the wrong time or not at all.
  const sessionId = this.currentSessionId;
  const tensorId = this.tensorManager.reserveTensorId();
  await this.tensorManager.ensureTensor(tensorId, dataType, shape, false);
  const sessionTensors = this.temporarySessionTensors.get(sessionId);
  if (sessionTensors) {
    sessionTensors.push(tensorId);
  } else {
    this.temporarySessionTensors.set(sessionId, [tensorId]);
  }
  return tensorId;
}

public uploadTensor(tensorId: TensorId, data: Uint8Array): void {
const wasm = getInstance();
if (!wasm.shouldTransferToMLTensor) {
Expand Down Expand Up @@ -291,6 +340,19 @@ export class WebNNBackend {
return builder.constant(desc, bufferView);
}

/**
 * Records a graph input name in `temporaryGraphInputs`.
 *
 * @param inputName - the name of the graph input to record.
 */
public registerGraphInput(inputName: string): void {
this.temporaryGraphInputs.push(inputName);
}

/**
 * Reports whether `inputName` is among the graph inputs stored in `sessionGraphInputs` for the current
 * session.
 *
 * @param inputName - the input name to look up.
 * @returns `true` if the current session has a stored list containing `inputName`; `false` if the list
 *   does not contain it or no list is stored for the current session.
 */
public isGraphInput(inputName: string): boolean {
  const registeredInputs = this.sessionGraphInputs.get(this.currentSessionId);
  return registeredInputs !== undefined && registeredInputs.includes(inputName);
}

/**
 * Intentionally empty: this method performs no work.
 */
public flush(): void {
// Unlike the WebGPU backend, the WebNN backend does not need to flush any pending operations.
}
Expand Down
45 changes: 36 additions & 9 deletions js/web/lib/wasm/wasm-core-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -453,14 +453,14 @@ export const releaseSession = (sessionId: number): void => {
activeSessions.delete(sessionId);
};

export const prepareInputOutputTensor = (
export const prepareInputOutputTensor = async (
tensor: TensorMetadata | null,
tensorHandles: number[],
allocs: number[],
sessionId: number,
index: number,
enableGraphCapture = false,
): void => {
): Promise<void> => {
if (!tensor) {
tensorHandles.push(0);
return;
Expand All @@ -472,6 +472,7 @@ export const prepareInputOutputTensor = (
const dataType = tensor[0];
const dims = tensor[1];
const location = tensor[3];
let actualLocation = location;

let rawData: number;
let dataByteLength: number;
Expand Down Expand Up @@ -519,10 +520,35 @@ export const prepareInputOutputTensor = (
wasm.setValue(rawData + i * ptrSize, allocWasmString(data[i], allocs), '*');
}
} else {
dataByteLength = data.byteLength;
rawData = wasm._malloc(dataByteLength);
allocs.push(rawData);
wasm.HEAPU8.set(new Uint8Array(data.buffer, data.byteOffset, dataByteLength), rawData);
const isGraphInput = wasm.jsepIsGraphInput;
if (dataType !== 'string' && isGraphInput) {
const tensorNameUTF8 = wasm._OrtGetInputName(sessionId, index);
const tensorName = wasm.UTF8ToString(tensorNameUTF8);
// Promote the tensor to 'ml-tensor' if it is a graph input.
if (isGraphInput(tensorName)) {
const dataTypeEnum = tensorDataTypeStringToEnum(dataType);
dataByteLength = calculateTensorSizeInBytes(dataTypeEnum, dims)!;
actualLocation = 'ml-tensor';
const createTemporaryTensor = wasm.jsepCreateTemporaryTensor;
const uploadTensor = wasm.jsepUploadTensor;
if (!createTemporaryTensor || !uploadTensor) {
throw new Error('Tensor location "ml-tensor" is not supported without using WebNN.');
}
const tensorId = await createTemporaryTensor(dataTypeEnum, dims as number[]);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found an issue while debugging microsoft/webnn-developer-preview#69

We can't safely use await and expect WebNNBackend.activeSessionId to remain valid.

We'll need to manually pass the sessionHandle/Id to createTemporaryTensor and isGraphInput

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It appears you pushed more commits related to sessionHandle. So is this comment resolvable now?

uploadTensor(tensorId, new Uint8Array(data.buffer, data.byteOffset, data.byteLength));
rawData = tensorId;
} else {
dataByteLength = data.byteLength;
rawData = wasm._malloc(dataByteLength);
allocs.push(rawData);
wasm.HEAPU8.set(new Uint8Array(data.buffer, data.byteOffset, dataByteLength), rawData);
}
} else {
dataByteLength = data.byteLength;
rawData = wasm._malloc(dataByteLength);
allocs.push(rawData);
wasm.HEAPU8.set(new Uint8Array(data.buffer, data.byteOffset, dataByteLength), rawData);
}
}
}

Expand All @@ -536,7 +562,7 @@ export const prepareInputOutputTensor = (
dataByteLength,
dimsOffset,
dims.length,
dataLocationStringToEnum(location),
dataLocationStringToEnum(actualLocation),
);
if (tensor === 0) {
checkLastError(`Can't create tensor for input/output. session=${sessionId}, index=${index}.`);
Expand Down Expand Up @@ -595,7 +621,7 @@ export const run = async (

// create input tensors
for (let i = 0; i < inputCount; i++) {
prepareInputOutputTensor(
await prepareInputOutputTensor(
inputTensors[i],
inputTensorHandles,
inputOutputAllocs,
Expand All @@ -607,7 +633,7 @@ export const run = async (

// create output tensors
for (let i = 0; i < outputCount; i++) {
prepareInputOutputTensor(
await prepareInputOutputTensor(
outputTensors[i],
outputTensorHandles,
inputOutputAllocs,
Expand Down Expand Up @@ -841,6 +867,7 @@ export const run = async (
if (!keepOutputTensor) {
wasm._OrtReleaseTensor(tensor);
}
wasm.jsepOnRunEnd?.(sessionHandle);
}
}

Expand Down
25 changes: 25 additions & 0 deletions js/web/lib/wasm/wasm-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ export declare namespace JSEP {
* @param sessionId - specify the session ID.
*/
jsepOnRunStart: (sessionId: number) => void;
/**
* [exported from pre-jsep.js] Called when InferenceSession.run has finished. This function will be called after
* _OrtRun[WithBinding]() is called.
* @param sessionId - specify the session ID.
*/
jsepOnRunEnd: (sessionId: number) => void;
/**
* [exported from pre-jsep.js] Create a session. This function will be called after _OrtCreateSession() is
* called.
Expand Down Expand Up @@ -249,6 +255,25 @@ export declare namespace JSEP {
builder: MLGraphBuilder,
desc: MLOperandDescriptor,
): MLOperand;

/**
* [exported from pre-jsep.js] Register a WebNN graph input.
* @param inputName - specify the input name.
*/
jsepRegisterGraphInput(inputName: string): void;
/**
* [exported from pre-jsep.js] Check whether the given input name is a registered WebNN graph input.
* @param inputName - specify the input name.
* @returns whether the input is a WebNN graph input.
*/
jsepIsGraphInput(inputName: string): boolean;
/**
* [exported from pre-jsep.js] Create a temporary MLTensor for a session.
* @param dataType - specify the data type.
* @param shape - specify the shape.
* @returns the MLTensor ID for the temporary MLTensor.
*/
jsepCreateTemporaryTensor: (dataType: DataType, shape: readonly number[]) => Promise<number>;
}
}

Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/webnn/builders/model_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i

if (is_input) {
wnn_operands_.insert(std::make_pair(name, wnn_builder_.call<emscripten::val>("input", name, desc)));
emscripten::val::module_property("jsepRegisterGraphInput")(name);
input_names_.push_back(name);
} else {
output_names_.push_back(name);
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/wasm/pre-jsep.js
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,14 @@ Module['jsepInit'] = (name, params) => {

// This function is called from both JS and an EM_ASM block, it needs both a minifiable name and an explicit name.
Module['jsepReleaseTensorId'] = Module.jsepReleaseTensorId;
Module['jsepUploadTensor'] = Module.jsepUploadTensor;

// Functions called from JS also need to have explicit names.
const backend = Module.jsepBackend;
Module['jsepOnRunStart'] = sessionId => {
return backend['onRunStart'](sessionId);
};
Module['jsepOnRunEnd'] = backend['onRunEnd'].bind(backend);
Module['jsepRegisterMLContext'] = (sessionId, mlContext) => {
backend['registerMLContext'](sessionId, mlContext);
};
Expand All @@ -245,5 +247,9 @@ Module['jsepInit'] = (name, params) => {
return backend['registerMLConstant'](
externalFilePath, dataOffset, dataLength, builder, desc, Module.MountedFiles);
};
Module['jsepRegisterGraphInput'] = backend['registerGraphInput'].bind(backend);
Module['jsepIsGraphInput'] = backend['isGraphInput'].bind(backend);

Module['jsepCreateTemporaryTensor'] = backend['createTemporaryTensor'].bind(backend);
}
};
Loading