diff --git a/docs/data/how-to/hip_runtime_api/asynchronous/sequential_async_event.drawio b/docs/data/how-to/hip_runtime_api/asynchronous/sequential_async_event.drawio index b2b30b858e..fee6cab1ed 100644 --- a/docs/data/how-to/hip_runtime_api/asynchronous/sequential_async_event.drawio +++ b/docs/data/how-to/hip_runtime_api/asynchronous/sequential_async_event.drawio @@ -1,301 +1,301 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/data/how-to/hip_runtime_api/asynchronous/sequential_async_event.svg b/docs/data/how-to/hip_runtime_api/asynchronous/sequential_async_event.svg index 0b62bbda0e..54b48bcd9a 100644 --- a/docs/data/how-to/hip_runtime_api/asynchronous/sequential_async_event.svg +++ b/docs/data/how-to/hip_runtime_api/asynchronous/sequential_async_event.svg @@ -1,2 +1,4 @@ -
time
time
default stream
default stream
H2D
data1
H2D...
H2D
data2
H2D...
kernel
data1
kernel...
kernel
data2
kernel...
D2H
data1
D2H...
D2H
data2
D2H...
H2D
data2
H2D...
kernel
data2
kernel...
stream2
stream2
D2H
data2
D2H...
H2D
data1
H2D...
kernel
data1
kernel...
stream1
stream1
D2H
data1
D2H...
default stream
default stream
Seqeuntial calls:
Seqeuntial calls:
Asynchronous calls:
Asynchronous calls:
Asynchronous calls with hipEvent:
Asynchronous calls with hipEvent: -
H2D
data2
H2D...
kernel
data2
kernel...
stream2
stream2
H2D
data1
H2D...
kernel
data1
kernel...
stream1
stream1
D2H
data1
D2H...
default stream
default stream
event1
event1
event2
event2
D2H
data2
D2H...
create events
create events
Text is not SVG - cannot display
\ No newline at end of file + + + +
time
time
default stream
default stream
H2D
data1
H2D...
H2D
data2
H2D...
kernel
data1
kernel...
kernel
data2
kernel...
D2H
data1
D2H...
D2H
data2
D2H...
H2D
data2
H2D...
kernel
data2
kernel...
stream2
stream2
D2H
data2
D2H...
H2D
data1
H2D...
kernel
data1
kernel...
stream1
stream1
D2H
data1
D2H...
default stream
default stream
Seqeuntial calls:
Seqeuntial calls:
Asynchronous calls:
Asynchronous calls:
Asynchronous calls with hipEvent:
Asynchronous calls with hipEvent:
H2D
data2
H2D...
kernel
data2
kernel...
stream2
stream2
H2D
data1
H2D...
kernel
data1
kernel...
stream1
stream1
D2H
data2
D2H...
default stream
default stream
event1
event1
event2
event2
D2H
data1
D2H...
create events
create events
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/how-to/hip_runtime_api/asynchronous.rst b/docs/how-to/hip_runtime_api/asynchronous.rst index f55b0adca0..8596bf3d1c 100644 --- a/docs/how-to/hip_runtime_api/asynchronous.rst +++ b/docs/how-to/hip_runtime_api/asynchronous.rst @@ -94,9 +94,9 @@ When running kernels in parallel, the execution time can increase due to contention for shared resources. This is because multiple kernels may attempt to access the same GPU resources simultaneously, leading to delays. -Asynchronous kernel execution is beneficial only under specific conditions. -It is most effective when the kernels do not fully utilize the GPU's resources. -In such cases, overlapping kernel execution can improve overall throughput and +Asynchronous kernel execution is beneficial only under specific conditions It +is most effective when the kernels do not fully utilize the GPU's resources. In +such cases, overlapping kernel execution can improve overall throughput and efficiency by keeping the GPU busy without exceeding its capacity. Overlap of data transfer and kernel execution @@ -161,8 +161,8 @@ Synchronization, event management and synchronous calls =============================================================================== Synchronization and event management are important for coordinating tasks and -ensuring correct execution order, and synchronous calls are -necessary for maintaining data consistency. +ensuring correct execution order, and synchronous calls are necessary for +maintaining data consistency. Synchronous calls ------------------------------------------------------------------------------- @@ -199,7 +199,7 @@ Example ------------------------------------------------------------------------------- The examples shows the difference between sequential, asynchronous calls and -asynchronous calls with hipEvents. +asynchronous calls with ``hipEvents``. .. figure:: ../../data/how-to/hip_runtime_api/asynchronous/sequential_async_event.svg :alt: Compare the different calls @@ -396,28 +396,30 @@ The example codes // Stream 1: Host to Device 1 hipMemcpyAsync(d_data1, h_data1, N * sizeof(*d_data1), hipMemcpyHostToDevice, stream1); + // Stream 2: Host to Device 2 + hipMemcpyAsync(d_data2, h_data2, N * sizeof(*d_data2), hipMemcpyHostToDevice, stream2); + // Stream 1: Kernel 1 - hipLaunchKernelGGL(kernel, dim3(N/256), dim3(256), 0, stream1, d_data1, 1); + hipLaunchKernelGGL(kernel, dim3(N / 256), dim3(256), 0, stream1, d_data1, 1); - // Record event after the GPU kernel in stream1 + // Record event after the GPU kernel in Stream 1 hipEventRecord(event1, stream1); - // Stream 1: Device to Host 1 (after event) + // Stream 2: Wait for Event 1 before starting Kernel 2 hipStreamWaitEvent(stream2, event1, 0); - hipMemcpyAsync(h_data1, d_data1, N * sizeof(*h_data1), hipMemcpyDeviceToHost, stream2); - - // Stream 2: Host to Device 2 - hipMemcpyAsync(d_data2, h_data2, N * sizeof(*d_data2), hipMemcpyHostToDevice, stream2); // Stream 2: Kernel 2 - hipLaunchKernelGGL(kernel, dim3(N/256), dim3(256), 0, stream2, d_data2, 2); + hipLaunchKernelGGL(kernel, dim3(N / 256), dim3(256), 0, stream2, d_data2, 2); - // Record event after the GPU kernel in stream2 + // Record event after Kernel 2 in Stream 2 hipEventRecord(event2, stream2); - // Stream 2: Device to Host 2 (after event) + // Stream 1: Wait for Event 2 before Device to Host copy hipStreamWaitEvent(stream1, event2, 0); - hipMemcpyAsync(h_data2, d_data2, N * sizeof(*h_data2), hipMemcpyDeviceToHost, stream1); + hipMemcpyAsync(h_data1, d_data1, N * sizeof(*h_data1), hipMemcpyDeviceToHost, stream1); + + // Stream 2: Device to Host 2 (after Kernel 2) + hipMemcpyAsync(h_data2, d_data2, N * sizeof(*h_data2), hipMemcpyDeviceToHost, stream2); // Wait for all operations in both streams to complete hipStreamSynchronize(stream1);