Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Frei/device memory support #508

Open
wants to merge 50 commits into
base: devel
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
02997c6
fix wrong assert about maxTimeSegments.
freibold Sep 9, 2024
a503f23
fix missing sign-extension when using device memory for BVH data.
freibold Sep 18, 2024
cd6846e
BVH buffer in explicitly managed host/device memory on systems
freibold Sep 13, 2024
f5a4b9c
triangle geometry is now host/device, scene next
freibold Sep 26, 2024
539d966
All geom types work, spilling bug?
freibold Oct 16, 2024
b7a041f
compile fixes
freibold Oct 24, 2024
5c163b5
geometry data works
freibold Oct 28, 2024
160b9ca
add debug tutorial and restore state of other tutorials
freibold Oct 29, 2024
1b0ac5c
scene is now in device memory, too. USM shared memory is now only
freibold Nov 6, 2024
49ef39f
introduced read only handle RTCTraversable which is accessible on
freibold Nov 11, 2024
ef99300
memory passing interface
freibold Nov 18, 2024
bce7d2a
rework buffer interface
freibold Nov 22, 2024
6290555
test address sanitizer issue
freibold Nov 25, 2024
fe5839c
cleanups
freibold Nov 26, 2024
f6b5bb1
do not use ALIGNED_STRUCT_ macro for Scene and Geometry
freibold Nov 26, 2024
475f7f1
port of multi_instanced_geometry tutorial to RTCTraversable
freibold Nov 26, 2024
9d14cd8
port forest tutorial to new host/device memory support
freibold Nov 27, 2024
094b4b7
change API naming for buffers form 'Ex' to 'HostDevice'
freibold Nov 27, 2024
e65098d
add ISPC API using RTCTraversable
freibold Nov 29, 2024
3cd3f2f
create CPP from ISPC tutorials
freibold Nov 29, 2024
92c5f4d
add padding to tree vertex and color data buffers for safe SIMD
freibold Dec 2, 2024
0611461
small fix for validation API
freibold Dec 2, 2024
8c32f18
less alloc/free when creating device representation of scenen/geometry
freibold Dec 4, 2024
ac3d1c3
add sycl events for sychronization
freibold Dec 5, 2024
a5b7df0
add cmake option for explicit host/device BVH data
freibold Dec 5, 2024
b868759
clear host scratch space for static scenes
freibold Dec 5, 2024
94aa2e6
rename host_device_memory tutorial and add CI tests
freibold Dec 5, 2024
381e713
refactor alloc. remove global (tls) SYCL context and device.
freibold Dec 9, 2024
fd6a57a
add nightly Windows multilevel test.
freibold Dec 9, 2024
6261729
update documentation for RTCBuffer
freibold Dec 9, 2024
365d71a
fix wrong deallocation in multi_instance_geometry tutorial.
freibold Dec 9, 2024
0b0626d
update rtcSet(New|Shared)GeometryBuffer(|HostDevice) documentation
freibold Dec 10, 2024
5064047
update rtcCommitScene(|WithQueue) documentation
freibold Dec 10, 2024
3f6f6b5
rename memory types and account for host unified memory in buffer con…
freibold Dec 11, 2024
f97e8c5
revert accidental changes in ray flag settings
freibold Dec 11, 2024
54157fa
docu fixes
freibold Dec 11, 2024
ae21235
change rtcCommit???WithQueue interface to return sycl::event
freibold Dec 11, 2024
26771b1
make rtcCommitSceneWithQueue work with out-of-order SYCL queues, too
freibold Dec 11, 2024
9219fa3
updating general intro documentation
freibold Dec 11, 2024
6482924
documentation for ray queries using RTCTraversable
freibold Dec 11, 2024
e40bef0
more general documentation
freibold Dec 11, 2024
0e25353
add export filters for commit with queue API functions
freibold Dec 11, 2024
8dac949
add CI test for BVH data in host device memory
freibold Dec 11, 2024
a56ac13
test new ICX version
freibold Dec 11, 2024
cdf70a7
changelog, typos
freibold Dec 11, 2024
0e2e3a5
update internal Linux and Window driver in CI
freibold Dec 11, 2024
9950968
remove timing output from dynamic scene tutorial
freibold Dec 13, 2024
76f8c1f
fix issues with host/device memory API on iGPU
freibold Dec 13, 2024
13c62b6
fix blender segfault with geometry == NULL inside geometries array
dopitz Dec 16, 2024
2b5ddc2
preparations for merge
dopitz Jan 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
change rtcCommit???WithQueue interface to return sycl::event
freibold committed Dec 11, 2024
commit ae21235a10f08047d042c6d162ad474464414ae5
2 changes: 1 addition & 1 deletion include/embree4/rtcore_buffer.h
Original file line number Diff line number Diff line change
@@ -52,7 +52,7 @@ RTC_API void rtcCommitBuffer(RTCBuffer buffer);

#if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION)

RTC_API void rtcCommitBufferWithQueue(RTCBuffer buffer, sycl::queue queue, sycl::event* event);
RTC_API_CPP sycl::event rtcCommitBufferWithQueue(RTCBuffer buffer, sycl::queue queue);

#endif

2 changes: 1 addition & 1 deletion include/embree4/rtcore_device.h
Original file line number Diff line number Diff line change
@@ -34,7 +34,7 @@ RTC_API void rtcSetDeviceSYCLDevice(RTCDevice device, const sycl::device sycl_de

/* rtcCommitSceneWithQueue is asynchronous, user has to call queue.wait()
for synchronization. rtcCommitScene is blocking. */
RTC_API void rtcCommitSceneWithQueue(RTCScene scene, sycl::queue queue, sycl::event* event);
RTC_API_CPP sycl::event rtcCommitSceneWithQueue(RTCScene scene, sycl::queue queue);

#endif

11 changes: 4 additions & 7 deletions kernels/common/buffer.h
Original file line number Diff line number Diff line change
@@ -170,21 +170,18 @@ namespace embree
DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device);
if (gpu_device) {
sycl::queue queue(gpu_device->getGPUDevice());
commit(queue, nullptr);
commit(queue);
queue.wait_and_throw();
}
#endif
}

#if defined(EMBREE_SYCL_SUPPORT)
__forceinline void commit(sycl::queue queue, sycl::event* event) {
__forceinline sycl::event commit(sycl::queue queue) {
if (dptr == ptr)
return;
return sycl::event();

sycl::event last_event = queue.memcpy(dptr, ptr, numBytes);

if (event)
*event = last_event;
return queue.memcpy(dptr, ptr, numBytes);
}
#endif

10 changes: 6 additions & 4 deletions kernels/common/rtcore.cpp
Original file line number Diff line number Diff line change
@@ -83,25 +83,27 @@ RTC_NAMESPACE_BEGIN;
RTC_CATCH_END(nullptr);
}

RTC_API void rtcCommitSceneWithQueue (RTCScene hscene, sycl::queue queue, sycl::event* event)
RTC_API_CPP sycl::event rtcCommitSceneWithQueue (RTCScene hscene, sycl::queue queue)
{
Scene* scene = (Scene*) hscene;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcCommitSceneWithQueue);
RTC_VERIFY_HANDLE(hscene);
RTC_ENTER_DEVICE(hscene);
scene->commit(false, queue, event);
return scene->commit(false, queue);
RTC_CATCH_END2(scene);
return sycl::event();
}

RTC_API void rtcCommitBufferWithQueue(RTCBuffer hbuffer, sycl::queue queue, sycl::event* event) {
RTC_API_CPP sycl::event rtcCommitBufferWithQueue(RTCBuffer hbuffer, sycl::queue queue) {
Buffer* buffer = (Buffer*)hbuffer;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcCommitBufferWithQueue);
RTC_VERIFY_HANDLE(hbuffer);
RTC_ENTER_DEVICE(hbuffer);
buffer->commit(queue, event);
return buffer->commit(queue);
RTC_CATCH_END2(buffer);
return sycl::event();
}

#endif
4 changes: 2 additions & 2 deletions kernels/common/scene.cpp
Original file line number Diff line number Diff line change
@@ -909,10 +909,10 @@ namespace embree
}

#if defined(EMBREE_SYCL_SUPPORT)
void Scene::commit (bool join, sycl::queue queue, sycl::event* event)
sycl::event Scene::commit (bool join, sycl::queue queue)
{
commit_internal(join);
syncWithDevice(queue, event);
return syncWithDevice(queue);
}
#endif

4 changes: 2 additions & 2 deletions kernels/common/scene.h
Original file line number Diff line number Diff line change
@@ -194,7 +194,7 @@ namespace embree
void build_gpu_accels();
void commit_internal (bool join);
#if defined(EMBREE_SYCL_SUPPORT)
void commit (bool join, sycl::queue queue, sycl::event* event);
sycl::event commit (bool join, sycl::queue queue);
#endif
void commit (bool join);
void commit_task ();
@@ -319,7 +319,7 @@ namespace embree
#if defined(EMBREE_SYCL_SUPPORT)
private:
void syncWithDevice();
void syncWithDevice(sycl::queue queue, sycl::event* event);
sycl::event syncWithDevice(sycl::queue queue);
#endif

public:
22 changes: 22 additions & 0 deletions kernels/rtcore_config.h.in
Original file line number Diff line number Diff line change
@@ -36,13 +36,15 @@
# define RTC_NAMESPACE_END }
# define RTC_NAMESPACE_USE using namespace @EMBREE_API_NAMESPACE@;
# define RTC_API_EXTERN_C
# define RTC_API_EXTERN_CPP
# undef EMBREE_API_NAMESPACE
#else
# define RTC_NAMESPACE_BEGIN
# define RTC_NAMESPACE_END
# define RTC_NAMESPACE_USE
# if defined(__cplusplus)
# define RTC_API_EXTERN_C extern "C"
# define RTC_API_EXTERN_CPP extern "C++"
# else
# define RTC_API_EXTERN_C
# endif
@@ -62,12 +64,32 @@
# define RTC_API_EXPORT RTC_API_EXTERN_C __attribute__ ((visibility ("default")))
#endif

#if defined(ISPC)
# define RTC_API_IMPORT_CPP extern "C++" unmasked
# define RTC_API_EXPORT_CPP extern "C++" unmasked
#elif defined(EMBREE_STATIC_LIB)
# define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP
# define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP
#elif defined(_WIN32)
# define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP __declspec(dllimport)
# define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP __declspec(dllexport)
#else
# define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP
# define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP __attribute__ ((visibility ("default")))
#endif

#if defined(RTC_EXPORT_API)
# define RTC_API RTC_API_EXPORT
#else
# define RTC_API RTC_API_IMPORT
#endif

#if defined(RTC_EXPORT_API)
# define RTC_API_CPP RTC_API_EXPORT_CPP
#else
# define RTC_API_CPP RTC_API_IMPORT_CPP
#endif

#if defined(ISPC)
# define RTC_SYCL_INDIRECTLY_CALLABLE
#elif defined(__SYCL_DEVICE_ONLY__)
12 changes: 5 additions & 7 deletions kernels/sycl/scene_sycl.cpp
Original file line number Diff line number Diff line change
@@ -132,21 +132,21 @@ void Scene::syncWithDevice()
}

sycl::queue queue = sycl::queue(gpu_device->getGPUDevice());
syncWithDevice(queue, nullptr);
syncWithDevice(queue);
queue.wait_and_throw();
}

void Scene::syncWithDevice(sycl::queue queue, sycl::event* event)
sycl::event Scene::syncWithDevice(sycl::queue queue)
{
if(!device->is_gpu()) {
return;
return sycl::event();
}

// TODO: why is this compiled for __SYCL_DEVICE_ONLY__ ???
#if !defined(__SYCL_DEVICE_ONLY__)
accelBuffer.commit(queue);
#endif

const bool dynamic_scene = getSceneFlags() & RTC_SCENE_FLAG_DYNAMIC;

const bool num_geometries_changed = num_geometries != geometries.size();
@@ -225,9 +225,7 @@ void Scene::syncWithDevice(sycl::queue queue, sycl::event* event)
scene_device = (Scene*) device->malloc(sizeof(Scene), 16, EmbreeMemoryType::USM_DEVICE);
}

sycl::event last_event = queue.memcpy(scene_device, (void*)this, sizeof(Scene));
if (event)
*event = last_event;
return queue.memcpy(scene_device, (void*)this, sizeof(Scene));
}

#endif
2 changes: 1 addition & 1 deletion tutorials/dynamic_scene/dynamic_scene_device.cpp
Original file line number Diff line number Diff line change
@@ -328,7 +328,7 @@ extern "C" void device_render (int* pixels,
/* commit changes to scene */
auto start_commit = std::chrono::high_resolution_clock::now();
#if defined(EMBREE_SYCL_TUTORIAL)
rtcCommitSceneWithQueue (data.g_scene, *global_gpu_queue, nullptr);
rtcCommitSceneWithQueue (data.g_scene, *global_gpu_queue);
#else
rtcCommitScene (data.g_scene);
#endif
2 changes: 1 addition & 1 deletion tutorials/host_device_memory/host_device_memory_device.cpp
Original file line number Diff line number Diff line change
@@ -358,7 +358,7 @@ extern "C" void device_init (char* cfg)

/* commit changes to scene */
#if defined(EMBREE_SYCL_SUPPORT) && defined(EMBREE_SYCL_TUTORIAL)
rtcCommitSceneWithQueue (data.g_scene, *global_gpu_queue, nullptr);
rtcCommitSceneWithQueue (data.g_scene, *global_gpu_queue);
#else
rtcCommitScene (data.g_scene);
#endif