Skip to content

Commit 6801315

Browse files
Merge branch 'bottleneckOpts' into 'main'
[REMIX-3744] optimize bottlenecks when instances are repeated many times See merge request lightspeedrtx/dxvk-remix-nv!1193
2 parents 368525a + 918e3d1 commit 6801315

10 files changed

+193
-133
lines changed

src/dxvk/rtx_render/rtx_camera_manager.cpp

+35-15
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
*/
2222
#include "rtx_camera_manager.h"
2323

24-
#include "rtx_matrix_helpers.h"
25-
2624
#include "dxvk_device.h"
2725

2826
namespace {
@@ -44,6 +42,7 @@ namespace dxvk {
4442

4543
void CameraManager::onFrameEnd() {
4644
m_lastSetCameraType = CameraType::Unknown;
45+
m_decompositionCache.clear();
4746
}
4847

4948
CameraType::Enum CameraManager::processCameraData(const DrawCallState& input) {
@@ -75,10 +74,7 @@ namespace dxvk {
7574
}
7675

7776
// Get camera params
78-
float fov, aspectRatio, nearPlane, farPlane, shearX, shearY;
79-
bool isLHS;
80-
bool isReverseZ;
81-
decomposeProjection(input.getTransformData().viewToProjection, aspectRatio, fov, nearPlane, farPlane, shearX, shearY, isLHS, isReverseZ);
77+
DecomposeProjectionParams decomposeProjectionParams = getOrDecomposeProjection(input.getTransformData().viewToProjection);
8278

8379
// Filter invalid cameras, extreme shearing
8480
static auto isFovValid = [](float fovA) {
@@ -88,7 +84,7 @@ namespace dxvk {
8884
return std::abs(fovA - cameraB.getFov()) < kFovToleranceRadians;
8985
};
9086

91-
if (std::abs(shearX) > 0.01f || !isFovValid(fov)) {
87+
if (std::abs(decomposeProjectionParams.shearX) > 0.01f || !isFovValid(decomposeProjectionParams.fov)) {
9288
ONCE(Logger::warn("[RTX] CameraManager: rejected an invalid camera"));
9389
return input.getCategoryFlags().test(InstanceCategories::Sky) ? CameraType::Sky : CameraType::Unknown;
9490
}
@@ -117,13 +113,13 @@ namespace dxvk {
117113
cameraType = CameraType::RenderToTexture;
118114
} else if (input.testCategoryFlags(InstanceCategories::Sky)) {
119115
cameraType = CameraType::Sky;
120-
} else if (isViewModel(fov, input.maxZ, frameId)) {
116+
} else if (isViewModel(decomposeProjectionParams.fov, input.maxZ, frameId)) {
121117
cameraType = CameraType::ViewModel;
122118
}
123119

124120
// Check fov consistency across frames
125121
if (frameId > 0) {
126-
if (getCamera(cameraType).isValid(frameId - 1) && !areFovsClose(fov, getCamera(cameraType))) {
122+
if (getCamera(cameraType).isValid(frameId - 1) && !areFovsClose(decomposeProjectionParams.fov, getCamera(cameraType))) {
127123
ONCE(Logger::warn("[RTX] CameraManager: FOV of a camera changed between frames"));
128124
}
129125
}
@@ -148,7 +144,15 @@ namespace dxvk {
148144
}
149145
} else {
150146
isCameraCut = camera.update(
151-
frameId, worldToView, viewToProjection, fov, aspectRatio, nearPlane,farPlane, isLHS);
147+
frameId,
148+
worldToView,
149+
viewToProjection,
150+
decomposeProjectionParams.fov,
151+
decomposeProjectionParams.aspectRatio,
152+
decomposeProjectionParams.nearPlane,
153+
decomposeProjectionParams.farPlane,
154+
decomposeProjectionParams.isLHS
155+
);
152156
}
153157

154158

@@ -173,13 +177,29 @@ namespace dxvk {
173177
void CameraManager::processExternalCamera(CameraType::Enum type,
174178
const Matrix4& worldToView,
175179
const Matrix4& viewToProjection) {
176-
float fov, aspectRatio, nearPlane, farPlane, shearX, shearY;
177-
bool isLHS;
178-
bool isReverseZ;
179-
decomposeProjection(viewToProjection, aspectRatio, fov, nearPlane, farPlane, shearX, shearY, isLHS, isReverseZ);
180+
DecomposeProjectionParams decomposeProjectionParams = getOrDecomposeProjection(viewToProjection);
180181

181182
getCamera(type).update(
182183
m_device->getCurrentFrameId(),
183-
worldToView, viewToProjection, fov, aspectRatio, nearPlane, farPlane, isLHS);
184+
worldToView,
185+
viewToProjection,
186+
decomposeProjectionParams.fov,
187+
decomposeProjectionParams.aspectRatio,
188+
decomposeProjectionParams.nearPlane,
189+
decomposeProjectionParams.farPlane,
190+
decomposeProjectionParams.isLHS);
184191
}
192+
193+
DecomposeProjectionParams CameraManager::getOrDecomposeProjection(const Matrix4& viewToProjection) {
194+
XXH64_hash_t projectionHash = XXH64(&viewToProjection, sizeof(viewToProjection), 0);
195+
auto iter = m_decompositionCache.find(projectionHash);
196+
if (iter != m_decompositionCache.end()) {
197+
return iter->second;
198+
}
199+
200+
DecomposeProjectionParams decomposeProjectionParams;
201+
decomposeProjection(viewToProjection, decomposeProjectionParams);
202+
m_decompositionCache.emplace(projectionHash, decomposeProjectionParams);
203+
return decomposeProjectionParams;
204+
}
185205
} // namespace dxvk

src/dxvk/rtx_render/rtx_camera_manager.h

+4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "rtx_option.h"
2626
#include "rtx_types.h"
2727
#include "rtx_common_object.h"
28+
#include "rtx_matrix_helpers.h"
2829

2930
namespace dxvk {
3031
class DxvkDevice;
@@ -81,6 +82,9 @@ namespace dxvk {
8182
std::array<RtCamera, CameraType::Count> m_cameras;
8283
CameraType::Enum m_lastSetCameraType = CameraType::Unknown;
8384
uint32_t m_lastCameraCutFrameId = -1;
85+
fast_unordered_cache<DecomposeProjectionParams> m_decompositionCache;
86+
87+
DecomposeProjectionParams getOrDecomposeProjection(const Matrix4& viewToProjection);
8488

8589
RTX_OPTION("rtx", bool, rayPortalEnabled, false, "Enables ray portal support. Note this requires portal texture hashes to be set for the ray portal geometries in rtx.rayPortalModelTextureHashes.");
8690
};

src/dxvk/rtx_render/rtx_instance_manager.cpp

+14-32
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ namespace dxvk {
134134
m_frameLastUpdated
135135
m_frameCreated
136136
m_isCreatedByRenderer
137-
m_spatialCachePos
137+
m_spatialCacheHash
138138
buildGeometries
139139
buildRanges
140140
billboardIndices
@@ -170,8 +170,7 @@ namespace dxvk {
170170

171171
// Cache based on current position.
172172
const Vector3 newPos = getBlas()->input.getGeometryData().boundingBox.getTransformedCentroid(surface.objectToWorld);
173-
m_linkedBlas->getSpatialMap().move(m_spatialCachePos, newPos, this);
174-
m_spatialCachePos = newPos;
173+
m_spatialCacheHash = m_linkedBlas->getSpatialMap().move(m_spatialCacheHash, newPos, surface.objectToWorld, this);
175174
}
176175
}
177176

@@ -180,8 +179,8 @@ namespace dxvk {
180179
surface.normalObjectToWorld = transpose(inverse(Matrix3(surface.objectToWorld)));
181180
surface.prevObjectToWorld = objectToWorld;
182181
if (!m_isCreatedByRenderer) {
183-
m_spatialCachePos = getBlas()->input.getGeometryData().boundingBox.getTransformedCentroid(surface.objectToWorld);
184-
m_linkedBlas->getSpatialMap().insert(m_spatialCachePos, this);
182+
const Vector3 centroid = getBlas()->input.getGeometryData().boundingBox.getTransformedCentroid(surface.objectToWorld);
183+
m_spatialCacheHash = m_linkedBlas->getSpatialMap().insert(centroid, surface.objectToWorld, this);
185184
}
186185

187186
// The D3D matrix on input, needs to be transposed before feeding to the VK API (left/right handed conversion)
@@ -653,38 +652,21 @@ namespace dxvk {
653652
// Search the BLAS for an instance matching ours
654653
{
655654
// Search for an exact match
656-
const std::vector<const RtInstance*>* matchingCell = blas.getSpatialMap().getDataAtPos(worldPosition);
657-
if (matchingCell != nullptr) {
658-
for (const RtInstance* instance : *matchingCell) {
659-
const Matrix4 instanceTransform = instance->getTransform();
660-
// Note: this may be the second call this frame targetting the same instance.
661-
if (memcmp(&transform, &instanceTransform, sizeof(instanceTransform)) == 0) {
662-
return const_cast<RtInstance*>(instance);
663-
}
664-
}
655+
result = const_cast<RtInstance*>(blas.getSpatialMap().getDataAtTransform(transform));
656+
if (result != nullptr) {
657+
return result;
665658
}
666659

667660
// No exact match, so find the closest match in the region
668661
// (need to check a 2x2x2 patch of cells to account for positions close to a border)
669-
const auto adjacentCells = blas.getSpatialMap().getDataNearPos(worldPosition);
670-
for (const std::vector<const RtInstance*>* cellPtr : adjacentCells){
671-
for (const RtInstance* instance : *cellPtr) {
672-
if (instance->m_frameLastUpdated != currentFrameIdx && instance->m_materialHash == material.getHash()) {
673-
// Instance hasn't been touched yet this frame.
674-
675-
const Vector3& prevInstanceWorldPosition = instance->getSpatialCachePosition();
676-
677-
const float distSqr = lengthSqr(prevInstanceWorldPosition - worldPosition);
678-
if (distSqr <= uniqueObjectDistanceSqr && distSqr < nearestDistSqr) {
679-
if (distSqr == 0.0f) {
680-
// Not going to find anything closer.
681-
return const_cast<RtInstance*>(instance);
682-
}
683-
nearestDistSqr = distSqr;
684-
result = const_cast<RtInstance*>(instance);
685-
}
686-
}
662+
result = const_cast<RtInstance*>(blas.getSpatialMap().getNearestData(worldPosition, uniqueObjectDistanceSqr, nearestDistSqr,
663+
[&] (const RtInstance* instance) {
664+
return instance->m_frameLastUpdated != currentFrameIdx && instance->m_materialHash == material.getHash();
687665
}
666+
));
667+
if (nearestDistSqr == 0.0f && result != nullptr) {
668+
// Not going to find anything closer
669+
return result;
688670
}
689671
}
690672

src/dxvk/rtx_render/rtx_instance_manager.h

+2-3
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,11 @@ class RtInstance {
6565
Vector3 getWorldPosition() const { return Vector3{ m_vkInstance.transform.matrix[0][3], m_vkInstance.transform.matrix[1][3], m_vkInstance.transform.matrix[2][3] }; }
6666
const Vector3& getPrevWorldPosition() const { return surface.prevObjectToWorld.data[3].xyz(); }
6767

68-
const Vector3& getSpatialCachePosition() const { return m_spatialCachePos; }
6968
void removeFromSpatialCache() const {
7069
if (m_isCreatedByRenderer) {
7170
return;
7271
}
73-
m_linkedBlas->getSpatialMap().erase(m_spatialCachePos, this);
72+
m_linkedBlas->getSpatialMap().erase(m_spatialCacheHash);
7473
}
7574

7675
bool isCreatedThisFrame(uint32_t frameIndex) const { return frameIndex == m_frameCreated; }
@@ -203,7 +202,7 @@ class RtInstance {
203202

204203
CategoryFlags m_categoryFlags;
205204

206-
Vector3 m_spatialCachePos = Vector3(0.f);
205+
XXH64_hash_t m_spatialCacheHash = 0;
207206

208207
public:
209208
bool isFrontFaceFlipped = false;

src/dxvk/rtx_render/rtx_matrix_helpers.h

+16-1
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,19 @@ static inline void decomposeProjection(const dxvk::Matrix4& matrix, float& aspec
8080
"\n\tPROJ_ANGLEMAXY: ", cameraParams[PROJ_ANGLEMAXY]));
8181
}
8282
#endif
83-
}
83+
}
84+
85+
struct DecomposeProjectionParams {
86+
float fov;
87+
float aspectRatio;
88+
float nearPlane;
89+
float farPlane;
90+
float shearX;
91+
float shearY;
92+
bool isLHS;
93+
bool isReverseZ;
94+
};
95+
96+
static inline void decomposeProjection(const dxvk::Matrix4& matrix, DecomposeProjectionParams& result, bool log = false) {
97+
decomposeProjection(matrix, result.aspectRatio, result.fov, result.nearPlane, result.farPlane, result.shearX, result.shearY, result.isLHS, result.isReverseZ, log);
98+
}

src/dxvk/rtx_render/rtx_texture.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,8 @@ namespace dxvk {
143143
}
144144

145145
XXH64_hash_t getImageHash() const {
146-
const DxvkImageView* resolvedImageView = getImageView();
147146
XXH64_hash_t result = 0;
148-
if (resolvedImageView) {
147+
if (const DxvkImageView* resolvedImageView = getImageView()) {
149148
result = resolvedImageView->image()->getHash();
150149
}
151150

src/dxvk/rtx_render/rtx_types.cpp

+1-8
Original file line numberDiff line numberDiff line change
@@ -360,14 +360,7 @@ namespace dxvk {
360360
}
361361

362362
void BlasEntry::rebuildSpatialMap() {
363-
InstanceMap newMap(RtxOptions::uniqueObjectDistance() * 2.f);
364-
365-
for (const auto& iter : m_spatialMap.getAll()){
366-
for (const RtInstance* instance : iter.second) {
367-
newMap.insert(instance->getSpatialCachePosition(), instance);
368-
}
369-
}
370-
m_spatialMap = std::move(newMap);
363+
m_spatialMap.rebuild(RtxOptions::uniqueObjectDistance() * 2.f);
371364
}
372365

373366
} // namespace dxvk

src/dxvk/rtx_render/rtx_types.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,7 @@ struct BlasEntry {
586586
// Frame when the vertex data of this geometry was last updated, used to detect static geometries
587587
uint32_t frameLastUpdated = kInvalidFrameIndex;
588588

589-
using InstanceMap = SpatialMap<const RtInstance*>;
589+
using InstanceMap = SpatialMap<RtInstance>;
590590

591591
Rc<PooledBlas> staticBlas;
592592

0 commit comments

Comments
 (0)