Skip to content

Commit 680cfe1

Browse files
committed
Use feature buffers for denoising
1 parent b6ff5d5 commit 680cfe1

26 files changed

+5109
-951
lines changed

compile_shaders.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,10 @@ def main():
107107

108108
# Denoise
109109
compile_shader(src_name="filter_variance")
110-
compile_shader(src_name="nlm_filter")
110+
compile_shader(src_name="nlm_filter", spv_name="nlm_filter", defines="-DUSE_BASE_COLOR=0 -DUSE_DEPTH_NORMAL=0")
111+
compile_shader(src_name="nlm_filter", spv_name="nlm_filter_n", defines="-DUSE_BASE_COLOR=0 -DUSE_DEPTH_NORMAL=1")
112+
compile_shader(src_name="nlm_filter", spv_name="nlm_filter_b", defines="-DUSE_BASE_COLOR=1 -DUSE_DEPTH_NORMAL=0")
113+
compile_shader(src_name="nlm_filter", spv_name="nlm_filter_bn", defines="-DUSE_BASE_COLOR=1 -DUSE_DEPTH_NORMAL=1")
111114

112115
# Other
113116
compile_shader(src_name="prepare_indir_args")

internal/Core.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@
1111
#define force_inline __attribute__((always_inline)) inline
1212
#define assume_aligned(ptr, sz) (__builtin_assume_aligned((const void *)ptr, sz))
1313
#define vectorcall
14+
#define restrict __restrict__
1415
#endif
1516
#ifdef _MSC_VER
1617
#define force_inline __forceinline
1718
#define vectorcall __vectorcall
1819
#define assume_aligned(ptr, sz) (__assume((((const char *)ptr) - ((const char *)0)) % (sz) == 0), (ptr))
20+
#define restrict __restrict
1921

2022
#include <intrin.h>
2123

internal/CoreRef.cpp

Lines changed: 77 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4372,7 +4372,7 @@ void Ray::Ref::ShadePrimary(const pass_settings_t &ps, Span<const hit_data_t> in
43724372
int img_w, float mix_factor, color_rgba_t *out_color, color_rgba_t *out_base_color,
43734373
color_rgba_t *out_depth_normal) {
43744374
auto clamp_direct = simd_fvec4{std::numeric_limits<float>::max()};
4375-
if (ps.clamp_direct) {
4375+
if (ps.clamp_direct != 0.0f) {
43764376
clamp_direct.set<0>(ps.clamp_direct);
43774377
clamp_direct.set<1>(ps.clamp_direct);
43784378
clamp_direct.set<2>(ps.clamp_direct);
@@ -4411,7 +4411,7 @@ void Ray::Ref::ShadeSecondary(const pass_settings_t &ps, Span<const hit_data_t>
44114411
int *out_secondary_rays_count, shadow_ray_t *out_shadow_rays, int *out_shadow_rays_count,
44124412
int img_w, color_rgba_t *out_color) {
44134413
auto clamp_indirect = simd_fvec4{std::numeric_limits<float>::max()};
4414-
if (ps.clamp_indirect) {
4414+
if (ps.clamp_indirect != 0.0f) {
44154415
clamp_indirect.set<0>(ps.clamp_indirect);
44164416
clamp_indirect.set<1>(ps.clamp_indirect);
44174417
clamp_indirect.set<2>(ps.clamp_indirect);
@@ -4435,10 +4435,13 @@ void Ray::Ref::ShadeSecondary(const pass_settings_t &ps, Span<const hit_data_t>
44354435
}
44364436
}
44374437

4438-
template <int WINDOW_SIZE, int NEIGHBORHOOD_SIZE>
4439-
void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const int input_stride, const float alpha,
4440-
const float damping, const color_rgba_t variance[], const rect_t &output_rect,
4441-
const int output_stride, color_rgba_t output[]) {
4438+
namespace Ray {
4439+
namespace Ref {
4440+
template <int WINDOW_SIZE, int NEIGHBORHOOD_SIZE, bool FEATURE0, bool FEATURE1>
4441+
void JointNLMFilter(const color_rgba_t *restrict input, const rect_t &rect, const int input_stride, const float alpha,
4442+
const float damping, const color_rgba_t variance[], const color_rgba_t *restrict feature0,
4443+
const float feature0_weight, const color_rgba_t *restrict feature1, const float feature1_weight,
4444+
const rect_t &output_rect, const int output_stride, color_rgba_t *restrict output) {
44424445
const int WindowRadius = (WINDOW_SIZE - 1) / 2;
44434446
const float PatchDistanceNormFactor = NEIGHBORHOOD_SIZE * NEIGHBORHOOD_SIZE;
44444447
const int NeighborRadius = (NEIGHBORHOOD_SIZE - 1) / 2;
@@ -4457,7 +4460,7 @@ void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const i
44574460
for (int l = -WindowRadius; l <= WindowRadius; ++l) {
44584461
const int jx = ix + l;
44594462

4460-
simd_fvec4 distance = {};
4463+
simd_fvec4 color_distance = {};
44614464

44624465
for (int q = -NeighborRadius; q <= NeighborRadius; ++q) {
44634466
for (int p = -NeighborRadius; p <= NeighborRadius; ++p) {
@@ -4468,16 +4471,39 @@ void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const i
44684471
const simd_fvec4 jvar = {variance[(jy + q) * input_stride + (jx + p)].v, simd_mem_aligned};
44694472
const simd_fvec4 min_var = min(ivar, jvar);
44704473

4471-
distance += ((ipx - jpx) * (ipx - jpx) - alpha * (ivar + min_var)) /
4472-
(0.0001f + damping * damping * (ivar + jvar));
4474+
color_distance += ((ipx - jpx) * (ipx - jpx) - alpha * (ivar + min_var)) /
4475+
(0.0001f + damping * damping * (ivar + jvar));
44734476
}
44744477
}
44754478

4476-
const float patch_distance =
4477-
0.25f * PatchDistanceNormFactor *
4478-
(distance.get<0>() + distance.get<1>() + distance.get<2>() + distance.get<3>());
4479+
const float patch_distance = 0.25f * PatchDistanceNormFactor *
4480+
(color_distance.get<0>() + color_distance.get<1>() +
4481+
color_distance.get<2>() + color_distance.get<3>());
4482+
float weight = std::exp(-std::max(0.0f, patch_distance));
44794483

4480-
const float weight = std::exp(-std::max(0.0f, patch_distance));
4484+
if (FEATURE0 || FEATURE1) {
4485+
simd_fvec4 feature_distance = {};
4486+
if (FEATURE0) {
4487+
const simd_fvec4 ipx = {feature0[iy * input_stride + ix].v, simd_mem_aligned};
4488+
const simd_fvec4 jpx = {feature0[jy * input_stride + jx].v, simd_mem_aligned};
4489+
4490+
feature_distance = feature0_weight * (ipx - jpx) * (ipx - jpx);
4491+
}
4492+
if (FEATURE1) {
4493+
const simd_fvec4 ipx = {feature1[iy * input_stride + ix].v, simd_mem_aligned};
4494+
const simd_fvec4 jpx = {feature1[jy * input_stride + jx].v, simd_mem_aligned};
4495+
4496+
feature_distance = max(feature_distance, feature1_weight * (ipx - jpx) * (ipx - jpx));
4497+
}
4498+
4499+
const float feature_patch_distance =
4500+
0.25f * (feature_distance.get<0>() + feature_distance.get<1>() + feature_distance.get<2>() +
4501+
feature_distance.get<3>());
4502+
const float feature_weight =
4503+
std::exp(-std::max(0.0f, std::min(10000.0f, feature_patch_distance)));
4504+
4505+
weight = std::min(weight, feature_weight);
4506+
}
44814507

44824508
sum_output += simd_fvec4{input[jy * input_stride + jx].v, simd_mem_aligned} * weight;
44834509
sum_weight += weight;
@@ -4493,19 +4519,49 @@ void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const i
44934519
}
44944520
}
44954521
}
4522+
} // namespace Ref
4523+
} // namespace Ray
4524+
4525+
// Public entry point of the joint NLM filter: picks, at run time, which compile-time
// specialization JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, FEATURE0, FEATURE1> to call,
// based on which of the two optional feature pointers are non-null.
//
// feature1 / feature2 may each be nullptr; a null pointer means "no such feature buffer".
// All remaining arguments are forwarded unchanged to the selected specialization.
//
// NOTE(review): the parameters are named feature1/feature2 here, while the header declaration
// and the explicit instantiations of this template name them feature0/feature1. The mismatch
// is harmless to callers but worth unifying.
template <int WINDOW_SIZE, int NEIGHBORHOOD_SIZE>
4526+
void Ray::Ref::JointNLMFilter(const color_rgba_t input[], const rect_t &rect, const int input_stride, const float alpha,
4527+
const float damping, const color_rgba_t variance[], const color_rgba_t feature1[],
4528+
const float feature1_weight, const color_rgba_t feature2[], const float feature2_weight,
4529+
const rect_t &output_rect, const int output_stride, color_rgba_t output[]) {
4530+
// Both feature buffers present: enable both feature terms.
if (feature1 && feature2) {
4531+
JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, true, true>(input, rect, input_stride, alpha, damping, variance,
4532+
feature1, feature1_weight, feature2, feature2_weight,
4533+
output_rect, output_stride, output);
4534+
// Only the first feature buffer present.
} else if (feature1) {
4535+
JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, true, false>(input, rect, input_stride, alpha, damping, variance,
4536+
feature1, feature1_weight, nullptr, 0.0f,
4537+
output_rect, output_stride, output);
4538+
// Only the second feature buffer present: it is passed in the FIRST feature slot (with its
// own weight) so the same <true, false> specialization is reused instead of a <false, true> one.
} else if (feature2) {
4539+
JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, true, false>(input, rect, input_stride, alpha, damping, variance,
4540+
feature2, feature2_weight, nullptr, 0.0f,
4541+
output_rect, output_stride, output);
4542+
// No feature buffers: both feature terms are disabled at compile time.
} else {
4543+
JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, false, false>(input, rect, input_stride, alpha, damping,
4544+
variance, nullptr, 0.0f, nullptr, 0.0f,
4545+
output_rect, output_stride, output);
4546+
}
4547+
}
44964548

4497-
template void Ray::Ref::NLMFilter<21 /* WINDOW_SIZE */, 5 /* NEIGHBORHOOD_SIZE */>(
4549+
template void Ray::Ref::JointNLMFilter<21 /* WINDOW_SIZE */, 5 /* NEIGHBORHOOD_SIZE */>(
44984550
const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
4499-
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4500-
template void Ray::Ref::NLMFilter<21 /* WINDOW_SIZE */, 3 /* NEIGHBORHOOD_SIZE */>(
4551+
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
4552+
float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4553+
template void Ray::Ref::JointNLMFilter<21 /* WINDOW_SIZE */, 3 /* NEIGHBORHOOD_SIZE */>(
45014554
const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
4502-
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4503-
template void Ray::Ref::NLMFilter<7 /* WINDOW_SIZE */, 3 /* NEIGHBORHOOD_SIZE */>(
4555+
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
4556+
float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4557+
template void Ray::Ref::JointNLMFilter<7 /* WINDOW_SIZE */, 3 /* NEIGHBORHOOD_SIZE */>(
45044558
const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
4505-
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4506-
template void Ray::Ref::NLMFilter<3 /* WINDOW_SIZE */, 1 /* NEIGHBORHOOD_SIZE */>(
4559+
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
4560+
float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4561+
template void Ray::Ref::JointNLMFilter<3 /* WINDOW_SIZE */, 1 /* NEIGHBORHOOD_SIZE */>(
45074562
const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
4508-
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4563+
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
4564+
float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
45094565

45104566
namespace Ray {
45114567
extern const int LUT_DIMS = 48;

internal/CoreRef.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,8 @@ force_inline int total_depth(const shadow_ray_t &r) {
133133
}
134134

135135
// Generation of rays
136-
void GeneratePrimaryRays(const camera_t &cam, const rect_t &r, int w, int h, const float random_seq[],
137-
int iteration, const uint16_t required_samples[], aligned_vector<ray_data_t> &out_rays);
136+
void GeneratePrimaryRays(const camera_t &cam, const rect_t &r, int w, int h, const float random_seq[], int iteration,
137+
const uint16_t required_samples[], aligned_vector<ray_data_t> &out_rays);
138138
void SampleMeshInTextureSpace(int iteration, int obj_index, int uv_layer, const mesh_t &mesh, const transform_t &tr,
139139
const uint32_t *vtx_indices, const vertex_t *vertices, const rect_t &r, int w, int h,
140140
const float *random_seq, aligned_vector<ray_data_t> &out_rays,
@@ -373,8 +373,10 @@ void ShadeSecondary(const pass_settings_t &ps, Span<const hit_data_t> inters, Sp
373373

374374
// Denoise
375375
template <int WINDOW_SIZE = 7, int NEIGHBORHOOD_SIZE = 3>
376-
void NLMFilter(const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
377-
const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
376+
void JointNLMFilter(const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
377+
const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight,
378+
const color_rgba_t feature1[], float feature1_weight, const rect_t &output_rect, int output_stride,
379+
color_rgba_t output[]);
378380

379381
// Tonemap
380382

internal/RendererCPU.h

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ template <typename SIMDPolicy> struct PassData {
195195
aligned_vector<color_rgba_t, 16> temp_final_buf;
196196
aligned_vector<color_rgba_t, 16> variance_buf;
197197
aligned_vector<color_rgba_t, 16> filtered_variance_buf;
198+
aligned_vector<color_rgba_t, 16> feature_buf1, feature_buf2;
198199

199200
aligned_vector<typename SIMDPolicy::RayHashType> hash_values;
200201
std::vector<int> head_flags;
@@ -556,6 +557,16 @@ template <typename SIMDPolicy> void Ray::Cpu::Renderer<SIMDPolicy>::DenoiseImage
556557
p.temp_final_buf.resize(rect_ext.w * rect_ext.h);
557558
p.variance_buf.resize(rect_ext.w * rect_ext.h);
558559
p.filtered_variance_buf.resize(rect_ext.w * rect_ext.h);
560+
if (!base_color_buf_.empty()) {
561+
p.feature_buf1.resize(rect_ext.w * rect_ext.h);
562+
} else {
563+
p.feature_buf1 = {};
564+
}
565+
if (!depth_normals_buf_.empty()) {
566+
p.feature_buf2.resize(rect_ext.w * rect_ext.h);
567+
} else {
568+
p.feature_buf2 = {};
569+
}
559570

560571
#define FETCH_FINAL_BUF(_x, _y) \
561572
Ref::simd_fvec4(raw_final_buf_[std::min(std::max(_y, 0), h_ - 1) * w_ + std::min(std::max(_x, 0), w_ - 1)].v, \
@@ -589,6 +600,11 @@ template <typename SIMDPolicy> void Ray::Cpu::Renderer<SIMDPolicy>::DenoiseImage
589600
#undef FETCH_VARIANCE
590601
#undef FETCH_FINAL_BUF
591602

603+
#define FETCH_BASE_COLOR(_x, _y) \
604+
base_color_buf_[std::min(std::max(_y, 0), h_ - 1) * w_ + std::min(std::max(_x, 0), w_ - 1)]
605+
#define FETCH_DEPTH_NORMALS(_x, _y) \
606+
depth_normals_buf_[std::min(std::max(_y, 0), h_ - 1) * w_ + std::min(std::max(_x, 0), w_ - 1)]
607+
592608
for (int y = 4; y < rect_ext.h - 4; ++y) {
593609
for (int x = 4; x < rect_ext.w - 4; ++x) {
594610
const Ref::simd_fvec4 center_val = {p.variance_buf[(y + 0) * rect_ext.w + x].v, Ref::simd_mem_aligned};
@@ -603,9 +619,19 @@ template <typename SIMDPolicy> void Ray::Cpu::Renderer<SIMDPolicy>::DenoiseImage
603619

604620
res = max(res, center_val);
605621
res.store_to(p.filtered_variance_buf[y * rect_ext.w + x].v, Ref::simd_mem_aligned);
622+
623+
if (!base_color_buf_.empty()) {
624+
p.feature_buf1[y * rect_ext.w + x] = FETCH_BASE_COLOR(rect_ext.x + x, rect_ext.y + y);
625+
}
626+
if (!depth_normals_buf_.empty()) {
627+
p.feature_buf2[y * rect_ext.w + x] = FETCH_DEPTH_NORMALS(rect_ext.x + x, rect_ext.y + y);
628+
}
606629
}
607630
}
608631

632+
#undef FETCH_BASE_COLOR
633+
#undef FETCH_DEPTH_NORMALS
634+
609635
Ref::tonemap_params_t tonemap_params;
610636
float variance_threshold;
611637

@@ -633,9 +659,10 @@ template <typename SIMDPolicy> void Ray::Cpu::Renderer<SIMDPolicy>::DenoiseImage
633659

634660
static_assert(EXT_RADIUS >= (NLM_WINDOW_SIZE - 1) / 2 + (NLM_NEIGHBORHOOD_SIZE - 1) / 2, "!");
635661

636-
Ref::NLMFilter<NLM_WINDOW_SIZE, NLM_NEIGHBORHOOD_SIZE>(
662+
Ref::JointNLMFilter<NLM_WINDOW_SIZE, NLM_NEIGHBORHOOD_SIZE>(
637663
p.temp_final_buf.data(), rect_t{EXT_RADIUS, EXT_RADIUS, rect.w, rect.h}, rect_ext.w, 1.0f, 0.45f,
638-
p.filtered_variance_buf.data(), rect, w_, raw_filtered_buf_.data());
664+
p.filtered_variance_buf.data(), !p.feature_buf1.empty() ? p.feature_buf1.data() : nullptr, 64.0f,
665+
!p.feature_buf2.empty() ? p.feature_buf2.data() : nullptr, 32.0f, rect, w_, raw_filtered_buf_.data());
639666

640667
for (int y = rect.y; y < rect.y + rect.h; ++y) {
641668
for (int x = rect.x; x < rect.x + rect.w; ++x) {

0 commit comments

Comments
 (0)