@@ -4372,7 +4372,7 @@ void Ray::Ref::ShadePrimary(const pass_settings_t &ps, Span<const hit_data_t> in
43724372 int img_w, float mix_factor, color_rgba_t *out_color, color_rgba_t *out_base_color,
43734373 color_rgba_t *out_depth_normal) {
43744374 auto clamp_direct = simd_fvec4{std::numeric_limits<float >::max ()};
4375- if (ps.clamp_direct ) {
4375+ if (ps.clamp_direct != 0 . 0f ) {
43764376 clamp_direct.set <0 >(ps.clamp_direct );
43774377 clamp_direct.set <1 >(ps.clamp_direct );
43784378 clamp_direct.set <2 >(ps.clamp_direct );
@@ -4411,7 +4411,7 @@ void Ray::Ref::ShadeSecondary(const pass_settings_t &ps, Span<const hit_data_t>
44114411 int *out_secondary_rays_count, shadow_ray_t *out_shadow_rays, int *out_shadow_rays_count,
44124412 int img_w, color_rgba_t *out_color) {
44134413 auto clamp_indirect = simd_fvec4{std::numeric_limits<float >::max ()};
4414- if (ps.clamp_indirect ) {
4414+ if (ps.clamp_indirect != 0 . 0f ) {
44154415 clamp_indirect.set <0 >(ps.clamp_indirect );
44164416 clamp_indirect.set <1 >(ps.clamp_indirect );
44174417 clamp_indirect.set <2 >(ps.clamp_indirect );
@@ -4435,10 +4435,13 @@ void Ray::Ref::ShadeSecondary(const pass_settings_t &ps, Span<const hit_data_t>
44354435 }
44364436}
44374437
4438- template <int WINDOW_SIZE, int NEIGHBORHOOD_SIZE>
4439- void Ray::Ref::NLMFilter (const color_rgba_t input[], const rect_t &rect, const int input_stride, const float alpha,
4440- const float damping, const color_rgba_t variance[], const rect_t &output_rect,
4441- const int output_stride, color_rgba_t output[]) {
4438+ namespace Ray {
4439+ namespace Ref {
// Non-local-means filter core that can additionally be guided by up to two auxiliary feature buffers.
//
// Template parameters:
//   WINDOW_SIZE        - side length of the search window; the candidate offset l runs over
//                        [-WindowRadius, WindowRadius] with WindowRadius = (WINDOW_SIZE - 1) / 2.
//   NEIGHBORHOOD_SIZE  - side length of the comparison patch; the patch offsets p and q run over
//                        [-NeighborRadius, NeighborRadius] with NeighborRadius = (NEIGHBORHOOD_SIZE - 1) / 2.
//   FEATURE0, FEATURE1 - compile-time switches; feature0 / feature1 are only dereferenced when the
//                        matching flag is true.
//
// What the visible code does for one (current pixel, candidate pixel) pair:
//   1. accumulates a per-lane colour distance over the patch, using input variance, alpha and damping;
//   2. turns the summed distance into weight = exp(-max(0, patch_distance));
//   3. if any feature flag is set, computes a second weight from the squared feature difference taken at
//      (ix, iy) and (jx, jy) only (no patch offsets) and keeps the smaller of the two weights;
//   4. adds the weighted candidate colour to sum_output and the weight to sum_weight.
//
// NOTE(review): the outer pixel loops, the definitions of ix / iy / jy / ipx / jpx / ivar, and the code
// that uses rect, output_rect, output_stride and output are outside the visible hunks and are therefore
// not described here.
4440+ template <int WINDOW_SIZE, int NEIGHBORHOOD_SIZE, bool FEATURE0, bool FEATURE1>
4441+ void JointNLMFilter (const color_rgba_t *restrict input, const rect_t &rect, const int input_stride, const float alpha,
4442+ const float damping, const color_rgba_t variance[], const color_rgba_t *restrict feature0,
4443+ const float feature0_weight, const color_rgba_t *restrict feature1, const float feature1_weight,
4444+ const rect_t &output_rect, const int output_stride, color_rgba_t *restrict output) {
// Half-extents derived from the template sizes, plus the patch area used to scale the colour distance.
44424445 const int WindowRadius = (WINDOW_SIZE - 1 ) / 2 ;
44434446 const float PatchDistanceNormFactor = NEIGHBORHOOD_SIZE * NEIGHBORHOOD_SIZE;
44444447 const int NeighborRadius = (NEIGHBORHOOD_SIZE - 1 ) / 2 ;
@@ -4457,7 +4460,7 @@ void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const i
// Horizontal sweep over candidate pixels inside the search window.
44574460 for (int l = -WindowRadius; l <= WindowRadius; ++l) {
44584461 const int jx = ix + l;
44594462
4460- simd_fvec4 distance = {};
// Per-lane colour distance, accumulated over every (p, q) offset of the patch.
4463+ simd_fvec4 color_distance = {};
44614464
44624465 for (int q = -NeighborRadius; q <= NeighborRadius; ++q) {
44634466 for (int p = -NeighborRadius; p <= NeighborRadius; ++p) {
@@ -4468,16 +4471,39 @@ void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const i
44684471 const simd_fvec4 jvar = {variance[(jy + q) * input_stride + (jx + p)].v , simd_mem_aligned};
44694472 const simd_fvec4 min_var = min (ivar, jvar);
44704473
4471- distance += ((ipx - jpx) * (ipx - jpx) - alpha * (ivar + min_var)) /
4472- (0 .0001f + damping * damping * (ivar + jvar));
// Squared difference minus an alpha-scaled variance term, divided by damping squared times the summed
// variances. The 0.0001f term keeps the denominator away from zero when both variances are zero.
4474+ color_distance += ((ipx - jpx) * (ipx - jpx) - alpha * (ivar + min_var)) /
4475+ (0 .0001f + damping * damping * (ivar + jvar));
44734476 }
44744477 }
44754478
4476- const float patch_distance =
4477- 0 .25f * PatchDistanceNormFactor *
4478- (distance.get <0 >() + distance.get <1 >() + distance.get <2 >() + distance.get <3 >());
// Collapse the four lanes into one scalar (0.25 * sum is the lane average) and scale by the patch area.
// NOTE(review): PatchDistanceNormFactor (NEIGHBORHOOD_SIZE squared) multiplies the summed distance here
// rather than dividing it; confirm that this is the intended normalisation.
4479+ const float patch_distance = 0 .25f * PatchDistanceNormFactor *
4480+ (color_distance.get <0 >() + color_distance.get <1 >() +
4481+ color_distance.get <2 >() + color_distance.get <3 >());
// Negative distances are clamped to zero first, so the colour weight never exceeds 1.
4482+ float weight = std::exp (-std::max (0 .0f , patch_distance));
44794483
4480- const float weight = std::exp (-std::max (0 .0f , patch_distance));
// Both operands are template parameters, so this condition is a compile-time constant.
4484+ if (FEATURE0 || FEATURE1) {
4485+ simd_fvec4 feature_distance = {};
4486+ if (FEATURE0) {
// Feature samples are read at the current and candidate pixel only, not over the patch.
4487+ const simd_fvec4 ipx = {feature0[iy * input_stride + ix].v , simd_mem_aligned};
4488+ const simd_fvec4 jpx = {feature0[jy * input_stride + jx].v , simd_mem_aligned};
4489+
4490+ feature_distance = feature0_weight * (ipx - jpx) * (ipx - jpx);
4491+ }
4492+ if (FEATURE1) {
4493+ const simd_fvec4 ipx = {feature1[iy * input_stride + ix].v , simd_mem_aligned};
4494+ const simd_fvec4 jpx = {feature1[jy * input_stride + jx].v , simd_mem_aligned};
4495+
// Per-lane maximum: when both features are enabled the larger weighted distance is kept.
4496+ feature_distance = max (feature_distance, feature1_weight * (ipx - jpx) * (ipx - jpx));
4497+ }
4498+
// Lane average of the feature distance.
4499+ const float feature_patch_distance =
4500+ 0 .25f * (feature_distance.get <0 >() + feature_distance.get <1 >() + feature_distance.get <2 >() +
4501+ feature_distance.get <3 >());
// The distance is limited to the range [0, 10000] before it is exponentiated.
4502+ const float feature_weight =
4503+ std::exp (-std::max (0 .0f , std::min (10000 .0f , feature_patch_distance)));
4504+
// Keep the smaller weight, so a candidate is down-weighted if either colour or features differ.
4505+ weight = std::min (weight, feature_weight);
4506+ }
44814507
// Accumulate the weighted candidate colour and the weight itself; how these sums become the output
// pixel is outside the visible hunks.
44824508 sum_output += simd_fvec4{input[jy * input_stride + jx].v , simd_mem_aligned} * weight;
44834509 sum_weight += weight;
@@ -4493,19 +4519,49 @@ void Ray::Ref::NLMFilter(const color_rgba_t input[], const rect_t &rect, const i
44934519 }
44944520 }
44954521}
4522+ } // namespace Ref
4523+ } // namespace Ray
4524+
// Public entry point of the joint NLM filter.
//
// Picks the compile-time FEATURE0 / FEATURE1 specialisation of the four-parameter JointNLMFilter template
// according to which feature pointers are non-null, then forwards every other argument unchanged:
//   feature1 and feature2 non-null -> <true, true>
//   only feature1 non-null         -> <true, false>, feature1 in the first feature slot
//   only feature2 non-null         -> <true, false>, feature2 moved into the first feature slot
//   both null                      -> <false, false>
// Unused feature slots are passed as nullptr with a weight of 0.0f.
//
// NOTE(review): the parameters are named feature1 / feature2 here, while the four-parameter template and
// the explicit instantiations in this file name the same positions feature0 / feature1.
4525+ template <int WINDOW_SIZE, int NEIGHBORHOOD_SIZE>
4526+ void Ray::Ref::JointNLMFilter (const color_rgba_t input[], const rect_t &rect, const int input_stride, const float alpha,
4527+ const float damping, const color_rgba_t variance[], const color_rgba_t feature1[],
4528+ const float feature1_weight, const color_rgba_t feature2[], const float feature2_weight,
4529+ const rect_t &output_rect, const int output_stride, color_rgba_t output[]) {
4530+ if (feature1 && feature2) {
// Both feature buffers supplied: guide the filter with both.
4531+ JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, true , true >(input, rect, input_stride, alpha, damping, variance,
4532+ feature1, feature1_weight, feature2, feature2_weight,
4533+ output_rect, output_stride, output);
4534+ } else if (feature1) {
// Only the first buffer supplied: the second slot is disabled at compile time.
4535+ JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, true , false >(input, rect, input_stride, alpha, damping, variance,
4536+ feature1, feature1_weight, nullptr , 0 .0f ,
4537+ output_rect, output_stride, output);
4538+ } else if (feature2) {
// Only the second buffer supplied: it is passed in the first slot so the same specialisation is reused.
4539+ JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, true , false >(input, rect, input_stride, alpha, damping, variance,
4540+ feature2, feature2_weight, nullptr , 0 .0f ,
4541+ output_rect, output_stride, output);
4542+ } else {
// No feature buffers: both feature slots are disabled at compile time.
4543+ JointNLMFilter<WINDOW_SIZE, NEIGHBORHOOD_SIZE, false , false >(input, rect, input_stride, alpha, damping,
4544+ variance, nullptr , 0 .0f , nullptr , 0 .0f ,
4545+ output_rect, output_stride, output);
4546+ }
4547+ }
44964548
// Explicit instantiations of the two-template-parameter JointNLMFilter entry point.
// The added lines instantiate four (WINDOW_SIZE, NEIGHBORHOOD_SIZE) pairs: (21, 5), (21, 3), (7, 3), (3, 1).
// Each one replaces the removed NLMFilter instantiation of the same pair and adds the two feature buffer
// and weight parameters to the signature.
4497- template void Ray::Ref::NLMFilter <21 /* WINDOW_SIZE */ , 5 /* NEIGHBORHOOD_SIZE */ >(
4549+ template void Ray::Ref::JointNLMFilter <21 /* WINDOW_SIZE */ , 5 /* NEIGHBORHOOD_SIZE */ >(
44984550 const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
4499- const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4500- template void Ray::Ref::NLMFilter<21 /* WINDOW_SIZE */ , 3 /* NEIGHBORHOOD_SIZE */ >(
4551+ const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
4552+ float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4553+ template void Ray::Ref::JointNLMFilter<21 /* WINDOW_SIZE */ , 3 /* NEIGHBORHOOD_SIZE */ >(
45014554 const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
4502- const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4503- template void Ray::Ref::NLMFilter<7 /* WINDOW_SIZE */ , 3 /* NEIGHBORHOOD_SIZE */ >(
4555+ const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
4556+ float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4557+ template void Ray::Ref::JointNLMFilter<7 /* WINDOW_SIZE */ , 3 /* NEIGHBORHOOD_SIZE */ >(
45044558 const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
4505- const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4506- template void Ray::Ref::NLMFilter<3 /* WINDOW_SIZE */ , 1 /* NEIGHBORHOOD_SIZE */ >(
4559+ const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
4560+ float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4561+ template void Ray::Ref::JointNLMFilter<3 /* WINDOW_SIZE */ , 1 /* NEIGHBORHOOD_SIZE */ >(
45074562 const color_rgba_t input[], const rect_t &rect, int input_stride, float alpha, float damping,
4508- const color_rgba_t variance[], const rect_t &output_rect, int output_stride, color_rgba_t output[]);
4563+ const color_rgba_t variance[], const color_rgba_t feature0[], float feature0_weight, const color_rgba_t feature1[],
4564+ float feature1_weight, const rect_t &output_rect, int output_stride, color_rgba_t output[]);
45094565
45104566namespace Ray {
45114567extern const int LUT_DIMS = 48 ;
0 commit comments