Skip to content

Commit

Permalink
Add forward and inverse Walsh-Hadamard transforms
Browse files Browse the repository at this point in the history
  • Loading branch information
barrbrain committed Oct 18, 2023
1 parent e678ad1 commit 1c87320
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/asm/aarch64/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ use crate::{Pixel, PixelType};
use crate::asm::shared::transform::inverse::*;
use crate::asm::shared::transform::*;

pub use crate::transform::inverse::rust::inverse_transform_add_lossless;

pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
Expand Down
2 changes: 2 additions & 0 deletions src/asm/x86/transform/forward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

pub use crate::transform::forward::rust::forward_transform_lossless;

type TxfmFuncI32X8 = unsafe fn(&mut [I32X8]);

#[inline]
Expand Down
2 changes: 2 additions & 0 deletions src/asm/x86/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ use crate::{Pixel, PixelType};
use crate::asm::shared::transform::inverse::*;
use crate::asm::shared::transform::*;

pub use crate::transform::inverse::rust::inverse_transform_add_lossless;

pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
Expand Down
30 changes: 30 additions & 0 deletions src/transform/forward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,36 @@ pub mod rust {
}
}

/// Forward 4x4 transform for the lossless path, built from two passes of
/// `fwht4`.
///
/// Reads a 4x4 block of `i16` values from `input` (rows are `stride`
/// elements apart), transforms every column and then every row, and stores
/// the 16 results in `output` at `output[c * 4 + r]`, i.e. transposed
/// relative to the row/column position they were computed at.
///
/// `_cpu` is accepted but not used by this implementation.
///
/// # Panics
///
/// - If `input` is too short for a 4x4 block at the given `stride`, or
///   `output` has fewer than 16 items (plain slice indexing is used).
pub fn forward_transform_lossless<T: Coefficient>(
  input: &[i16], output: &mut [T], stride: usize, _cpu: CpuFeatureLevel,
) {
  // Intermediate 4x4 block, stored row-major.
  let mut block = [0i32; 4 * 4];
  // Scratch space holding one column at a time.
  let mut column = [0i32; 4];

  // Column pass: gather a column, transform it, scatter it into `block`.
  for col in 0..4 {
    for (row, coeff) in column.iter_mut().enumerate() {
      *coeff = input[row * stride + col].into();
    }
    fwht4(&mut column[..]);
    for (row, &coeff) in column.iter().enumerate() {
      block[row * 4 + col] = coeff;
    }
  }

  // Row pass: transform each row in place, rescale, and write transposed.
  for row in 0..4 {
    // The slice deliberately runs to the end of `block`; the helpers are
    // told (or assert) that only the first four entries are relevant.
    let row_coeffs = &mut block[row * 4..];
    fwht4(row_coeffs);
    // NOTE(review): a negative bit count presumably requests a left shift
    // by 2, mirroring the `>> 2` in the inverse — confirm against
    // `av1_round_shift_array`.
    av1_round_shift_array(row_coeffs, 4, -2);
    for (col, &coeff) in row_coeffs.iter().take(4).enumerate() {
      output[col * 4 + row] = T::cast_from(coeff);
    }
  }
}

/// # Panics
///
/// - If called with an invalid combination of `tx_size` and `tx_type`
Expand Down
24 changes: 24 additions & 0 deletions src/transform/forward_shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1728,6 +1728,30 @@ $($s)* fn daala_fdct64<T: TxOperations>(coeffs: &mut [T]) {
#[$m]
$($s)* fn fidentity<T: TxOperations>(_coeffs: &mut [T]) {}

// 4-point forward Walsh-Hadamard butterfly, applied in place to the first
// four entries of `coeffs`. Panics if `coeffs` has fewer than 4 items.
// Entries past the fourth are left untouched.
//
// NOTE(review): `allow(unused)` is presumably needed because not every
// instantiation of this macro calls `fwht4` — confirm.
#[allow(unused)]
#[$m]
$($s)* fn fwht4<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 4);
  let (a, b, c, d) = (coeffs[0], coeffs[1], coeffs[2], coeffs[3]);

  let sum_ab = a.add(b);
  let diff_dc = d.sub(c);
  // NOTE(review): `sub_avg` presumably computes (self - rhs) >> 1 — confirm
  // against the `TxOperations` definition.
  let half = sum_ab.sub_avg(diff_dc);

  // The outputs depend on each other, so the evaluation order matters.
  let out3 = half.sub(c);
  let out0 = sum_ab.sub(out3);
  let out2 = half.sub(b);
  let out1 = diff_dc.add(out2);

  coeffs[0] = out0;
  coeffs[1] = out1;
  coeffs[2] = out2;
  coeffs[3] = out3;
}

}

}
68 changes: 68 additions & 0 deletions src/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,29 @@ use super::half_btf;
use super::TxSize;
use super::TxType;

/// 4-point inverse Walsh-Hadamard butterfly.
///
/// Reads the first four entries of `input` and writes four results to the
/// first four entries of `output`. Any further entries of either slice are
/// ignored / left untouched.
///
/// # Panics
///
/// - If `input` or `output` have fewer than 4 items.
pub fn av1_iwht4(input: &[i32], output: &mut [i32]) {
  assert!(input.len() >= 4);
  assert!(output.len() >= 4);

  // <https://aomediacodec.github.io/av1-spec/#inverse-walsh-hadamard-transform-process>
  // NOTE(review): the order in which results are stored below has to stay
  // in sync with the forward `fwht4`; TODO confirm it against the output
  // order listed in the spec section linked above.
  let (a, b, c, d) = (input[0], input[1], input[2], input[3]);

  let sum = a + b;
  let diff = c - d;
  // Arithmetic shift: rounds towards negative infinity for odd differences.
  let half = (sum - diff) >> 1;
  let from_d = half - d;
  let from_b = half - b;

  output[0] = sum - from_d;
  output[1] = from_b;
  output[2] = diff + from_b;
  output[3] = from_d;
}

static COSPI_INV: [i32; 64] = [
4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973, 3948,
3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461,
Expand Down Expand Up @@ -1601,6 +1624,51 @@ pub(crate) mod rust {
use simd_helpers::cold_for_target_arch;
use std::cmp;

/// Inverse 4x4 transform for the lossless path: runs `av1_iwht4` over the
/// rows and then the columns of the coefficient block and adds the result
/// to the pixels already present in `output`.
///
/// Only the first 16 entries of `input` are used. They are read with a
/// stride of 4 (`input[r]`, `input[r + 4]`, ...), i.e. transposed, and each
/// value is shifted right by 2 before the first pass. At most the first
/// four rows of `output` are updated; no clamping is applied to the sum.
///
/// `_cpu` is accepted but not used by this implementation.
///
/// # Panics
///
/// - If `input` has fewer than 16 items.
/// - If a visited row of `output` has fewer than 4 items.
pub fn inverse_transform_add_lossless<T: Pixel>(
  input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>,
  _cpu: CpuFeatureLevel,
) {
  // <https://aomediacodec.github.io/av1-spec/#2d-inverse-transform-process>
  let input: &[T::Coeff] = &input[..4 * 4];
  let mut buffer = [0i32; 4 * 4];

  // Row pass: gather every fourth coefficient starting at `r`, pre-scale,
  // and transform straight into row `r` of `buffer`.
  for (r, row_out) in buffer.chunks_exact_mut(4).enumerate() {
    let mut row_in = [0i32; 4];
    for (dst, src) in row_in.iter_mut().zip(input[r..].iter().step_by(4)) {
      *dst = i32::cast_from(*src) >> 2;
    }
    av1_iwht4(&row_in, row_out);
  }

  // Column pass: gather column `c` of `buffer`, transform it, and add the
  // result onto column `c` of the destination region.
  for c in 0..4 {
    let mut col_in = [0i32; 4];
    for (dst, src) in col_in.iter_mut().zip(buffer[c..].iter().step_by(4)) {
      *dst = *src;
    }

    let mut col_out = [0i32; 4];
    av1_iwht4(&col_in, &mut col_out);

    for (row, residual) in output.rows_iter_mut().take(4).zip(col_out.iter())
    {
      let pixel = &mut row[c];
      let sum = i32::cast_from(*pixel) + *residual;
      *pixel = T::cast_from(sum);
    }
  }
}

#[cold_for_target_arch("x86_64", "aarch64")]
pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, _eob: usize,
Expand Down
28 changes: 28 additions & 0 deletions src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
pub mod forward_shared;

pub use self::forward::forward_transform;
pub use self::forward::forward_transform_lossless;
pub use self::inverse::inverse_transform_add;
pub use self::inverse::inverse_transform_add_lossless;

use crate::context::MI_SIZE_LOG2;
use crate::partition::{BlockSize, BlockSize::*};
Expand Down Expand Up @@ -490,6 +492,30 @@ mod test {
}
}

/// Checks that the lossless forward/inverse transform pair reconstructs a
/// random 4x4 source block exactly: the residual `src - dst` is transformed,
/// inverse-transformed and added back onto `dst`, which must then equal
/// `src`.
fn test_lossless_roundtrip<T: Pixel>() {
  let cpu = CpuFeatureLevel::default();

  let mut src = [T::cast_from(0); 4 * 4];
  // dynamic allocation: test
  let mut dst = Plane::from_slice(&vec![T::cast_from(0); 4 * 4], 4);
  let mut res = [0i16; 4 * 4];
  let mut freq = [T::Coeff::cast_from(0); 4 * 4];

  // Fill source and prediction with random 8-bit samples and record the
  // residual between them.
  let samples = src.iter_mut().zip(dst.data.iter_mut());
  for (diff, (s, d)) in res.iter_mut().zip(samples) {
    *s = T::cast_from(random::<u8>());
    *d = T::cast_from(random::<u8>());
    *diff = i16::cast_from(*s) - i16::cast_from(*d);
  }

  forward_transform_lossless(&res[..], &mut freq[..], 4, cpu);
  inverse_transform_add_lossless(&freq[..], &mut dst.as_region_mut(), cpu);

  assert_eq!(&src[..], &dst.data[..]);
}

#[test]
fn log_tx_ratios() {
let combinations = [
Expand Down Expand Up @@ -575,6 +601,8 @@ mod test {
(TX_16X32, DCT_DCT, 2),
(TX_32X16, DCT_DCT, 2),
];
println!("Testing combination TX_4X4, WHT_WHT");
test_lossless_roundtrip::<T>();
for &(tx_size, tx_type, tolerance) in combinations.iter() {
println!("Testing combination {:?}, {:?}", tx_size, tx_type);
test_roundtrip::<T>(tx_size, tx_type, tolerance);
Expand Down

0 comments on commit 1c87320

Please sign in to comment.