/// Software fallback for the parallel-bit-extract operation (the AVX2 build
/// maps extract_bits directly onto the _pext_u32 intrinsic instead).
///
/// Gathers the bits of `val` that sit at the positions where `mask` has a
/// 1 bit and packs them, preserving their order, into the low bits of the
/// result.
///
/// @param val   Value to pull bits from.
/// @param mask  Selects which bit positions of `val` are gathered.
/// @return The selected bits packed contiguously starting at bit 0
///         (0 when `mask` is 0).
uint32_t extract_bits(uint32_t val, uint32_t mask) noexcept
{
    uint32_t res = 0;

    // `bb` is the next output bit to fill; one mask bit is consumed per pass.
    for (uint32_t bb = 1; mask != 0; bb <<= 1)
    {
        // Isolate the lowest set bit of the mask. Spelled as (~mask + 1u)
        // rather than -mask: the value is identical, but unary minus on an
        // unsigned operand triggers MSVC warning C4146.
        const uint32_t lowestSetBit = mask & (~mask + 1u);

        if (val & lowestSetBit)
        {
            res |= bb;
        }

        // Clear the mask bit that was just processed.
        mask &= (mask - 1);
    }

    return res;
}
swizzleOffset : rowMajorOffset; + const uint8_t* sourcePixelPointer = sptr + rowMajorOffset; + uint8_t* destPixelPointer = dptr + swizzleOffset; + memcpy(destPixelPointer, sourcePixelPointer, bytesPerPixel); + } + } + } + else + { + size_t rowPitch = result.GetImages()[0].rowPitch; + for (size_t swizzleIndex = 0; swizzleIndex < (width * height); swizzleIndex++) + { + size_t swizzleOffset = swizzleIndex * bytesPerPixel; - const uint8_t* sourcePixelPointer = sptr + sourceOffset; - uint8_t* destPixelPointer = dptr + destOffset; + uint32_t destX = extract_bits(swizzleIndex, xBytesMask); + uint32_t destY = extract_bits(swizzleIndex, yBytesMask); + size_t rowMajorOffset = destY * rowPitch + destX * bytesPerPixel; + + const uint8_t* sourcePixelPointer = sptr + swizzleOffset; + uint8_t* destPixelPointer = dptr + rowMajorOffset; memcpy(destPixelPointer, sourcePixelPointer, bytesPerPixel); } } @@ -124,20 +155,37 @@ HRESULT DirectX::StandardSwizzle(const Image* srcImages, size_t nimages, const T uint32_t xBytesMask = 0b1010101010101010; uint32_t yBytesMask = 0b0101010101010101; - for (size_t y = 0; y