Skip to content

Commit f2b3f52

Browse files
committed
Get AVX2 into WebP lossless
Change-Id: Ifad3102c9f899a46401985515cd98f3f7a21887f
1 parent 7c70ff7 commit f2b3f52

15 files changed

+1297
-5
lines changed

Makefile.vc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ DSP_DEC_OBJS = \
231231
$(DIROBJ)\dsp\lossless_neon.obj \
232232
$(DIROBJ)\dsp\lossless_sse2.obj \
233233
$(DIROBJ)\dsp\lossless_sse41.obj \
234+
$(DIROBJ)\dsp\lossless_avx2.obj \
234235
$(DIROBJ)\dsp\rescaler.obj \
235236
$(DIROBJ)\dsp\rescaler_mips32.obj \
236237
$(DIROBJ)\dsp\rescaler_mips_dsp_r2.obj \
@@ -270,6 +271,7 @@ DSP_ENC_OBJS = \
270271
$(DIROBJ)\dsp\lossless_enc_neon.obj \
271272
$(DIROBJ)\dsp\lossless_enc_sse2.obj \
272273
$(DIROBJ)\dsp\lossless_enc_sse41.obj \
274+
$(DIROBJ)\dsp\lossless_enc_avx2.obj \
273275
$(DIROBJ)\dsp\ssim.obj \
274276
$(DIROBJ)\dsp\ssim_sse2.obj \
275277

cmake/config.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@
9494
/* Set to 1 if SSE4.1 is supported */
9595
#cmakedefine WEBP_HAVE_SSE41 1
9696

97+
/* Set to 1 if AVX2 is supported */
98+
#cmakedefine WEBP_HAVE_AVX2 1
99+
97100
/* Set to 1 if TIFF library is installed */
98101
#cmakedefine WEBP_HAVE_TIFF 1
99102

cmake/cpu.cmake

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
3838
endfunction()
3939

4040
# those are included in the names of WEBP_USE_* in c++ code.
41-
set(WEBP_SIMD_FLAGS "SSE41;SSE2;MIPS32;MIPS_DSP_R2;NEON;MSA")
41+
set(WEBP_SIMD_FLAGS "AVX2;SSE41;SSE2;MIPS32;MIPS_DSP_R2;NEON;MSA")
4242
set(WEBP_SIMD_FILE_EXTENSIONS
43-
"_sse41.c;_sse2.c;_mips32.c;_mips_dsp_r2.c;_neon.c;_msa.c")
43+
"_avx2.c;_sse41.c;_sse2.c;_mips32.c;_mips_dsp_r2.c;_neon.c;_msa.c")
4444
if(MSVC AND CMAKE_C_COMPILER_ID STREQUAL "MSVC")
4545
# With at least Visual Studio 12 (2013)+ /arch is not necessary to build SSE2
4646
# or SSE4 code unless a lesser /arch is forced. MSVC does not have a SSE4
@@ -50,12 +50,12 @@ if(MSVC AND CMAKE_C_COMPILER_ID STREQUAL "MSVC")
5050
if(MSVC_VERSION GREATER_EQUAL 1800 AND NOT CMAKE_C_FLAGS MATCHES "/arch:")
5151
set(SIMD_ENABLE_FLAGS)
5252
else()
53-
set(SIMD_ENABLE_FLAGS "/arch:AVX;/arch:SSE2;;;;")
53+
set(SIMD_ENABLE_FLAGS "/arch:AVX2;/arch:AVX;/arch:SSE2;;;;")
5454
endif()
5555
set(SIMD_DISABLE_FLAGS)
5656
else()
57-
set(SIMD_ENABLE_FLAGS "-msse4.1;-msse2;-mips32;-mdspr2;-mfpu=neon;-mmsa")
58-
set(SIMD_DISABLE_FLAGS "-mno-sse4.1;-mno-sse2;;-mno-dspr2;;-mno-msa")
57+
set(SIMD_ENABLE_FLAGS "-mavx2;-msse4.1;-msse2;-mips32;-mdspr2;-mfpu=neon;-mmsa")
58+
set(SIMD_DISABLE_FLAGS "-mno-avx2;-mno-sse4.1;-mno-sse2;;-mno-dspr2;;-mno-msa")
5959
endif()
6060

6161
set(WEBP_SIMD_FILES_TO_INCLUDE)

configure.ac

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,25 @@ AS_IF([test "$GCC" = "yes" ], [
161161
AC_SUBST([AM_CFLAGS])
162162

163163
dnl === Check for machine specific flags
164+
AC_ARG_ENABLE([avx2],
165+
AS_HELP_STRING([--disable-avx2],
166+
[Disable detection of AVX2 support
167+
@<:@default=auto@:>@]))
168+
169+
AS_IF([test "x$enable_avx2" != "xno" -a "x$enable_sse4_1" != "xno"
170+
-a "x$enable_sse2" != "xno"], [
171+
AVX2_FLAGS="$INTRINSICS_CFLAGS $AVX2_FLAGS"
172+
TEST_AND_ADD_CFLAGS([AVX2_FLAGS], [-mavx2])
173+
AS_IF([test -n "$AVX2_FLAGS"], [
174+
SAVED_CFLAGS=$CFLAGS
175+
CFLAGS="$CFLAGS $AVX2_FLAGS"
176+
AC_CHECK_HEADER([immintrin.h],
177+
[AC_DEFINE(WEBP_HAVE_AVX2, [1],
178+
[Set to 1 if AVX2 is supported])],
179+
[AVX2_FLAGS=""])
180+
CFLAGS=$SAVED_CFLAGS])
181+
AC_SUBST([AVX2_FLAGS])])
182+
164183
AC_ARG_ENABLE([sse4.1],
165184
AS_HELP_STRING([--disable-sse4.1],
166185
[Disable detection of SSE4.1 support

src/dsp/Makefile.am

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ noinst_LTLIBRARIES += libwebpdsp_sse2.la
55
noinst_LTLIBRARIES += libwebpdspdecode_sse2.la
66
noinst_LTLIBRARIES += libwebpdsp_sse41.la
77
noinst_LTLIBRARIES += libwebpdspdecode_sse41.la
8+
noinst_LTLIBRARIES += libwebpdsp_avx2.la
9+
noinst_LTLIBRARIES += libwebpdspdecode_avx2.la
810
noinst_LTLIBRARIES += libwebpdsp_neon.la
911
noinst_LTLIBRARIES += libwebpdspdecode_neon.la
1012
noinst_LTLIBRARIES += libwebpdsp_msa.la
@@ -44,6 +46,11 @@ ENC_SOURCES += lossless_enc.c
4446
ENC_SOURCES += quant.h
4547
ENC_SOURCES += ssim.c
4648

49+
libwebpdspdecode_avx2_la_SOURCES =
50+
libwebpdspdecode_avx2_la_SOURCES += lossless_avx2.c
51+
libwebpdspdecode_avx2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
52+
libwebpdspdecode_avx2_la_CFLAGS = $(AM_CFLAGS) $(AVX2_FLAGS)
53+
4754
libwebpdspdecode_sse41_la_SOURCES =
4855
libwebpdspdecode_sse41_la_SOURCES += alpha_processing_sse41.c
4956
libwebpdspdecode_sse41_la_SOURCES += dec_sse41.c
@@ -123,6 +130,12 @@ libwebpdsp_sse41_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
123130
libwebpdsp_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_FLAGS)
124131
libwebpdsp_sse41_la_LIBADD = libwebpdspdecode_sse41.la
125132

133+
libwebpdsp_avx2_la_SOURCES =
134+
libwebpdsp_avx2_la_SOURCES += lossless_enc_avx2.c
135+
libwebpdsp_avx2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
136+
libwebpdsp_avx2_la_CFLAGS = $(AM_CFLAGS) $(AVX2_FLAGS)
137+
libwebpdsp_avx2_la_LIBADD = libwebpdspdecode_avx2.la
138+
126139
libwebpdsp_neon_la_SOURCES =
127140
libwebpdsp_neon_la_SOURCES += cost_neon.c
128141
libwebpdsp_neon_la_SOURCES += enc_neon.c
@@ -167,6 +180,7 @@ libwebpdsp_la_LDFLAGS = -lm
167180
libwebpdsp_la_LIBADD =
168181
libwebpdsp_la_LIBADD += libwebpdsp_sse2.la
169182
libwebpdsp_la_LIBADD += libwebpdsp_sse41.la
183+
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la
170184
libwebpdsp_la_LIBADD += libwebpdsp_neon.la
171185
libwebpdsp_la_LIBADD += libwebpdsp_msa.la
172186
libwebpdsp_la_LIBADD += libwebpdsp_mips32.la
@@ -180,6 +194,7 @@ if BUILD_LIBWEBPDECODER
180194
libwebpdspdecode_la_LIBADD =
181195
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse2.la
182196
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse41.la
197+
libwebpdspdecode_la_LIBADD += libwebpdspdecode_avx2.la
183198
libwebpdspdecode_la_LIBADD += libwebpdspdecode_neon.la
184199
libwebpdspdecode_la_LIBADD += libwebpdspdecode_msa.la
185200
libwebpdspdecode_la_LIBADD += libwebpdspdecode_mips32.la

src/dsp/cpu.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@
5656
(defined(_M_X64) || defined(_M_IX86))
5757
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
5858
#endif
59+
60+
#if defined(_MSC_VER) && _MSC_VER >= 1700 && \
61+
(defined(_M_X64) || defined(_M_IX86))
62+
#define WEBP_MSC_AVX2 // Visual C++ AVX2 targets
63+
#endif
5964
#endif
6065

6166
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
@@ -80,6 +85,16 @@
8085
#define WEBP_HAVE_SSE41
8186
#endif
8287

88+
#if (defined(__AVX2__) || defined(WEBP_MSC_AVX2)) && \
89+
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_AVX2))
90+
#define WEBP_USE_AVX2
91+
#endif
92+
93+
#if defined(WEBP_USE_AVX2) && !defined(WEBP_HAVE_AVX2)
94+
#define WEBP_HAVE_AVX2
95+
#endif
96+
97+
#undef WEBP_MSC_AVX2
8398
#undef WEBP_MSC_SSE41
8499
#undef WEBP_MSC_SSE2
85100

src/dsp/lossless.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,16 +577,21 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
577577
//------------------------------------------------------------------------------
578578

579579
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
580+
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE;
580581
VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
582+
VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16];
581583
VP8LPredictorFunc VP8LPredictors[16];
582584

583585
// exposed plain-C implementations
584586
VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
585587

586588
VP8LTransformColorInverseFunc VP8LTransformColorInverse;
589+
VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE;
587590

588591
VP8LConvertFunc VP8LConvertBGRAToRGB;
592+
VP8LConvertFunc VP8LConvertBGRAToRGB_SSE;
589593
VP8LConvertFunc VP8LConvertBGRAToRGBA;
594+
VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE;
590595
VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
591596
VP8LConvertFunc VP8LConvertBGRAToRGB565;
592597
VP8LConvertFunc VP8LConvertBGRAToBGR;
@@ -597,6 +602,7 @@ VP8LMapAlphaFunc VP8LMapColor8b;
597602
extern VP8CPUInfo VP8GetCPUInfo;
598603
extern void VP8LDspInitSSE2(void);
599604
extern void VP8LDspInitSSE41(void);
605+
extern void VP8LDspInitAVX2(void);
600606
extern void VP8LDspInitNEON(void);
601607
extern void VP8LDspInitMIPSdspR2(void);
602608
extern void VP8LDspInitMSA(void);
@@ -649,6 +655,11 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) {
649655
#if defined(WEBP_HAVE_SSE41)
650656
if (VP8GetCPUInfo(kSSE4_1)) {
651657
VP8LDspInitSSE41();
658+
#if defined(WEBP_HAVE_AVX2)
659+
if (VP8GetCPUInfo(kAVX2)) {
660+
VP8LDspInitAVX2();
661+
}
662+
#endif
652663
}
653664
#endif
654665
}

src/dsp/lossless.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,12 @@ typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
6464
uint32_t* WEBP_RESTRICT out);
6565
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
6666
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
67+
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_SSE[16];
6768

6869
typedef void (*VP8LProcessDecBlueAndRedFunc)(const uint32_t* src,
6970
int num_pixels, uint32_t* dst);
7071
extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
72+
extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed_SSE;
7173

7274
typedef struct {
7375
// Note: the members are uint8_t, so that any negative values are
@@ -80,6 +82,7 @@ typedef void (*VP8LTransformColorInverseFunc)(const VP8LMultipliers* const m,
8082
const uint32_t* src,
8183
int num_pixels, uint32_t* dst);
8284
extern VP8LTransformColorInverseFunc VP8LTransformColorInverse;
85+
extern VP8LTransformColorInverseFunc VP8LTransformColorInverse_SSE;
8386

8487
struct VP8LTransform; // Defined in dec/vp8li.h.
8588

@@ -99,6 +102,8 @@ extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
99102
extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
100103
extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
101104
extern VP8LConvertFunc VP8LConvertBGRAToBGR;
105+
extern VP8LConvertFunc VP8LConvertBGRAToRGB_SSE;
106+
extern VP8LConvertFunc VP8LConvertBGRAToRGBA_SSE;
102107

103108
// Converts from BGRA to other color spaces.
104109
void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
@@ -149,21 +154,25 @@ void VP8LDspInit(void);
149154

150155
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
151156
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
157+
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed_SSE;
152158
typedef void (*VP8LTransformColorFunc)(
153159
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT dst,
154160
int num_pixels);
155161
extern VP8LTransformColorFunc VP8LTransformColor;
162+
extern VP8LTransformColorFunc VP8LTransformColor_SSE;
156163
typedef void (*VP8LCollectColorBlueTransformsFunc)(
157164
const uint32_t* WEBP_RESTRICT argb, int stride,
158165
int tile_width, int tile_height,
159166
int green_to_blue, int red_to_blue, uint32_t histo[]);
160167
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
168+
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms_SSE;
161169

162170
typedef void (*VP8LCollectColorRedTransformsFunc)(
163171
const uint32_t* WEBP_RESTRICT argb, int stride,
164172
int tile_width, int tile_height,
165173
int green_to_red, uint32_t histo[]);
166174
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
175+
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms_SSE;
167176

168177
// Expose some C-only fallback functions
169178
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
@@ -181,6 +190,7 @@ void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
181190

182191
extern VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
183192
extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
193+
extern VP8LPredictorAddSubFunc VP8LPredictorsSub_SSE[16];
184194

185195
// -----------------------------------------------------------------------------
186196
// Huffman-cost related functions.
@@ -255,6 +265,7 @@ typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row,
255265
int width, int xbits,
256266
uint32_t* WEBP_RESTRICT dst);
257267
extern VP8LBundleColorMapFunc VP8LBundleColorMap;
268+
extern VP8LBundleColorMapFunc VP8LBundleColorMap_SSE;
258269
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
259270
int width, int xbits, uint32_t* WEBP_RESTRICT dst);
260271

0 commit comments

Comments
 (0)