Skip to content

Commit b0c9f4f

Browse files
authored
Add thumbhash encoder (#127)
The C++ implementation of the thumbhash encoder is based on the Rust reference implementation found here: https://github.com/evanw/thumbhash/blob/main/rust/src/lib.rs We modified it in the following ways: - Make it work with an OpenCV Mat as the input frame - Handle images with or without an alpha channel - Handle grayscale images - Perform simple downsampling of images. We don't need very many pixels to get a good hash.
1 parent 5787bc9 commit b0c9f4f

20 files changed

+440
-0
lines changed

data/coast.jpg

3.15 KB
Loading

data/fall.jpg

4.7 KB
Loading

data/field.jpg

4.34 KB
Loading

data/firefox-16bit-alpha.png

16.4 KB
Loading

data/firefox-16bit.png

14.1 KB
Loading

data/firefox-gray-alpha.webp

1.71 KB
Binary file not shown.

data/firefox-gray.jpg

3.08 KB
Loading

data/firefox.png

13 KB
Loading

data/large-sunrise.jpg

177 KB
Loading

data/mountain.jpg

2.6 KB
Loading

data/opera-gray-alpha.png

3.35 KB
Loading

data/opera.png

5.72 KB
Loading

data/street.jpg

4.94 KB
Loading

data/sunrise.jpg

2.48 KB
Loading

data/sunset.jpg

3.5 KB
Loading

lilliput.go

+4
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,9 @@ func NewEncoder(ext string, decodedBy Decoder, dst []byte) (Encoder, error) {
108108
return nil, errors.New("Encoder cannot encode into video types")
109109
}
110110

111+
if strings.ToLower(ext) == ".thumbhash" {
112+
return newThumbhashEncoder(decodedBy, dst)
113+
}
114+
111115
return newOpenCVEncoder(ext, decodedBy, dst)
112116
}

thumbhash.cpp

+282
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
#include "thumbhash.hpp"
2+
#include <stdbool.h>
3+
#include <vector>
4+
#include <cmath>
5+
#include <algorithm>
6+
#include <tuple>
7+
8+
static constexpr size_t MAX_DIMENSION = 100;
9+
static constexpr float PI = 3.14159265f;
10+
11+
// State behind the opaque thumbhash_encoder handle. The encoder does not
// allocate its own output storage; it only remembers where the caller wants
// the finished hash to be written.
struct thumbhash_encoder_struct {
    uint8_t* dst;   // caller-supplied buffer that receives the encoded hash
    size_t dst_len; // size of that buffer in bytes, as given at creation time
};
15+
16+
// Creates an encoder that writes its hash into the caller-owned buffer `buf`
// of `buf_len` bytes. The buffer is only referenced, never copied or freed, so
// it must stay valid for as long as the encoder is used.
//
// Returns NULL when the encoder cannot be allocated. The handle must be
// destroyed with thumbhash_encoder_release().
thumbhash_encoder thumbhash_encoder_create(void* buf, size_t buf_len)
{
    thumbhash_encoder e = NULL;
    try {
        // Value-initialization zeroes both members, so no memset is required.
        e = new thumbhash_encoder_struct();
    }
    catch (...) {
        // A plain `new` reports failure by throwing, never by returning null.
        // This function is part of an extern "C" API, so the exception must
        // not escape; report the failure through the return value instead.
        return NULL;
    }
    e->dst = static_cast<uint8_t*>(buf);
    e->dst_len = buf_len;

    return e;
}
28+
29+
static std::tuple<float, std::vector<float>, float> encode_channel(
30+
const std::vector<float>& channel,
31+
size_t nx,
32+
size_t ny,
33+
size_t w,
34+
size_t h)
35+
{
36+
float dc = 0.0f;
37+
std::vector<float> ac;
38+
ac.reserve(nx * ny / 2);
39+
float scale = 0.0f;
40+
std::vector<float> fx(w, 0.0f);
41+
for (size_t cy = 0; cy < ny; ++cy) {
42+
size_t cx = 0;
43+
while (cx * ny < nx * (ny - cy)) {
44+
float f = 0.0f;
45+
for (size_t x = 0; x < w; ++x) {
46+
fx[x] = cos(PI / static_cast<float>(w) * static_cast<float>(cx) *
47+
(static_cast<float>(x) + 0.5f));
48+
}
49+
for (size_t y = 0; y < h; ++y) {
50+
float fy = cos(PI / static_cast<float>(h) * static_cast<float>(cy) *
51+
(static_cast<float>(y) + 0.5f));
52+
for (size_t x = 0; x < w; ++x) {
53+
f += channel[x + y * w] * fx[x] * fy;
54+
}
55+
}
56+
f /= static_cast<float>(w * h);
57+
if (cx > 0 || cy > 0) {
58+
ac.push_back(f);
59+
scale = std::max(std::abs(f), scale);
60+
}
61+
else {
62+
dc = f;
63+
}
64+
cx += 1;
65+
}
66+
}
67+
if (scale > 0.0) {
68+
for (auto& ac_val : ac) {
69+
ac_val = 0.5f + 0.5f / scale * ac_val;
70+
}
71+
}
72+
return std::make_tuple(dc, ac, scale);
73+
}
74+
75+
// This C++ thumbhash encode function is based on the rust reference
76+
// implementation found here:
77+
//
78+
// https://github.com/evanw/thumbhash/blob/main/rust/src/lib.rs
79+
//
80+
// We modified the logic in the following ways:
81+
//
82+
// - Make it work with OpenCV mat as input frame
83+
// - Handle images with or without an alpha channel
84+
// - Handle grayscale images
85+
// - Perform simple downscaling of large images. We don't need very many pixels
86+
// to get a good hash.
87+
int thumbhash_encoder_encode(thumbhash_encoder e, const opencv_mat opaque_frame)
88+
{
89+
auto frame = static_cast<const cv::Mat*>(opaque_frame);
90+
91+
size_t orig_w = frame->cols;
92+
size_t orig_h = frame->rows;
93+
size_t w = orig_w, h = orig_h;
94+
95+
// We don't need very many pixels to get a good hash. Downsample the image
96+
// when its dimensions exceed the limit.
97+
if (orig_w > MAX_DIMENSION || orig_h > MAX_DIMENSION) {
98+
float aspect_ratio = static_cast<float>(orig_w) / orig_h;
99+
if (orig_w > orig_h) {
100+
w = MAX_DIMENSION;
101+
h = static_cast<size_t>(w / aspect_ratio);
102+
}
103+
else {
104+
h = MAX_DIMENSION;
105+
w = static_cast<size_t>(h * aspect_ratio);
106+
}
107+
}
108+
109+
float row_ratio = static_cast<float>(orig_h) / h;
110+
float col_ratio = static_cast<float>(orig_w) / w;
111+
112+
bool has_alpha = false;
113+
std::vector<float> l, p, q, a;
114+
l.reserve(w * h);
115+
p.reserve(w * h);
116+
q.reserve(w * h);
117+
a.reserve(w * h);
118+
119+
if (frame->type() == CV_8UC4) {
120+
float avg_r = 0.0;
121+
float avg_g = 0.0;
122+
float avg_b = 0.0;
123+
float avg_a = 0.0;
124+
125+
// 4 channels (BGRA)
126+
for (int i = 0; i < h; ++i) {
127+
for (int j = 0; j < w; ++j) {
128+
size_t orig_i = static_cast<size_t>(i * row_ratio);
129+
size_t orig_j = static_cast<size_t>(j * col_ratio);
130+
const cv::Vec4b& pixel = frame->at<cv::Vec4b>(orig_i, orig_j);
131+
float alpha = static_cast<float>(pixel[3]) / 255.0f; // A
132+
avg_b += (alpha / 255.0f) * static_cast<float>(pixel[0]); // B
133+
avg_g += (alpha / 255.0f) * static_cast<float>(pixel[1]); // G
134+
avg_r += (alpha / 255.0f) * static_cast<float>(pixel[2]); // R
135+
avg_a += alpha;
136+
}
137+
}
138+
if (avg_a > 0.0f) {
139+
avg_r /= avg_a;
140+
avg_g /= avg_a;
141+
avg_b /= avg_a;
142+
}
143+
has_alpha = avg_a < static_cast<float>(w * h);
144+
145+
for (int i = 0; i < h; ++i) {
146+
for (int j = 0; j < w; ++j) {
147+
size_t orig_i = static_cast<size_t>(i * row_ratio);
148+
size_t orig_j = static_cast<size_t>(j * col_ratio);
149+
const cv::Vec4b& pixel = frame->at<cv::Vec4b>(orig_i, orig_j);
150+
float alpha = static_cast<float>(pixel[3]) / 255.0f; // A
151+
float b =
152+
avg_b * (1.0f - alpha) + (alpha / 255.0f) * static_cast<float>(pixel[0]); // B
153+
float g =
154+
avg_g * (1.0f - alpha) + (alpha / 255.0f) * static_cast<float>(pixel[1]); // G
155+
float r =
156+
avg_r * (1.0f - alpha) + (alpha / 255.0f) * static_cast<float>(pixel[2]); // R
157+
l.push_back((r + g + b) / 3.0f);
158+
p.push_back((r + g) / 2.0f - b);
159+
q.push_back(r - g);
160+
a.push_back(alpha);
161+
}
162+
}
163+
}
164+
else if (frame->type() == CV_8UC3) {
165+
// 3 channels (BGR)
166+
for (int i = 0; i < h; ++i) {
167+
for (int j = 0; j < w; ++j) {
168+
size_t orig_i = static_cast<size_t>(i * row_ratio);
169+
size_t orig_j = static_cast<size_t>(j * col_ratio);
170+
const cv::Vec3b& pixel = frame->at<cv::Vec3b>(orig_i, orig_j);
171+
float b = (1.0f / 255.0f) * static_cast<float>(pixel[0]); // B
172+
float g = (1.0f / 255.0f) * static_cast<float>(pixel[1]); // G
173+
float r = (1.0f / 255.0f) * static_cast<float>(pixel[2]); // R
174+
l.push_back((r + g + b) / 3.0f);
175+
p.push_back((r + g) / 2.0f - b);
176+
q.push_back(r - g);
177+
a.push_back(1.0f);
178+
}
179+
}
180+
}
181+
else if (frame->type() == CV_8U) {
182+
for (int i = 0; i < h; ++i) {
183+
for (int j = 0; j < w; ++j) {
184+
size_t orig_i = static_cast<size_t>(i * row_ratio);
185+
size_t orig_j = static_cast<size_t>(j * col_ratio);
186+
uchar pixel = frame->at<uchar>(orig_i, orig_j);
187+
float l_val = static_cast<float>(pixel) / 255.0f;
188+
l.push_back(l_val);
189+
p.push_back(0.0f);
190+
q.push_back(0.0f);
191+
a.push_back(1.0f);
192+
}
193+
}
194+
}
195+
else {
196+
// Unsupported format
197+
return -1;
198+
}
199+
200+
size_t l_limit = has_alpha ? 5 : 7; // Use fewer luminance bits if there's alpha
201+
202+
size_t lx = std::max(static_cast<size_t>(std::round(static_cast<float>(l_limit * w) /
203+
static_cast<float>(std::max(w, h)))),
204+
static_cast<size_t>(1));
205+
size_t ly = std::max(static_cast<size_t>(std::round(static_cast<float>(l_limit * h) /
206+
static_cast<float>(std::max(w, h)))),
207+
static_cast<size_t>(1));
208+
209+
float l_dc, l_scale, p_dc, p_scale, q_dc, q_scale, a_dc, a_scale;
210+
std::vector<float> l_ac, p_ac, q_ac, a_ac;
211+
std::tie(l_dc, l_ac, l_scale) = encode_channel(
212+
l, std::max(lx, static_cast<size_t>(3)), std::max(ly, static_cast<size_t>(3)), w, h);
213+
std::tie(p_dc, p_ac, p_scale) = encode_channel(p, 3, 3, w, h);
214+
std::tie(q_dc, q_ac, q_scale) = encode_channel(q, 3, 3, w, h);
215+
if (has_alpha) {
216+
std::tie(a_dc, a_ac, a_scale) = encode_channel(a, 5, 5, w, h);
217+
}
218+
else {
219+
a_dc = 1.0f;
220+
a_scale = 1.0f;
221+
}
222+
223+
bool is_landscape = w > h;
224+
uint32_t header24 = static_cast<uint32_t>(std::round(63.0f * l_dc)) |
225+
(static_cast<uint32_t>(std::round(31.5f + 31.5f * p_dc)) << 6) |
226+
(static_cast<uint32_t>(std::round(31.5f + 31.5f * q_dc)) << 12) |
227+
(static_cast<uint32_t>(std::round(31.0f * l_scale)) << 18) | (has_alpha ? 1 << 23 : 0);
228+
uint16_t header16 = static_cast<uint16_t>(is_landscape ? ly : lx) |
229+
(static_cast<uint16_t>(std::round(63.0f * p_scale)) << 3) |
230+
(static_cast<uint16_t>(std::round(63.0f * q_scale)) << 9) | (is_landscape ? 1 << 15 : 0);
231+
232+
std::vector<uint8_t> hash;
233+
hash.reserve(25);
234+
235+
hash.push_back(header24 & 255);
236+
hash.push_back((header24 >> 8) & 255);
237+
hash.push_back(header24 >> 16);
238+
hash.push_back(header16 & 255);
239+
hash.push_back(header16 >> 8);
240+
bool is_odd = false;
241+
if (has_alpha) {
242+
hash.push_back(static_cast<uint8_t>(std::round(15.0f * a_dc)) |
243+
(static_cast<uint8_t>(std::round(15.0f * a_scale)) << 4));
244+
}
245+
for (auto ac : {l_ac, p_ac, q_ac}) {
246+
for (float f : ac) {
247+
uint8_t u = static_cast<uint8_t>(std::round(15.0f * f));
248+
if (is_odd) {
249+
*hash.rbegin() |= u << 4;
250+
}
251+
else {
252+
hash.push_back(u);
253+
}
254+
is_odd = !is_odd;
255+
}
256+
}
257+
if (has_alpha) {
258+
for (float f : a_ac) {
259+
uint8_t u = static_cast<uint8_t>(std::round(15.0f * f));
260+
if (is_odd) {
261+
*hash.rbegin() |= u << 4;
262+
}
263+
else {
264+
hash.push_back(u);
265+
}
266+
is_odd = !is_odd;
267+
}
268+
}
269+
270+
if (hash.size() <= e->dst_len) {
271+
std::copy(hash.begin(), hash.end(), e->dst);
272+
}
273+
else {
274+
return -1;
275+
}
276+
return hash.size();
277+
}
278+
279+
// Destroys an encoder obtained from thumbhash_encoder_create(). Only the
// encoder object itself is freed; the destination buffer it points at is left
// alone. Passing NULL is a no-op.
void thumbhash_encoder_release(thumbhash_encoder e)
{
    if (e != nullptr) {
        delete e;
    }
}

thumbhash.go

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package lilliput
2+
3+
// #cgo CFLAGS: -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx
4+
// #cgo darwin CFLAGS: -I${SRCDIR}/deps/osx/include
5+
// #cgo linux CFLAGS: -I${SRCDIR}/deps/linux/include
6+
// #cgo CXXFLAGS: -std=c++11
7+
// #cgo darwin CXXFLAGS: -I${SRCDIR}/deps/osx/include
8+
// #cgo linux CXXFLAGS: -I${SRCDIR}/deps/linux/include
9+
// #cgo LDFLAGS: -lopencv_core -lopencv_imgcodecs -lopencv_imgproc -ljpeg -lpng -lwebp -lippicv -lz
10+
// #cgo darwin LDFLAGS: -L${SRCDIR}/deps/osx/lib -L${SRCDIR}/deps/osx/share/OpenCV/3rdparty/lib
11+
// #cgo linux LDFLAGS: -L${SRCDIR}/deps/linux/lib -L${SRCDIR}/deps/linux/share/OpenCV/3rdparty/lib
12+
// #include "thumbhash.hpp"
13+
import "C"
14+
15+
import (
16+
"io"
17+
"unsafe"
18+
)
19+
20+
type thumbhashEncoder struct {
21+
encoder C.thumbhash_encoder
22+
buf []byte
23+
}
24+
25+
func newThumbhashEncoder(decodedBy Decoder, buf []byte) (*thumbhashEncoder, error) {
26+
buf = buf[:1]
27+
enc := C.thumbhash_encoder_create(unsafe.Pointer(&buf[0]), C.size_t(cap(buf)))
28+
if enc == nil {
29+
return nil, ErrBufTooSmall
30+
}
31+
return &thumbhashEncoder{
32+
encoder: enc,
33+
buf: buf,
34+
}, nil
35+
}
36+
37+
func (e *thumbhashEncoder) Encode(f *Framebuffer, opt map[int]int) ([]byte, error) {
38+
if f == nil {
39+
return nil, io.EOF
40+
}
41+
42+
length := C.thumbhash_encoder_encode(e.encoder, f.mat)
43+
if length <= 0 {
44+
return nil, ErrInvalidImage
45+
}
46+
47+
return e.buf[:length], nil
48+
}
49+
50+
func (e *thumbhashEncoder) Close() {
51+
C.thumbhash_encoder_release(e.encoder)
52+
}

thumbhash.hpp

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#ifndef LILLIPUT_THUMBHASH_HPP
2+
#define LILLIPUT_THUMBHASH_HPP
3+
4+
#include "opencv.hpp"
5+
6+
#ifdef __cplusplus
7+
extern "C" {
8+
#endif
9+
10+
typedef struct thumbhash_encoder_struct* thumbhash_encoder;
11+
12+
thumbhash_encoder thumbhash_encoder_create(void* buf, size_t buf_len);
13+
int thumbhash_encoder_encode(thumbhash_encoder e, const opencv_mat opqaue_frame);
14+
void thumbhash_encoder_release(thumbhash_encoder e);
15+
16+
#ifdef __cplusplus
17+
}
18+
#endif
19+
20+
#endif

0 commit comments

Comments
 (0)