Skip to content

Commit

Permalink
Add thumbhash encoder (discord#127)
Browse files Browse the repository at this point in the history
The C++ implementation of the thumbhash encoder is based on the Rust reference implementation found here:

https://github.com/evanw/thumbhash/blob/main/rust/src/lib.rs

We modified it in the following ways:

- Make it work with OpenCV mat as input frame
- Handle images with or without an alpha channel
- Handle grayscale images
- Perform simple downsampling of images. We don't need very many pixels to get a good hash.
  • Loading branch information
slam authored Jul 14, 2023
1 parent 5787bc9 commit b0c9f4f
Show file tree
Hide file tree
Showing 20 changed files with 440 additions and 0 deletions.
Binary file added data/coast.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/fall.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/field.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/firefox-16bit-alpha.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/firefox-16bit.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/firefox-gray-alpha.webp
Binary file not shown.
Binary file added data/firefox-gray.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/firefox.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/large-sunrise.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/mountain.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/opera-gray-alpha.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/opera.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/street.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/sunrise.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/sunset.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions lilliput.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,9 @@ func NewEncoder(ext string, decodedBy Decoder, dst []byte) (Encoder, error) {
return nil, errors.New("Encoder cannot encode into video types")
}

if strings.ToLower(ext) == ".thumbhash" {
return newThumbhashEncoder(decodedBy, dst)
}

return newOpenCVEncoder(ext, decodedBy, dst)
}
282 changes: 282 additions & 0 deletions thumbhash.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
#include "thumbhash.hpp"
#include <stdbool.h>
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <new>
#include <tuple>
#include <utility>
#include <vector>

// Largest width or height, in pixels, that the encoder samples. Images with a
// side longer than this are downsampled before hashing.
static constexpr size_t MAX_DIMENSION = 100;
// Single-precision value of pi used for the cosine basis in encode_channel().
static constexpr float PI = 3.14159265f;

// State behind the opaque thumbhash_encoder handle: the destination buffer
// that encoded hashes are written into.
struct thumbhash_encoder_struct {
// Start of the destination buffer. The struct only stores this pointer.
uint8_t* dst;
// Capacity of dst in bytes; a hash larger than this is rejected.
size_t dst_len;
};

// Creates a thumbhash encoder that writes its output into a caller-supplied
// buffer.
//
// buf     - destination for encoded hashes. Only the pointer is stored, so the
//           buffer must stay valid for as long as the encoder is used.
// buf_len - capacity of buf in bytes.
//
// Returns the new encoder, or a null handle if allocation fails. Destroy it
// with thumbhash_encoder_release().
thumbhash_encoder thumbhash_encoder_create(void* buf, size_t buf_len)
{
    // Plain `new` reports failure by throwing std::bad_alloc, which would make
    // the null check below dead code and let an exception escape through the
    // extern "C" interface. The nothrow form returns a null pointer instead.
    thumbhash_encoder e = new (std::nothrow) thumbhash_encoder_struct();
    if (e == nullptr) {
        return nullptr;
    }

    // The value-initialization above already zeroed every field, so only the
    // caller-provided values need to be filled in.
    e->dst = static_cast<uint8_t*>(buf);
    e->dst_len = buf_len;

    return e;
}

// Computes the low-frequency cosine-transform coefficients of one channel.
//
// channel - w * h samples in row-major order (sample (x, y) is at x + y * w).
// nx, ny  - number of horizontal / vertical frequencies considered. Only the
//           terms satisfying cx * ny < nx * (ny - cy) are evaluated.
// w, h    - width and height of the channel in samples.
//
// Returns (dc, ac, scale):
//   dc    - the (cx, cy) == (0, 0) term, i.e. the channel average.
//   ac    - every other evaluated term, in cy-major / cx-minor order.
//   scale - the largest absolute AC value before normalization.
// When scale is non-zero each AC value v is remapped to 0.5 + 0.5 * v / scale.
static std::tuple<float, std::vector<float>, float> encode_channel(
    const std::vector<float>& channel,
    size_t nx,
    size_t ny,
    size_t w,
    size_t h)
{
    float dc = 0.0f;
    float scale = 0.0f;
    std::vector<float> ac;
    ac.reserve(nx * ny / 2);

    // Horizontal basis values for the current cx, reused for every row.
    std::vector<float> fx(w, 0.0f);

    // NOTE: `cos` is intentionally left unqualified, exactly as before, so the
    // overload that gets selected (and therefore the produced hash) is
    // unchanged by this rewrite.
    for (size_t cy = 0; cy < ny; ++cy) {
        // Angular step per row for this vertical frequency (loop invariant).
        const float y_step = PI / static_cast<float>(h) * static_cast<float>(cy);

        for (size_t cx = 0; cx * ny < nx * (ny - cy); ++cx) {
            // Angular step per column for this horizontal frequency.
            const float x_step = PI / static_cast<float>(w) * static_cast<float>(cx);
            for (size_t x = 0; x < w; ++x) {
                fx[x] = cos(x_step * (static_cast<float>(x) + 0.5f));
            }

            float f = 0.0f;
            for (size_t y = 0; y < h; ++y) {
                const float fy = cos(y_step * (static_cast<float>(y) + 0.5f));
                for (size_t x = 0; x < w; ++x) {
                    f += channel[x + y * w] * fx[x] * fy;
                }
            }
            f /= static_cast<float>(w * h);

            if (cx > 0 || cy > 0) {
                ac.push_back(f);
                scale = std::max(std::abs(f), scale);
            }
            else {
                dc = f;
            }
        }
    }

    // Normalize the AC terms into [0, 1]; a flat channel (scale == 0) is left
    // untouched to avoid dividing by zero.
    if (scale > 0.0) {
        for (auto& ac_val : ac) {
            ac_val = 0.5f + 0.5f / scale * ac_val;
        }
    }

    // Move the coefficient vector into the tuple instead of deep-copying it.
    return std::make_tuple(dc, std::move(ac), scale);
}

// Encodes an image into a thumbhash and writes it to the encoder's buffer.
//
// This C++ thumbhash encode function is based on the Rust reference
// implementation found here:
//
// https://github.com/evanw/thumbhash/blob/main/rust/src/lib.rs
//
// We modified the logic in the following ways:
//
// - Make it work with OpenCV mat as input frame
// - Handle images with or without an alpha channel
// - Handle grayscale images
// - Perform simple downscaling of large images. We don't need very many pixels
// to get a good hash.
//
// e            - encoder whose destination buffer receives the hash.
// opaque_frame - pointer to a cv::Mat of type CV_8UC4 (BGRA), CV_8UC3 (BGR) or
//                CV_8U (grayscale).
//
// Returns the number of bytes written, or -1 when an argument is null, the
// image is empty, the pixel format is unsupported, or the hash does not fit
// in the destination buffer.
int thumbhash_encoder_encode(thumbhash_encoder e, const opencv_mat opaque_frame)
{
    if (!e || !e->dst || !opaque_frame) {
        return -1;
    }

    const cv::Mat* frame = static_cast<const cv::Mat*>(opaque_frame);
    const int type = frame->type();

    // An empty image would lead to a division by zero below, and any other
    // pixel layout cannot be read by the sampling loops.
    if (frame->cols <= 0 || frame->rows <= 0 ||
        (type != CV_8UC4 && type != CV_8UC3 && type != CV_8U)) {
        return -1;
    }

    const size_t orig_w = static_cast<size_t>(frame->cols);
    const size_t orig_h = static_cast<size_t>(frame->rows);
    size_t w = orig_w;
    size_t h = orig_h;

    // We don't need very many pixels to get a good hash. Downsample the image
    // when its dimensions exceed the limit.
    if (orig_w > MAX_DIMENSION || orig_h > MAX_DIMENSION) {
        const float aspect_ratio = static_cast<float>(orig_w) / orig_h;
        if (orig_w > orig_h) {
            w = MAX_DIMENSION;
            h = static_cast<size_t>(w / aspect_ratio);
        }
        else {
            h = MAX_DIMENSION;
            w = static_cast<size_t>(h * aspect_ratio);
        }
        // With an extreme aspect ratio (e.g. 1000x1) the short side truncates
        // to 0, which would make every later division by w * h blow up.
        w = std::max<size_t>(w, 1);
        h = std::max<size_t>(h, 1);
    }

    const float row_ratio = static_cast<float>(orig_h) / h;
    const float col_ratio = static_cast<float>(orig_w) / w;

    // Nearest-neighbour mapping from a downsampled coordinate to a source
    // coordinate, clamped so it can never index past the image.
    const auto src_row = [&](size_t i) -> int {
        return static_cast<int>(std::min(static_cast<size_t>(i * row_ratio), orig_h - 1));
    };
    const auto src_col = [&](size_t j) -> int {
        return static_cast<int>(std::min(static_cast<size_t>(j * col_ratio), orig_w - 1));
    };

    // Per-pixel channels: luminance, two chroma axes and alpha.
    const size_t pixel_count = w * h;
    std::vector<float> l;
    std::vector<float> p;
    std::vector<float> q;
    std::vector<float> a;
    l.reserve(pixel_count);
    p.reserve(pixel_count);
    q.reserve(pixel_count);
    a.reserve(pixel_count);

    bool has_alpha = false;

    if (type == CV_8UC4) {
        // 4 channels (BGRA)
        float avg_r = 0.0f;
        float avg_g = 0.0f;
        float avg_b = 0.0f;
        float avg_a = 0.0f;

        // First pass: alpha-weighted average color of the image.
        for (size_t i = 0; i < h; ++i) {
            const int si = src_row(i);
            for (size_t j = 0; j < w; ++j) {
                const cv::Vec4b& pixel = frame->at<cv::Vec4b>(si, src_col(j));
                const float alpha = static_cast<float>(pixel[3]) / 255.0f;      // A
                avg_b += (alpha / 255.0f) * static_cast<float>(pixel[0]);       // B
                avg_g += (alpha / 255.0f) * static_cast<float>(pixel[1]);       // G
                avg_r += (alpha / 255.0f) * static_cast<float>(pixel[2]);       // R
                avg_a += alpha;
            }
        }
        if (avg_a > 0.0f) {
            avg_r /= avg_a;
            avg_g /= avg_a;
            avg_b /= avg_a;
        }
        // Any pixel that is not fully opaque pulls the alpha sum below w * h.
        has_alpha = avg_a < static_cast<float>(pixel_count);

        // Second pass: composite each pixel over the average color, then
        // convert to the L/P/Q representation.
        for (size_t i = 0; i < h; ++i) {
            const int si = src_row(i);
            for (size_t j = 0; j < w; ++j) {
                const cv::Vec4b& pixel = frame->at<cv::Vec4b>(si, src_col(j));
                const float alpha = static_cast<float>(pixel[3]) / 255.0f;      // A
                const float b =
                    avg_b * (1.0f - alpha) + (alpha / 255.0f) * static_cast<float>(pixel[0]);
                const float g =
                    avg_g * (1.0f - alpha) + (alpha / 255.0f) * static_cast<float>(pixel[1]);
                const float r =
                    avg_r * (1.0f - alpha) + (alpha / 255.0f) * static_cast<float>(pixel[2]);
                l.push_back((r + g + b) / 3.0f);
                p.push_back((r + g) / 2.0f - b);
                q.push_back(r - g);
                a.push_back(alpha);
            }
        }
    }
    else if (type == CV_8UC3) {
        // 3 channels (BGR)
        for (size_t i = 0; i < h; ++i) {
            const int si = src_row(i);
            for (size_t j = 0; j < w; ++j) {
                const cv::Vec3b& pixel = frame->at<cv::Vec3b>(si, src_col(j));
                const float b = (1.0f / 255.0f) * static_cast<float>(pixel[0]); // B
                const float g = (1.0f / 255.0f) * static_cast<float>(pixel[1]); // G
                const float r = (1.0f / 255.0f) * static_cast<float>(pixel[2]); // R
                l.push_back((r + g + b) / 3.0f);
                p.push_back((r + g) / 2.0f - b);
                q.push_back(r - g);
                a.push_back(1.0f);
            }
        }
    }
    else {
        // Single channel (grayscale): luminance only, no chroma.
        for (size_t i = 0; i < h; ++i) {
            const int si = src_row(i);
            for (size_t j = 0; j < w; ++j) {
                const uchar pixel = frame->at<uchar>(si, src_col(j));
                l.push_back(static_cast<float>(pixel) / 255.0f);
                p.push_back(0.0f);
                q.push_back(0.0f);
                a.push_back(1.0f);
            }
        }
    }

    const size_t l_limit = has_alpha ? 5 : 7; // Use fewer luminance bits if there's alpha

    // Split the luminance budget between the axes in proportion to the aspect
    // ratio, keeping at least one term per axis.
    const float longest_side = static_cast<float>(std::max(w, h));
    const size_t lx = std::max(
        static_cast<size_t>(std::round(static_cast<float>(l_limit * w) / longest_side)),
        static_cast<size_t>(1));
    const size_t ly = std::max(
        static_cast<size_t>(std::round(static_cast<float>(l_limit * h) / longest_side)),
        static_cast<size_t>(1));

    float l_dc = 0.0f;
    float l_scale = 0.0f;
    float p_dc = 0.0f;
    float p_scale = 0.0f;
    float q_dc = 0.0f;
    float q_scale = 0.0f;
    // Defaults used when the image is fully opaque.
    float a_dc = 1.0f;
    float a_scale = 1.0f;
    std::vector<float> l_ac;
    std::vector<float> p_ac;
    std::vector<float> q_ac;
    std::vector<float> a_ac;

    std::tie(l_dc, l_ac, l_scale) = encode_channel(
        l, std::max(lx, static_cast<size_t>(3)), std::max(ly, static_cast<size_t>(3)), w, h);
    std::tie(p_dc, p_ac, p_scale) = encode_channel(p, 3, 3, w, h);
    std::tie(q_dc, q_ac, q_scale) = encode_channel(q, 3, 3, w, h);
    if (has_alpha) {
        std::tie(a_dc, a_ac, a_scale) = encode_channel(a, 5, 5, w, h);
    }

    // Pack the DC terms, scales and flags into the 24-bit and 16-bit headers.
    const bool is_landscape = w > h;
    const uint32_t header24 = static_cast<uint32_t>(std::round(63.0f * l_dc)) |
        (static_cast<uint32_t>(std::round(31.5f + 31.5f * p_dc)) << 6) |
        (static_cast<uint32_t>(std::round(31.5f + 31.5f * q_dc)) << 12) |
        (static_cast<uint32_t>(std::round(31.0f * l_scale)) << 18) |
        (has_alpha ? (1u << 23) : 0u);
    const uint16_t header16 = static_cast<uint16_t>(
        static_cast<uint16_t>(is_landscape ? ly : lx) |
        (static_cast<uint16_t>(std::round(63.0f * p_scale)) << 3) |
        (static_cast<uint16_t>(std::round(63.0f * q_scale)) << 9) |
        (is_landscape ? 1 << 15 : 0));

    std::vector<uint8_t> hash;
    hash.reserve(25);

    hash.push_back(static_cast<uint8_t>(header24 & 255));
    hash.push_back(static_cast<uint8_t>((header24 >> 8) & 255));
    hash.push_back(static_cast<uint8_t>(header24 >> 16));
    hash.push_back(static_cast<uint8_t>(header16 & 255));
    hash.push_back(static_cast<uint8_t>(header16 >> 8));
    if (has_alpha) {
        hash.push_back(static_cast<uint8_t>(
            static_cast<uint8_t>(std::round(15.0f * a_dc)) |
            (static_cast<uint8_t>(std::round(15.0f * a_scale)) << 4)));
    }

    // AC terms are stored as 4-bit values, two per byte, low nibble first. The
    // nibble position carries over from one channel to the next.
    bool is_odd = false;
    const auto pack_nibbles = [&hash, &is_odd](const std::vector<float>& coeffs) {
        for (float f : coeffs) {
            const uint8_t u = static_cast<uint8_t>(std::round(15.0f * f));
            if (is_odd) {
                hash.back() |= static_cast<uint8_t>(u << 4);
            }
            else {
                hash.push_back(u);
            }
            is_odd = !is_odd;
        }
    };

    // Iterate over pointers so the coefficient vectors are not copied.
    for (const std::vector<float>* coeffs : {&l_ac, &p_ac, &q_ac}) {
        pack_nibbles(*coeffs);
    }
    if (has_alpha) {
        pack_nibbles(a_ac);
    }

    if (hash.size() > e->dst_len) {
        return -1;
    }
    std::copy(hash.begin(), hash.end(), e->dst);
    return static_cast<int>(hash.size());
}

// Destroys an encoder handle. Only the encoder object itself is freed; the
// destination buffer it points at is left untouched. A null handle is a no-op.
void thumbhash_encoder_release(thumbhash_encoder e)
{
    if (e == nullptr) {
        return;
    }
    delete e;
}
52 changes: 52 additions & 0 deletions thumbhash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package lilliput

// #cgo CFLAGS: -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx
// #cgo darwin CFLAGS: -I${SRCDIR}/deps/osx/include
// #cgo linux CFLAGS: -I${SRCDIR}/deps/linux/include
// #cgo CXXFLAGS: -std=c++11
// #cgo darwin CXXFLAGS: -I${SRCDIR}/deps/osx/include
// #cgo linux CXXFLAGS: -I${SRCDIR}/deps/linux/include
// #cgo LDFLAGS: -lopencv_core -lopencv_imgcodecs -lopencv_imgproc -ljpeg -lpng -lwebp -lippicv -lz
// #cgo darwin LDFLAGS: -L${SRCDIR}/deps/osx/lib -L${SRCDIR}/deps/osx/share/OpenCV/3rdparty/lib
// #cgo linux LDFLAGS: -L${SRCDIR}/deps/linux/lib -L${SRCDIR}/deps/linux/share/OpenCV/3rdparty/lib
// #include "thumbhash.hpp"
import "C"

import (
"io"
"unsafe"
)

// thumbhashEncoder pairs a native thumbhash encoder handle with the Go buffer
// that the native code writes hashes into.
type thumbhashEncoder struct {
// encoder is the handle returned by C.thumbhash_encoder_create.
encoder C.thumbhash_encoder
// buf is the output buffer; Encode returns a prefix of it.
buf []byte
}

// newThumbhashEncoder creates an encoder that writes thumbhash output into
// buf's backing array, using up to cap(buf) bytes. decodedBy is currently
// unused.
//
// It returns ErrBufTooSmall when buf has no capacity or when the native
// encoder cannot be created.
func newThumbhashEncoder(decodedBy Decoder, buf []byte) (*thumbhashEncoder, error) {
	// Re-slicing a nil or zero-capacity slice to length 1 panics, so reject
	// such a buffer with an error instead of crashing the caller.
	if cap(buf) == 0 {
		return nil, ErrBufTooSmall
	}

	// Length 1 makes &buf[0] addressable; the native side is told the full
	// capacity so it may write past the current length.
	buf = buf[:1]
	enc := C.thumbhash_encoder_create(unsafe.Pointer(&buf[0]), C.size_t(cap(buf)))
	if enc == nil {
		return nil, ErrBufTooSmall
	}
	return &thumbhashEncoder{
		encoder: enc,
		buf:     buf,
	}, nil
}

// Encode computes the thumbhash of f and returns it as a prefix of the
// encoder's buffer. opt is ignored.
//
// A nil framebuffer yields io.EOF. If the native encoder reports a
// non-positive length, ErrInvalidImage is returned.
func (e *thumbhashEncoder) Encode(f *Framebuffer, opt map[int]int) ([]byte, error) {
	if f == nil {
		return nil, io.EOF
	}

	written := C.thumbhash_encoder_encode(e.encoder, f.mat)
	if written > 0 {
		return e.buf[:written], nil
	}

	return nil, ErrInvalidImage
}

// Close releases the native encoder. It is safe to call more than once: the
// handle is cleared after the first release so a repeated call cannot free
// the same native object twice.
func (e *thumbhashEncoder) Close() {
	if e.encoder == nil {
		return
	}
	C.thumbhash_encoder_release(e.encoder)
	e.encoder = nil
}
20 changes: 20 additions & 0 deletions thumbhash.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#ifndef LILLIPUT_THUMBHASH_HPP
#define LILLIPUT_THUMBHASH_HPP

#include "opencv.hpp"

#ifdef __cplusplus
extern "C" {
#endif

// Opaque handle to a thumbhash encoder.
typedef struct thumbhash_encoder_struct* thumbhash_encoder;

// Creates an encoder that writes hashes into buf, which holds buf_len bytes.
// Returns NULL on failure.
thumbhash_encoder thumbhash_encoder_create(void* buf, size_t buf_len);
// Encodes opaque_frame into the encoder's buffer. Returns the number of bytes
// written, or -1 on failure.
int thumbhash_encoder_encode(thumbhash_encoder e, const opencv_mat opaque_frame);
// Destroys an encoder created by thumbhash_encoder_create().
void thumbhash_encoder_release(thumbhash_encoder e);

#ifdef __cplusplus
}
#endif

#endif
Loading

0 comments on commit b0c9f4f

Please sign in to comment.