diff --git a/data/coast.jpg b/data/coast.jpg new file mode 100644 index 00000000..2427318d Binary files /dev/null and b/data/coast.jpg differ diff --git a/data/fall.jpg b/data/fall.jpg new file mode 100644 index 00000000..5ebf25f4 Binary files /dev/null and b/data/fall.jpg differ diff --git a/data/field.jpg b/data/field.jpg new file mode 100644 index 00000000..82ebf791 Binary files /dev/null and b/data/field.jpg differ diff --git a/data/firefox-16bit-alpha.png b/data/firefox-16bit-alpha.png new file mode 100644 index 00000000..1d13e8d7 Binary files /dev/null and b/data/firefox-16bit-alpha.png differ diff --git a/data/firefox-16bit.png b/data/firefox-16bit.png new file mode 100644 index 00000000..86e3a0a4 Binary files /dev/null and b/data/firefox-16bit.png differ diff --git a/data/firefox-gray-alpha.webp b/data/firefox-gray-alpha.webp new file mode 100644 index 00000000..3d30e2b1 Binary files /dev/null and b/data/firefox-gray-alpha.webp differ diff --git a/data/firefox-gray.jpg b/data/firefox-gray.jpg new file mode 100644 index 00000000..9a241616 Binary files /dev/null and b/data/firefox-gray.jpg differ diff --git a/data/firefox.png b/data/firefox.png new file mode 100644 index 00000000..9b7da5c0 Binary files /dev/null and b/data/firefox.png differ diff --git a/data/large-sunrise.jpg b/data/large-sunrise.jpg new file mode 100644 index 00000000..c89e143b Binary files /dev/null and b/data/large-sunrise.jpg differ diff --git a/data/mountain.jpg b/data/mountain.jpg new file mode 100644 index 00000000..b9a6cb4e Binary files /dev/null and b/data/mountain.jpg differ diff --git a/data/opera-gray-alpha.png b/data/opera-gray-alpha.png new file mode 100644 index 00000000..96877d6c Binary files /dev/null and b/data/opera-gray-alpha.png differ diff --git a/data/opera.png b/data/opera.png new file mode 100644 index 00000000..24f2bffb Binary files /dev/null and b/data/opera.png differ diff --git a/data/street.jpg b/data/street.jpg new file mode 100644 index 00000000..23bc42d6 Binary files /dev/null and b/data/street.jpg differ diff --git a/data/sunrise.jpg b/data/sunrise.jpg new file mode 100644 index 00000000..9f55d514 Binary files /dev/null and b/data/sunrise.jpg differ diff --git a/data/sunset.jpg b/data/sunset.jpg new file mode 100644 index 00000000..23b98c37 Binary files /dev/null and b/data/sunset.jpg differ diff --git a/lilliput.go b/lilliput.go index a6b7ed5c..974f63ad 100644 --- a/lilliput.go +++ b/lilliput.go @@ -108,5 +108,9 @@ func NewEncoder(ext string, decodedBy Decoder, dst []byte) (Encoder, error) { return nil, errors.New("Encoder cannot encode into video types") } + if strings.ToLower(ext) == ".thumbhash" { + return newThumbhashEncoder(decodedBy, dst) + } + return newOpenCVEncoder(ext, decodedBy, dst) } diff --git a/thumbhash.cpp b/thumbhash.cpp new file mode 100644 index 00000000..e1add5e8 --- /dev/null +++ b/thumbhash.cpp @@ -0,0 +1,282 @@ +#include "thumbhash.hpp" +#include +#include +#include +#include +#include + +static constexpr size_t MAX_DIMENSION = 100; +static constexpr float PI = 3.14159265f; + +struct thumbhash_encoder_struct { + uint8_t* dst; + size_t dst_len; +}; + +thumbhash_encoder thumbhash_encoder_create(void* buf, size_t buf_len) +{ + thumbhash_encoder e = new struct thumbhash_encoder_struct(); + if (!e) { + return NULL; + } + memset(e, 0, sizeof(struct thumbhash_encoder_struct)); + e->dst = (uint8_t*)(buf); + e->dst_len = buf_len; + + return e; +} + +static std::tuple, float> encode_channel( + const std::vector& channel, + size_t nx, + size_t ny, + size_t w, + size_t h) +{ + float dc = 0.0f; + std::vector ac; + ac.reserve(nx * ny / 2); + float scale = 0.0f; + std::vector fx(w, 0.0f); + for (size_t cy = 0; cy < ny; ++cy) { + size_t cx = 0; + while (cx * ny < nx * (ny - cy)) { + float f = 0.0f; + for (size_t x = 0; x < w; ++x) { + fx[x] = cos(PI / static_cast(w) * static_cast(cx) * + (static_cast(x) + 0.5f)); + } + for (size_t y = 0; y < h; ++y) { + float fy = cos(PI / static_cast(h) * static_cast(cy) * + (static_cast(y) + 0.5f)); + for (size_t x = 0; x < w; ++x) { + f += channel[x + y * w] * fx[x] * fy; + } + } + f /= static_cast(w * h); + if (cx > 0 || cy > 0) { + ac.push_back(f); + scale = std::max(std::abs(f), scale); + } + else { + dc = f; + } + cx += 1; + } + } + if (scale > 0.0) { + for (auto& ac_val : ac) { + ac_val = 0.5f + 0.5f / scale * ac_val; + } + } + return std::make_tuple(dc, ac, scale); +} + +// This C++ thumbhash encode function is based on the rust reference +// implementation found here: +// +// https://github.com/evanw/thumbhash/blob/main/rust/src/lib.rs +// +// We modified the logic in the following ways: +// +// - Make it work with OpenCV mat as input frame +// - Handle images with or without an alpha channel +// - Handle grayscale images +// - Perform simple downscaling of large images. We don't need very many pixels +// to get a good hash. +int thumbhash_encoder_encode(thumbhash_encoder e, const opencv_mat opaque_frame) +{ + auto frame = static_cast(opaque_frame); + + size_t orig_w = frame->cols; + size_t orig_h = frame->rows; + size_t w = orig_w, h = orig_h; + + // We don't need very many pixels to get a good hash. Downsample the image + // when its dimensions exceed the limit. + if (orig_w > MAX_DIMENSION || orig_h > MAX_DIMENSION) { + float aspect_ratio = static_cast(orig_w) / orig_h; + if (orig_w > orig_h) { + w = MAX_DIMENSION; + h = static_cast(w / aspect_ratio); + } + else { + h = MAX_DIMENSION; + w = static_cast(h * aspect_ratio); + } + } + + float row_ratio = static_cast(orig_h) / h; + float col_ratio = static_cast(orig_w) / w; + + bool has_alpha = false; + std::vector l, p, q, a; + l.reserve(w * h); + p.reserve(w * h); + q.reserve(w * h); + a.reserve(w * h); + + if (frame->type() == CV_8UC4) { + float avg_r = 0.0; + float avg_g = 0.0; + float avg_b = 0.0; + float avg_a = 0.0; + + // 4 channels (BGRA) + for (int i = 0; i < h; ++i) { + for (int j = 0; j < w; ++j) { + size_t orig_i = static_cast(i * row_ratio); + size_t orig_j = static_cast(j * col_ratio); + const cv::Vec4b& pixel = frame->at(orig_i, orig_j); + float alpha = static_cast(pixel[3]) / 255.0f; // A + avg_b += (alpha / 255.0f) * static_cast(pixel[0]); // B + avg_g += (alpha / 255.0f) * static_cast(pixel[1]); // G + avg_r += (alpha / 255.0f) * static_cast(pixel[2]); // R + avg_a += alpha; + } + } + if (avg_a > 0.0f) { + avg_r /= avg_a; + avg_g /= avg_a; + avg_b /= avg_a; + } + has_alpha = avg_a < static_cast(w * h); + + for (int i = 0; i < h; ++i) { + for (int j = 0; j < w; ++j) { + size_t orig_i = static_cast(i * row_ratio); + size_t orig_j = static_cast(j * col_ratio); + const cv::Vec4b& pixel = frame->at(orig_i, orig_j); + float alpha = static_cast(pixel[3]) / 255.0f; // A + float b = + avg_b * (1.0f - alpha) + (alpha / 255.0f) * static_cast(pixel[0]); // B + float g = + avg_g * (1.0f - alpha) + (alpha / 255.0f) * static_cast(pixel[1]); // G + float r = + avg_r * (1.0f - alpha) + (alpha / 255.0f) * static_cast(pixel[2]); // R + l.push_back((r + g + b) / 3.0f); + p.push_back((r + g) / 2.0f - b); + q.push_back(r - g); + a.push_back(alpha); + } + } + } + else if (frame->type() == CV_8UC3) { + // 3 channels (BGR) + for (int i = 0; i < h; ++i) { + for (int j = 0; j < w; ++j) { + size_t orig_i = static_cast(i * row_ratio); + size_t orig_j = static_cast(j * col_ratio); + const cv::Vec3b& pixel = frame->at(orig_i, orig_j); + float b = (1.0f / 255.0f) * static_cast(pixel[0]); // B + float g = (1.0f / 255.0f) * static_cast(pixel[1]); // G + float r = (1.0f / 255.0f) * static_cast(pixel[2]); // R + l.push_back((r + g + b) / 3.0f); + p.push_back((r + g) / 2.0f - b); + q.push_back(r - g); + a.push_back(1.0f); + } + } + } + else if (frame->type() == CV_8U) { + for (int i = 0; i < h; ++i) { + for (int j = 0; j < w; ++j) { + size_t orig_i = static_cast(i * row_ratio); + size_t orig_j = static_cast(j * col_ratio); + uchar pixel = frame->at(orig_i, orig_j); + float l_val = static_cast(pixel) / 255.0f; + l.push_back(l_val); + p.push_back(0.0f); + q.push_back(0.0f); + a.push_back(1.0f); + } + } + } + else { + // Unsupported format + return -1; + } + + size_t l_limit = has_alpha ? 5 : 7; // Use fewer luminance bits if there's alpha + + size_t lx = std::max(static_cast(std::round(static_cast(l_limit * w) / + static_cast(std::max(w, h)))), + static_cast(1)); + size_t ly = std::max(static_cast(std::round(static_cast(l_limit * h) / + static_cast(std::max(w, h)))), + static_cast(1)); + + float l_dc, l_scale, p_dc, p_scale, q_dc, q_scale, a_dc, a_scale; + std::vector l_ac, p_ac, q_ac, a_ac; + std::tie(l_dc, l_ac, l_scale) = encode_channel( + l, std::max(lx, static_cast(3)), std::max(ly, static_cast(3)), w, h); + std::tie(p_dc, p_ac, p_scale) = encode_channel(p, 3, 3, w, h); + std::tie(q_dc, q_ac, q_scale) = encode_channel(q, 3, 3, w, h); + if (has_alpha) { + std::tie(a_dc, a_ac, a_scale) = encode_channel(a, 5, 5, w, h); + } + else { + a_dc = 1.0f; + a_scale = 1.0f; + } + + bool is_landscape = w > h; + uint32_t header24 = static_cast(std::round(63.0f * l_dc)) | + (static_cast(std::round(31.5f + 31.5f * p_dc)) << 6) | + (static_cast(std::round(31.5f + 31.5f * q_dc)) << 12) | + (static_cast(std::round(31.0f * l_scale)) << 18) | (has_alpha ? 1 << 23 : 0); + uint16_t header16 = static_cast(is_landscape ? ly : lx) | + (static_cast(std::round(63.0f * p_scale)) << 3) | + (static_cast(std::round(63.0f * q_scale)) << 9) | (is_landscape ? 1 << 15 : 0); + + std::vector hash; + hash.reserve(25); + + hash.push_back(header24 & 255); + hash.push_back((header24 >> 8) & 255); + hash.push_back(header24 >> 16); + hash.push_back(header16 & 255); + hash.push_back(header16 >> 8); + bool is_odd = false; + if (has_alpha) { + hash.push_back(static_cast(std::round(15.0f * a_dc)) | + (static_cast(std::round(15.0f * a_scale)) << 4)); + } + for (auto ac : {l_ac, p_ac, q_ac}) { + for (float f : ac) { + uint8_t u = static_cast(std::round(15.0f * f)); + if (is_odd) { + *hash.rbegin() |= u << 4; + } + else { + hash.push_back(u); + } + is_odd = !is_odd; + } + } + if (has_alpha) { + for (float f : a_ac) { + uint8_t u = static_cast(std::round(15.0f * f)); + if (is_odd) { + *hash.rbegin() |= u << 4; + } + else { + hash.push_back(u); + } + is_odd = !is_odd; + } + } + + if (hash.size() <= e->dst_len) { + std::copy(hash.begin(), hash.end(), e->dst); + } + else { + return -1; + } + return hash.size(); +} + +void thumbhash_encoder_release(thumbhash_encoder e) +{ + delete e; +} diff --git a/thumbhash.go b/thumbhash.go new file mode 100644 index 00000000..3f6aa55b --- /dev/null +++ b/thumbhash.go @@ -0,0 +1,52 @@ +package lilliput + +// #cgo CFLAGS: -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx +// #cgo darwin CFLAGS: -I${SRCDIR}/deps/osx/include +// #cgo linux CFLAGS: -I${SRCDIR}/deps/linux/include +// #cgo CXXFLAGS: -std=c++11 +// #cgo darwin CXXFLAGS: -I${SRCDIR}/deps/osx/include +// #cgo linux CXXFLAGS: -I${SRCDIR}/deps/linux/include +// #cgo LDFLAGS: -lopencv_core -lopencv_imgcodecs -lopencv_imgproc -ljpeg -lpng -lwebp -lippicv -lz +// #cgo darwin LDFLAGS: -L${SRCDIR}/deps/osx/lib -L${SRCDIR}/deps/osx/share/OpenCV/3rdparty/lib +// #cgo linux LDFLAGS: -L${SRCDIR}/deps/linux/lib -L${SRCDIR}/deps/linux/share/OpenCV/3rdparty/lib +// #include "thumbhash.hpp" +import "C" + +import ( + "io" + "unsafe" +) + +type thumbhashEncoder struct { + encoder C.thumbhash_encoder + buf []byte +} + +func newThumbhashEncoder(decodedBy Decoder, buf []byte) (*thumbhashEncoder, error) { + buf = buf[:1] + enc := C.thumbhash_encoder_create(unsafe.Pointer(&buf[0]), C.size_t(cap(buf))) + if enc == nil { + return nil, ErrBufTooSmall + } + return &thumbhashEncoder{ + encoder: enc, + buf: buf, + }, nil +} + +func (e *thumbhashEncoder) Encode(f *Framebuffer, opt map[int]int) ([]byte, error) { + if f == nil { + return nil, io.EOF + } + + length := C.thumbhash_encoder_encode(e.encoder, f.mat) + if length <= 0 { + return nil, ErrInvalidImage + } + + return e.buf[:length], nil +} + +func (e *thumbhashEncoder) Close() { + C.thumbhash_encoder_release(e.encoder) +} diff --git a/thumbhash.hpp b/thumbhash.hpp new file mode 100644 index 00000000..3b65499a --- /dev/null +++ b/thumbhash.hpp @@ -0,0 +1,20 @@ +#ifndef LILLIPUT_THUMBHASH_HPP +#define LILLIPUT_THUMBHASH_HPP + +#include "opencv.hpp" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct thumbhash_encoder_struct* thumbhash_encoder; + +thumbhash_encoder thumbhash_encoder_create(void* buf, size_t buf_len); +int thumbhash_encoder_encode(thumbhash_encoder e, const opencv_mat opqaue_frame); +void thumbhash_encoder_release(thumbhash_encoder e); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/thumbhash_test.go b/thumbhash_test.go new file mode 100644 index 00000000..ae2ee4f7 --- /dev/null +++ b/thumbhash_test.go @@ -0,0 +1,82 @@ +package lilliput + +import ( + "encoding/base64" + "io/ioutil" + "testing" +) + +func TestThumbhash(t *testing.T) { + checkImage := func(expectedB64Hash, filePath string, ops *ImageOps, dst []byte) { + inputBuf, err := ioutil.ReadFile(filePath) + if err != nil { + t.Fatalf("failed to read input file %q: %v", filePath, err) + } + + decoder, err := NewDecoder(inputBuf) + if err != nil { + t.Fatalf("error decoding image %q: %v", filePath, err) + } + defer decoder.Close() + + header, err := decoder.Header() + if err != nil { + t.Fatalf("error reading image header of %q: %v", filePath, err) + } + + opts := &ImageOptions{ + FileType: ".thumbhash", + Width: header.width, + Height: header.height, + ResizeMethod: ImageOpsNoResize, + NormalizeOrientation: true, + } + hash, err := ops.Transform(decoder, opts, dst) + if err != nil { + t.Fatalf("error transforming image %q: %v", filePath, err) + } + b64Hash := base64.StdEncoding.EncodeToString(hash) + + if b64Hash != expectedB64Hash { + t.Errorf("hash of %q is %q but should be %q", + filePath, b64Hash, expectedB64Hash) + } + } + + ops := NewImageOps(8192) + defer ops.Close() + dst := make([]byte, 0, 1024*1024) + + // These test images came from the demo page at: + // https://evanw.github.io/thumbhash/ + // + // The expected thumbhashes in the tests were generated using the reference + // rust implementation there. + // + // Note the thumbhashes for 'field.jpg' and 'opera.png' generated by the + // rust reference code were slightly different than the respective hashes in + // the demo page (presumably generated by the JS reference implementation). + // + // This is not very surprising given the heavy reliance on floating point + // math. The differences were likely rounding errors. The decoded images + // from those hashes were visually identical. + checkImage("1QcSHQRnh493V4dIh4eXh1h4kJUI", "data/sunrise.jpg", ops, dst) + checkImage("3PcNNYSFeXh/d3eld0iHZoZgVwh2", "data/sunset.jpg", ops, dst) + checkImage("3OcRJYB4d3h/iIeHeEh3eIhw+j3A", "data/field.jpg", ops, dst) + checkImage("HBkSHYSIeHiPiHh8eJd4eTN0EEQG", "data/fall.jpg", ops, dst) + checkImage("VggKDYAW6lZvdYd6d2iZh/p4GE/k", "data/street.jpg", ops, dst) + checkImage("2fcZFIB3iId/h3iJh4aIYJ2V8g==", "data/mountain.jpg", ops, dst) + checkImage("IQgSLYZ6iHePh4h1eFeHh4dwgwg3", "data/coast.jpg", ops, dst) + checkImage("YJqGPQw7sFlslqhFafSE+Q6oJ1h2iHB2Rw==", "data/firefox.png", ops, dst) + checkImage("mYqDBQQnxnj0JoLYdN7f8JhpuDeHiHdwZw==", "data/opera.png", ops, dst) + + // Test other image formats, bit depths, and color spaces. + checkImage("YJqGPQw7oFlslqhGafOE+Q6oJ1h2iHBlVw==", "data/firefox-16bit.png", ops, dst) + checkImage("YJqGPQw7sFlslqhFafSE+Q6oJ1h2iHB2Rw==", "data/firefox-16bit-alpha.png", ops, dst) + checkImage("FwgOBwAxOWl4l3aQpFiIN5iHBgAAAAAA", "data/firefox-gray.jpg", ops, dst) + checkImage("4AeKBQA7oFl7lqhmaDBp92yJJ1h2iHB2Rw==", "data/firefox-gray-alpha.webp", ops, dst) + checkImage("EwiCBQAnwnjzJpHIZAAAAAAAuDeHiHdwZw==", "data/opera-gray-alpha.png", ops, dst) + + // Test downsampling. + checkImage("VvYRNQRod3x3B4iHeHhYiHeAeQUo", "data/large-sunrise.jpg", ops, dst) +}