|
#include "thumbhash.hpp"
#include <stdbool.h>
#include <algorithm>
#include <cmath>
#include <new>
#include <tuple>
#include <utility>
#include <vector>
| 7 | + |
// Largest width or height (in pixels) that is hashed as-is; frames exceeding
// it in either dimension are downsampled before encoding.
static constexpr size_t MAX_DIMENSION{100};

// Single-precision pi, used to build the cosine basis in encode_channel().
static constexpr float PI{3.14159265f};
| 10 | + |
// State behind a thumbhash_encoder handle: the caller-supplied output buffer
// that encoded hashes are copied into.
struct thumbhash_encoder_struct {
    uint8_t* dst;    // start of the destination buffer (not owned by the encoder)
    size_t dst_len;  // capacity of dst, in bytes
};
| 15 | + |
| 16 | +thumbhash_encoder thumbhash_encoder_create(void* buf, size_t buf_len) |
| 17 | +{ |
| 18 | + thumbhash_encoder e = new struct thumbhash_encoder_struct(); |
| 19 | + if (!e) { |
| 20 | + return NULL; |
| 21 | + } |
| 22 | + memset(e, 0, sizeof(struct thumbhash_encoder_struct)); |
| 23 | + e->dst = (uint8_t*)(buf); |
| 24 | + e->dst_len = buf_len; |
| 25 | + |
| 26 | + return e; |
| 27 | +} |
| 28 | + |
| 29 | +static std::tuple<float, std::vector<float>, float> encode_channel( |
| 30 | + const std::vector<float>& channel, |
| 31 | + size_t nx, |
| 32 | + size_t ny, |
| 33 | + size_t w, |
| 34 | + size_t h) |
| 35 | +{ |
| 36 | + float dc = 0.0f; |
| 37 | + std::vector<float> ac; |
| 38 | + ac.reserve(nx * ny / 2); |
| 39 | + float scale = 0.0f; |
| 40 | + std::vector<float> fx(w, 0.0f); |
| 41 | + for (size_t cy = 0; cy < ny; ++cy) { |
| 42 | + size_t cx = 0; |
| 43 | + while (cx * ny < nx * (ny - cy)) { |
| 44 | + float f = 0.0f; |
| 45 | + for (size_t x = 0; x < w; ++x) { |
| 46 | + fx[x] = cos(PI / static_cast<float>(w) * static_cast<float>(cx) * |
| 47 | + (static_cast<float>(x) + 0.5f)); |
| 48 | + } |
| 49 | + for (size_t y = 0; y < h; ++y) { |
| 50 | + float fy = cos(PI / static_cast<float>(h) * static_cast<float>(cy) * |
| 51 | + (static_cast<float>(y) + 0.5f)); |
| 52 | + for (size_t x = 0; x < w; ++x) { |
| 53 | + f += channel[x + y * w] * fx[x] * fy; |
| 54 | + } |
| 55 | + } |
| 56 | + f /= static_cast<float>(w * h); |
| 57 | + if (cx > 0 || cy > 0) { |
| 58 | + ac.push_back(f); |
| 59 | + scale = std::max(std::abs(f), scale); |
| 60 | + } |
| 61 | + else { |
| 62 | + dc = f; |
| 63 | + } |
| 64 | + cx += 1; |
| 65 | + } |
| 66 | + } |
| 67 | + if (scale > 0.0) { |
| 68 | + for (auto& ac_val : ac) { |
| 69 | + ac_val = 0.5f + 0.5f / scale * ac_val; |
| 70 | + } |
| 71 | + } |
| 72 | + return std::make_tuple(dc, ac, scale); |
| 73 | +} |
| 74 | + |
| 75 | +// This C++ thumbhash encode function is based on the rust reference |
| 76 | +// implementation found here: |
| 77 | +// |
| 78 | +// https://github.com/evanw/thumbhash/blob/main/rust/src/lib.rs |
| 79 | +// |
| 80 | +// We modified the logic in the following ways: |
| 81 | +// |
| 82 | +// - Make it work with OpenCV mat as input frame |
| 83 | +// - Handle images with or without an alpha channel |
| 84 | +// - Handle grayscale images |
| 85 | +// - Perform simple downscaling of large images. We don't need very many pixels |
| 86 | +// to get a good hash. |
| 87 | +int thumbhash_encoder_encode(thumbhash_encoder e, const opencv_mat opaque_frame) |
| 88 | +{ |
| 89 | + auto frame = static_cast<const cv::Mat*>(opaque_frame); |
| 90 | + |
| 91 | + size_t orig_w = frame->cols; |
| 92 | + size_t orig_h = frame->rows; |
| 93 | + size_t w = orig_w, h = orig_h; |
| 94 | + |
| 95 | + // We don't need very many pixels to get a good hash. Downsample the image |
| 96 | + // when its dimensions exceed the limit. |
| 97 | + if (orig_w > MAX_DIMENSION || orig_h > MAX_DIMENSION) { |
| 98 | + float aspect_ratio = static_cast<float>(orig_w) / orig_h; |
| 99 | + if (orig_w > orig_h) { |
| 100 | + w = MAX_DIMENSION; |
| 101 | + h = static_cast<size_t>(w / aspect_ratio); |
| 102 | + } |
| 103 | + else { |
| 104 | + h = MAX_DIMENSION; |
| 105 | + w = static_cast<size_t>(h * aspect_ratio); |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + float row_ratio = static_cast<float>(orig_h) / h; |
| 110 | + float col_ratio = static_cast<float>(orig_w) / w; |
| 111 | + |
| 112 | + bool has_alpha = false; |
| 113 | + std::vector<float> l, p, q, a; |
| 114 | + l.reserve(w * h); |
| 115 | + p.reserve(w * h); |
| 116 | + q.reserve(w * h); |
| 117 | + a.reserve(w * h); |
| 118 | + |
| 119 | + if (frame->type() == CV_8UC4) { |
| 120 | + float avg_r = 0.0; |
| 121 | + float avg_g = 0.0; |
| 122 | + float avg_b = 0.0; |
| 123 | + float avg_a = 0.0; |
| 124 | + |
| 125 | + // 4 channels (BGRA) |
| 126 | + for (int i = 0; i < h; ++i) { |
| 127 | + for (int j = 0; j < w; ++j) { |
| 128 | + size_t orig_i = static_cast<size_t>(i * row_ratio); |
| 129 | + size_t orig_j = static_cast<size_t>(j * col_ratio); |
| 130 | + const cv::Vec4b& pixel = frame->at<cv::Vec4b>(orig_i, orig_j); |
| 131 | + float alpha = static_cast<float>(pixel[3]) / 255.0f; // A |
| 132 | + avg_b += (alpha / 255.0f) * static_cast<float>(pixel[0]); // B |
| 133 | + avg_g += (alpha / 255.0f) * static_cast<float>(pixel[1]); // G |
| 134 | + avg_r += (alpha / 255.0f) * static_cast<float>(pixel[2]); // R |
| 135 | + avg_a += alpha; |
| 136 | + } |
| 137 | + } |
| 138 | + if (avg_a > 0.0f) { |
| 139 | + avg_r /= avg_a; |
| 140 | + avg_g /= avg_a; |
| 141 | + avg_b /= avg_a; |
| 142 | + } |
| 143 | + has_alpha = avg_a < static_cast<float>(w * h); |
| 144 | + |
| 145 | + for (int i = 0; i < h; ++i) { |
| 146 | + for (int j = 0; j < w; ++j) { |
| 147 | + size_t orig_i = static_cast<size_t>(i * row_ratio); |
| 148 | + size_t orig_j = static_cast<size_t>(j * col_ratio); |
| 149 | + const cv::Vec4b& pixel = frame->at<cv::Vec4b>(orig_i, orig_j); |
| 150 | + float alpha = static_cast<float>(pixel[3]) / 255.0f; // A |
| 151 | + float b = |
| 152 | + avg_b * (1.0f - alpha) + (alpha / 255.0f) * static_cast<float>(pixel[0]); // B |
| 153 | + float g = |
| 154 | + avg_g * (1.0f - alpha) + (alpha / 255.0f) * static_cast<float>(pixel[1]); // G |
| 155 | + float r = |
| 156 | + avg_r * (1.0f - alpha) + (alpha / 255.0f) * static_cast<float>(pixel[2]); // R |
| 157 | + l.push_back((r + g + b) / 3.0f); |
| 158 | + p.push_back((r + g) / 2.0f - b); |
| 159 | + q.push_back(r - g); |
| 160 | + a.push_back(alpha); |
| 161 | + } |
| 162 | + } |
| 163 | + } |
| 164 | + else if (frame->type() == CV_8UC3) { |
| 165 | + // 3 channels (BGR) |
| 166 | + for (int i = 0; i < h; ++i) { |
| 167 | + for (int j = 0; j < w; ++j) { |
| 168 | + size_t orig_i = static_cast<size_t>(i * row_ratio); |
| 169 | + size_t orig_j = static_cast<size_t>(j * col_ratio); |
| 170 | + const cv::Vec3b& pixel = frame->at<cv::Vec3b>(orig_i, orig_j); |
| 171 | + float b = (1.0f / 255.0f) * static_cast<float>(pixel[0]); // B |
| 172 | + float g = (1.0f / 255.0f) * static_cast<float>(pixel[1]); // G |
| 173 | + float r = (1.0f / 255.0f) * static_cast<float>(pixel[2]); // R |
| 174 | + l.push_back((r + g + b) / 3.0f); |
| 175 | + p.push_back((r + g) / 2.0f - b); |
| 176 | + q.push_back(r - g); |
| 177 | + a.push_back(1.0f); |
| 178 | + } |
| 179 | + } |
| 180 | + } |
| 181 | + else if (frame->type() == CV_8U) { |
| 182 | + for (int i = 0; i < h; ++i) { |
| 183 | + for (int j = 0; j < w; ++j) { |
| 184 | + size_t orig_i = static_cast<size_t>(i * row_ratio); |
| 185 | + size_t orig_j = static_cast<size_t>(j * col_ratio); |
| 186 | + uchar pixel = frame->at<uchar>(orig_i, orig_j); |
| 187 | + float l_val = static_cast<float>(pixel) / 255.0f; |
| 188 | + l.push_back(l_val); |
| 189 | + p.push_back(0.0f); |
| 190 | + q.push_back(0.0f); |
| 191 | + a.push_back(1.0f); |
| 192 | + } |
| 193 | + } |
| 194 | + } |
| 195 | + else { |
| 196 | + // Unsupported format |
| 197 | + return -1; |
| 198 | + } |
| 199 | + |
| 200 | + size_t l_limit = has_alpha ? 5 : 7; // Use fewer luminance bits if there's alpha |
| 201 | + |
| 202 | + size_t lx = std::max(static_cast<size_t>(std::round(static_cast<float>(l_limit * w) / |
| 203 | + static_cast<float>(std::max(w, h)))), |
| 204 | + static_cast<size_t>(1)); |
| 205 | + size_t ly = std::max(static_cast<size_t>(std::round(static_cast<float>(l_limit * h) / |
| 206 | + static_cast<float>(std::max(w, h)))), |
| 207 | + static_cast<size_t>(1)); |
| 208 | + |
| 209 | + float l_dc, l_scale, p_dc, p_scale, q_dc, q_scale, a_dc, a_scale; |
| 210 | + std::vector<float> l_ac, p_ac, q_ac, a_ac; |
| 211 | + std::tie(l_dc, l_ac, l_scale) = encode_channel( |
| 212 | + l, std::max(lx, static_cast<size_t>(3)), std::max(ly, static_cast<size_t>(3)), w, h); |
| 213 | + std::tie(p_dc, p_ac, p_scale) = encode_channel(p, 3, 3, w, h); |
| 214 | + std::tie(q_dc, q_ac, q_scale) = encode_channel(q, 3, 3, w, h); |
| 215 | + if (has_alpha) { |
| 216 | + std::tie(a_dc, a_ac, a_scale) = encode_channel(a, 5, 5, w, h); |
| 217 | + } |
| 218 | + else { |
| 219 | + a_dc = 1.0f; |
| 220 | + a_scale = 1.0f; |
| 221 | + } |
| 222 | + |
| 223 | + bool is_landscape = w > h; |
| 224 | + uint32_t header24 = static_cast<uint32_t>(std::round(63.0f * l_dc)) | |
| 225 | + (static_cast<uint32_t>(std::round(31.5f + 31.5f * p_dc)) << 6) | |
| 226 | + (static_cast<uint32_t>(std::round(31.5f + 31.5f * q_dc)) << 12) | |
| 227 | + (static_cast<uint32_t>(std::round(31.0f * l_scale)) << 18) | (has_alpha ? 1 << 23 : 0); |
| 228 | + uint16_t header16 = static_cast<uint16_t>(is_landscape ? ly : lx) | |
| 229 | + (static_cast<uint16_t>(std::round(63.0f * p_scale)) << 3) | |
| 230 | + (static_cast<uint16_t>(std::round(63.0f * q_scale)) << 9) | (is_landscape ? 1 << 15 : 0); |
| 231 | + |
| 232 | + std::vector<uint8_t> hash; |
| 233 | + hash.reserve(25); |
| 234 | + |
| 235 | + hash.push_back(header24 & 255); |
| 236 | + hash.push_back((header24 >> 8) & 255); |
| 237 | + hash.push_back(header24 >> 16); |
| 238 | + hash.push_back(header16 & 255); |
| 239 | + hash.push_back(header16 >> 8); |
| 240 | + bool is_odd = false; |
| 241 | + if (has_alpha) { |
| 242 | + hash.push_back(static_cast<uint8_t>(std::round(15.0f * a_dc)) | |
| 243 | + (static_cast<uint8_t>(std::round(15.0f * a_scale)) << 4)); |
| 244 | + } |
| 245 | + for (auto ac : {l_ac, p_ac, q_ac}) { |
| 246 | + for (float f : ac) { |
| 247 | + uint8_t u = static_cast<uint8_t>(std::round(15.0f * f)); |
| 248 | + if (is_odd) { |
| 249 | + *hash.rbegin() |= u << 4; |
| 250 | + } |
| 251 | + else { |
| 252 | + hash.push_back(u); |
| 253 | + } |
| 254 | + is_odd = !is_odd; |
| 255 | + } |
| 256 | + } |
| 257 | + if (has_alpha) { |
| 258 | + for (float f : a_ac) { |
| 259 | + uint8_t u = static_cast<uint8_t>(std::round(15.0f * f)); |
| 260 | + if (is_odd) { |
| 261 | + *hash.rbegin() |= u << 4; |
| 262 | + } |
| 263 | + else { |
| 264 | + hash.push_back(u); |
| 265 | + } |
| 266 | + is_odd = !is_odd; |
| 267 | + } |
| 268 | + } |
| 269 | + |
| 270 | + if (hash.size() <= e->dst_len) { |
| 271 | + std::copy(hash.begin(), hash.end(), e->dst); |
| 272 | + } |
| 273 | + else { |
| 274 | + return -1; |
| 275 | + } |
| 276 | + return hash.size(); |
| 277 | +} |
| 278 | + |
| 279 | +void thumbhash_encoder_release(thumbhash_encoder e) |
| 280 | +{ |
| 281 | + delete e; |
| 282 | +} |
0 commit comments