Update README.md
[tar-legacy.git] / MCDV / stb_dxt.h
1 // stb_dxt.h - v1.08b - DXT1/DXT5 compressor - public domain
2 // original by fabian "ryg" giesen - ported to C by stb
3 // use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
4 //
5 // USAGE:
6 // call stb_compress_dxt_block() for every block (you must pad)
7 // source should be a 4x4 block of RGBA data in row-major order;
8 // A is ignored if you specify alpha=0; you can turn on dithering
9 // and "high quality" using mode.
10 //
11 // version history:
12 // v1.08 - (stb) fix bug in dxt-with-alpha block
13 // v1.07 - (stb) bc4; allow not using libc; add STB_DXT_STATIC
14 // v1.06 - (stb) fix to known-broken 1.05
15 // v1.05 - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski)
16 // v1.04 - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
17 // single color match fix (allow for inexact color interpolation);
18 // optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
19 // v1.03 - (stb) endianness support
20 // v1.02 - (stb) fix alpha encoding bug
21 // v1.01 - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
22 // v1.00 - (stb) first release
23 //
24 // contributors:
25 // Kevin Schmidt (#defines for "freestanding" compilation)
26 // github:ppiastucki (BC4 support)
27 //
28 // LICENSE
29 //
30 // See end of file for license information.
31
#ifndef STB_INCLUDE_STB_DXT_H
#define STB_INCLUDE_STB_DXT_H

#if defined(__cplusplus)
extern "C" {
#endif

// STBDDEF selects the linkage of the public declarations below: define
// STB_DXT_STATIC before including this file to declare them static.
#if defined(STB_DXT_STATIC)
#define STBDDEF static
#else
#define STBDDEF extern
#endif

// compression mode (bitflags), passed as the 'mode' argument
#define STB_DXT_NORMAL   0
#define STB_DXT_DITHER   1 // use dithering. dubious win. never use for normal maps and the like!
#define STB_DXT_HIGHQUAL 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower.

// Compresses one 4x4 block of RGBA pixels (64 bytes, row-major).
// Writes 8 bytes to dest when alpha is zero; when alpha is non-zero it writes
// 16 bytes: an 8-byte alpha block followed by the 8-byte color block.
STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode);

// Compresses one 4x4 block of single-channel pixels (16 bytes) into 8 bytes.
STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel);

// Compresses one 4x4 block of two-channel pixels (32 bytes) into 16 bytes:
// the first channel's block followed by the second channel's block.
STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel);

#define STB_COMPRESS_DXT_BLOCK

#if defined(__cplusplus)
}
#endif
#endif // STB_INCLUDE_STB_DXT_H
60
61 #ifdef STB_DXT_IMPLEMENTATION
62
63 // configuration options for DXT encoder. set them in the project/makefile or just define
64 // them at the top.
65
66 // STB_DXT_USE_ROUNDING_BIAS
67 // use a rounding bias during color interpolation. this is closer to what "ideal"
68 // interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
69 // implicitly had this turned on.
70 //
71 // in case you're targeting a specific type of hardware (e.g. console programmers):
72 // NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
73 // to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
74 // you also see "(a*5 + b*3) / 8" on some old GPU designs.
75 // #define STB_DXT_USE_ROUNDING_BIAS
76
#include <stdlib.h>

// User-overridable integer absolute value (defaults to abs() from <stdlib.h>).
#ifndef STBD_ABS
#define STBD_ABS(i) abs(i)
#endif

// User-overridable floating-point absolute value. <math.h> is only needed for
// the default fabs()-based definition, so it is included only in that case.
// (The previous guard tested the misspelled name STBI_FABS and therefore
// always included <math.h>, even when the caller supplied both overrides.)
#ifndef STBD_FABS
#include <math.h>
#define STBD_FABS(x) fabs(x)
#endif

// User-overridable memset (defaults to memset() from <string.h>).
#ifndef STBD_MEMSET
#include <string.h>
#define STBD_MEMSET memset
#endif
95
// Lookup tables; all of them are filled in by stb__InitDXT().

// 5-bit and 6-bit channel values widened to 8 bits
static unsigned char stb__Expand5[32], stb__Expand6[64];

// for each 8-bit value: the pair of 5-bit (resp. 6-bit) endpoints, stored as
// [value][0] and [value][1], chosen by stb__PrepareOptTable()
static unsigned char stb__OMatch5[256][2], stb__OMatch6[256][2];

// 8-bit value -> quantized-and-re-expanded value; the tables carry 8 extra
// entries on each side and are indexed with an offset of 8
static unsigned char stb__QuantRBTab[256 + 16], stb__QuantGTab[256 + 16];
102
// Fixed-point multiply: a*b scaled back down by 255 with rounding, computed
// with shifts and adds instead of a division.
static int stb__Mul8Bit(int a, int b)
{
   int scaled = a * b + 128;
   scaled += scaled >> 8;
   return scaled >> 8;
}
108
109 static void stb__From16Bit(unsigned char *out, unsigned short v)
110 {
111 int rv = (v & 0xf800) >> 11;
112 int gv = (v & 0x07e0) >> 5;
113 int bv = (v & 0x001f) >> 0;
114
115 out[0] = stb__Expand5[rv];
116 out[1] = stb__Expand6[gv];
117 out[2] = stb__Expand5[bv];
118 out[3] = 0;
119 }
120
// Packs 8-bit r, g, b into a 5:6:5 value, scaling each channel with stb__Mul8Bit.
static unsigned short stb__As16Bit(int r, int g, int b)
{
   int red5   = stb__Mul8Bit(r, 31);
   int green6 = stb__Mul8Bit(g, 63);
   int blue5  = stb__Mul8Bit(b, 31);

   return (red5 << 11) + (green6 << 5) + blue5;
}
125
// Interpolates one third of the way from a towards b. The rounding behaviour
// is selected at compile time by STB_DXT_USE_ROUNDING_BIAS.
static int stb__Lerp13(int a, int b)
{
#ifdef STB_DXT_USE_ROUNDING_BIAS
   // biased variant
   return a + stb__Mul8Bit(b - a, 0x55);
#else
   // unbiased variant
   // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really need every ounce of speed.
   return (a + a + b) / 3;
#endif
}
138
// Applies stb__Lerp13 to the first three channels of p1/p2; out[3] is untouched.
static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
{
   int ch;
   for (ch = 0; ch < 3; ch++)
      out[ch] = stb__Lerp13(p1[ch], p2[ch]);
}
146
147 /****************************************************************************/
148
// Builds the table used to reproduce constant colors as accurately as possible.
// For every 8-bit target value it searches all endpoint pairs (each index in
// [0, size), widened through 'expand') and stores the pair with the smallest
// error as Table[2*target] and Table[2*target + 1]. The first pair reaching the
// lowest error wins.
static void stb__PrepareOptTable(unsigned char *Table, const unsigned char *expand, int size)
{
   int target;

   for (target = 0; target < 256; target++) {
      unsigned char *entry = Table + target * 2;
      int lowestErr = 256;
      int lo, hi;

      for (lo = 0; lo < size; lo++) {
         int loVal = expand[lo];

         for (hi = 0; hi < size; hi++) {
            int hiVal = expand[hi];
            int err = STBD_ABS(stb__Lerp13(hiVal, loVal) - target);

            // DX10 spec says that interpolation must be within 3% of "correct" result,
            // add this as error term. (normally we'd expect a random distribution of
            // +-1.5% error, but nowhere in the spec does it say that the error has to be
            // unbiased - better safe than sorry).
            err += STBD_ABS(hiVal - loVal) * 3 / 100;

            if (err >= lowestErr)
               continue;

            entry[0] = hi;
            entry[1] = lo;
            lowestErr = err;
         }
      }
   }
}
177
// Fills 'color' with four 4-byte entries: the two unpacked endpoints c0 and c1,
// followed by the two colors interpolated between them (the one nearer c0 first).
static void stb__EvalColors(unsigned char *color, unsigned short c0, unsigned short c1)
{
   unsigned char *first   = color;
   unsigned char *second  = color + 4;
   unsigned char *nearC0  = color + 8;
   unsigned char *nearC1  = color + 12;

   stb__From16Bit(first, c0);
   stb__From16Bit(second, c1);
   stb__Lerp13RGB(nearC0, first, second);
   stb__Lerp13RGB(nearC1, second, first);
}
185
186 // Block dithering function. Simply dithers a block to 565 RGB.
187 // (Floyd-Steinberg)
188 static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
189 {
190 int err[8], *ep1 = err, *ep2 = err + 4, *et;
191 int ch, y;
192
193 // process channels separately
194 for (ch = 0; ch<3; ++ch) {
195 unsigned char *bp = block + ch, *dp = dest + ch;
196 unsigned char *quant = (ch == 1) ? stb__QuantGTab + 8 : stb__QuantRBTab + 8;
197 STBD_MEMSET(err, 0, sizeof(err));
198 for (y = 0; y<4; ++y) {
199 dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)];
200 ep1[0] = bp[0] - dp[0];
201 dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)];
202 ep1[1] = bp[4] - dp[4];
203 dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)];
204 ep1[2] = bp[8] - dp[8];
205 dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)];
206 ep1[3] = bp[12] - dp[12];
207 bp += 16;
208 dp += 16;
209 et = ep1, ep1 = ep2, ep2 = et; // swap
210 }
211 }
212 }
213
// Chooses the 2-bit palette index for a projected value, given the three
// crossover thresholds computed in stb__MatchColorsBlock.
static int stb__SelectColorIndex(int dot, int c0Point, int halfPoint, int c3Point)
{
   if (dot < halfPoint)
      return (dot < c0Point) ? 1 : 3;
   return (dot < c3Point) ? 2 : 0;
}

// The color matching function.
//   block  - 16 pixels, 4 bytes each (only the first three bytes are read)
//   color  - the 4-entry palette, 4 bytes per entry
//   dither - non-zero to diffuse the matching error (Floyd-Steinberg weights)
// Returns the 32-bit index mask: pixel i occupies bits 2*i and 2*i+1.
static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color, int dither)
{
   unsigned int mask = 0;
   int dirr = color[0 * 4 + 0] - color[1 * 4 + 0];
   int dirg = color[0 * 4 + 1] - color[1 * 4 + 1];
   int dirb = color[0 * 4 + 2] - color[1 * 4 + 2];
   int dots[16];
   int stops[4];
   int i;
   int c0Point, halfPoint, c3Point;

   // project every pixel and every palette entry onto the axis between entries 0 and 1
   for (i = 0; i < 16; i++)
      dots[i] = block[i * 4 + 0] * dirr + block[i * 4 + 1] * dirg + block[i * 4 + 2] * dirb;

   for (i = 0; i < 4; i++)
      stops[i] = color[i * 4 + 0] * dirr + color[i * 4 + 1] * dirg + color[i * 4 + 2] * dirb;

   // think of the colors as arranged on a line; project point onto that line, then choose
   // next color out of available ones. we compute the crossover points for "best color in top
   // half"/"best in bottom half" and then the same inside that subinterval.
   //
   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
   // but it's very close and a lot faster.
   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html

   c0Point = (stops[1] + stops[3]) >> 1;
   halfPoint = (stops[3] + stops[2]) >> 1;
   c3Point = (stops[2] + stops[0]) >> 1;

   if (!dither) {
      // the version without dithering is straightforward
      for (i = 15; i >= 0; i--) {
         mask <<= 2;
         mask |= (unsigned int)stb__SelectColorIndex(dots[i], c0Point, halfPoint, c3Point);
      }
   }
   else {
      // with floyd-steinberg dithering
      int err[8] = { 0 };
      int *ep1 = err;      // errors of the current row
      int *ep2 = err + 4;  // errors of the previous row
      int *dp = dots;
      int x, y;

      // work in 1/16 units so the diffusion weights stay integral. Multiply
      // rather than shift: the projected values may be negative, and
      // left-shifting a negative int is undefined behaviour.
      c0Point *= 16;
      halfPoint *= 16;
      c3Point *= 16;

      for (y = 0; y < 4; y++) {
         // unsigned so that the final shift by 24 cannot overflow a signed int
         unsigned int lmask = 0;
         int *et;

         for (x = 0; x < 4; x++) {
            // 5/16 from above, 7/16 from the left, 1/16 from above-left, 3/16 from above-right
            int diffused = 5 * ep2[x];
            int step;

            if (x > 0)
               diffused += 7 * ep1[x - 1] + ep2[x - 1];
            if (x < 3)
               diffused += 3 * ep2[x + 1];

            step = stb__SelectColorIndex(dp[x] * 16 + diffused, c0Point, halfPoint, c3Point);
            ep1[x] = dp[x] - stops[step];
            lmask |= (unsigned int)step << (x * 2);
         }

         dp += 4;
         mask |= lmask << (y * 8);

         // swap the error rows
         et = ep1;
         ep1 = ep2;
         ep2 = et;
      }
   }

   return mask;
}
311
// The color optimization function. (Clever code, part 1)
// Estimates the principal axis of the block's RGB distribution (covariance
// matrix + a few power iterations), then returns, as 5:6:5 values, the block
// pixels that project furthest along that axis in each direction.
static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
{
   static const int nIterPower = 4;
   int lowDot = 0x7fffffff, highDot = -0x7fffffff;
   unsigned char *lowPixel = block, *highPixel = block;
   double magn;
   int v_r, v_g, v_b;
   float covf[6], axisR, axisG, axisB;
   int cov[6] = { 0, 0, 0, 0, 0, 0 };
   int mean[3], lo[3], hi[3];
   int ch, i, iter;

   // per-channel mean, minimum and maximum
   for (ch = 0; ch < 3; ch++) {
      const unsigned char *chan = ((const unsigned char *)block) + ch;
      int sum = chan[0], least = chan[0], most = chan[0];

      for (i = 1; i < 16; i++) {
         int v = chan[i * 4];
         sum += v;
         if (v < least)
            least = v;
         else if (v > most)
            most = v;
      }

      mean[ch] = (sum + 8) >> 4;
      lo[ch] = least;
      hi[ch] = most;
   }

   // covariance matrix (upper triangle: rr, rg, rb, gg, gb, bb)
   for (i = 0; i < 16; i++) {
      const unsigned char *px = block + i * 4;
      int r = px[0] - mean[0];
      int g = px[1] - mean[1];
      int b = px[2] - mean[2];

      cov[0] += r * r;
      cov[1] += r * g;
      cov[2] += r * b;
      cov[3] += g * g;
      cov[4] += g * b;
      cov[5] += b * b;
   }

   // to float, then power iteration starting from the bounding-box diagonal
   for (i = 0; i < 6; i++)
      covf[i] = cov[i] / 255.0f;

   axisR = (float)(hi[0] - lo[0]);
   axisG = (float)(hi[1] - lo[1]);
   axisB = (float)(hi[2] - lo[2]);

   for (iter = 0; iter < nIterPower; iter++) {
      float r = axisR * covf[0] + axisG * covf[1] + axisB * covf[2];
      float g = axisR * covf[1] + axisG * covf[3] + axisB * covf[4];
      float b = axisR * covf[2] + axisG * covf[4] + axisB * covf[5];

      axisR = r;
      axisG = g;
      axisB = b;
   }

   // largest component magnitude of the axis
   magn = STBD_FABS(axisR);
   if (STBD_FABS(axisG) > magn) magn = STBD_FABS(axisG);
   if (STBD_FABS(axisB) > magn) magn = STBD_FABS(axisB);

   if (magn < 4.0f) { // too small, default to luminance
      v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
      v_g = 587;
      v_b = 114;
   }
   else {
      // rescale so the largest component has magnitude 512
      magn = 512.0 / magn;
      v_r = (int)(axisR * magn);
      v_g = (int)(axisG * magn);
      v_b = (int)(axisB * magn);
   }

   // Pick colors at extreme points
   for (i = 0; i < 16; i++) {
      unsigned char *px = block + i * 4;
      int dot = px[0] * v_r + px[1] * v_g + px[2] * v_b;

      if (dot < lowDot) {
         lowDot = dot;
         lowPixel = px;
      }

      if (dot > highDot) {
         highDot = dot;
         highPixel = px;
      }
   }

   *pmax16 = stb__As16Bit(highPixel[0], highPixel[1], highPixel[2]);
   *pmin16 = stb__As16Bit(lowPixel[0], lowPixel[1], lowPixel[2]);
}
417
// Truncates y to an int and clamps the result to the range [p0, p1].
static int stb__sclamp(float y, int p0, int p1)
{
   int truncated = (int)y;
   return (truncated < p0) ? p0 : (truncated > p1) ? p1 : truncated;
}
425
426 // The refinement function. (Clever code, part 2)
427 // Tries to optimize colors to suit block contents better.
428 // (By solving a least squares system via normal equations+Cramer's rule)
429 static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
430 {
431 static const int w1Tab[4] = { 3,0,2,1 };
432 static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
433 // ^some magic to save a lot of multiplies in the accumulating loop...
434 // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
435
436 float frb, fg;
437 unsigned short oldMin, oldMax, min16, max16;
438 int i, akku = 0, xx, xy, yy;
439 int At1_r, At1_g, At1_b;
440 int At2_r, At2_g, At2_b;
441 unsigned int cm = mask;
442
443 oldMin = *pmin16;
444 oldMax = *pmax16;
445
446 if ((mask ^ (mask << 2)) < 4) // all pixels have the same index?
447 {
448 // yes, linear system would be singular; solve using optimal
449 // single-color match on average color
450 int r = 8, g = 8, b = 8;
451 for (i = 0; i<16; ++i) {
452 r += block[i * 4 + 0];
453 g += block[i * 4 + 1];
454 b += block[i * 4 + 2];
455 }
456
457 r >>= 4; g >>= 4; b >>= 4;
458
459 max16 = (stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) | stb__OMatch5[b][0];
460 min16 = (stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) | stb__OMatch5[b][1];
461 }
462 else {
463 At1_r = At1_g = At1_b = 0;
464 At2_r = At2_g = At2_b = 0;
465 for (i = 0; i<16; ++i, cm >>= 2) {
466 int step = cm & 3;
467 int w1 = w1Tab[step];
468 int r = block[i * 4 + 0];
469 int g = block[i * 4 + 1];
470 int b = block[i * 4 + 2];
471
472 akku += prods[step];
473 At1_r += w1 * r;
474 At1_g += w1 * g;
475 At1_b += w1 * b;
476 At2_r += r;
477 At2_g += g;
478 At2_b += b;
479 }
480
481 At2_r = 3 * At2_r - At1_r;
482 At2_g = 3 * At2_g - At1_g;
483 At2_b = 3 * At2_b - At1_b;
484
485 // extract solutions and decide solvability
486 xx = akku >> 16;
487 yy = (akku >> 8) & 0xff;
488 xy = (akku >> 0) & 0xff;
489
490 frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy * xy);
491 fg = frb * 63.0f / 31.0f;
492
493 // solve.
494 max16 = stb__sclamp((At1_r*yy - At2_r * xy)*frb + 0.5f, 0, 31) << 11;
495 max16 |= stb__sclamp((At1_g*yy - At2_g * xy)*fg + 0.5f, 0, 63) << 5;
496 max16 |= stb__sclamp((At1_b*yy - At2_b * xy)*frb + 0.5f, 0, 31) << 0;
497
498 min16 = stb__sclamp((At2_r*xx - At1_r * xy)*frb + 0.5f, 0, 31) << 11;
499 min16 |= stb__sclamp((At2_g*xx - At1_g * xy)*fg + 0.5f, 0, 63) << 5;
500 min16 |= stb__sclamp((At2_b*xx - At1_b * xy)*frb + 0.5f, 0, 31) << 0;
501 }
502
503 *pmin16 = min16;
504 *pmax16 = max16;
505 return oldMin != min16 || oldMax != max16;
506 }
507
508 // Color block compression
509 static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
510 {
511 unsigned int mask;
512 int i;
513 int dither;
514 int refinecount;
515 unsigned short max16, min16;
516 unsigned char dblock[16 * 4], color[4 * 4];
517
518 dither = mode & STB_DXT_DITHER;
519 refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
520
521 // check if block is constant
522 for (i = 1; i<16; i++)
523 if (((unsigned int *)block)[i] != ((unsigned int *)block)[0])
524 break;
525
526 if (i == 16) { // constant color
527 int r = block[0], g = block[1], b = block[2];
528 mask = 0xaaaaaaaa;
529 max16 = (stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) | stb__OMatch5[b][0];
530 min16 = (stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) | stb__OMatch5[b][1];
531 }
532 else {
533 // first step: compute dithered version for PCA if desired
534 if (dither)
535 stb__DitherBlock(dblock, block);
536
537 // second step: pca+map along principal axis
538 stb__OptimizeColorsBlock(dither ? dblock : block, &max16, &min16);
539 if (max16 != min16) {
540 stb__EvalColors(color, max16, min16);
541 mask = stb__MatchColorsBlock(block, color, dither);
542 }
543 else
544 mask = 0;
545
546 // third step: refine (multiple times if requested)
547 for (i = 0; i<refinecount; i++) {
548 unsigned int lastmask = mask;
549
550 if (stb__RefineBlock(dither ? dblock : block, &max16, &min16, mask)) {
551 if (max16 != min16) {
552 stb__EvalColors(color, max16, min16);
553 mask = stb__MatchColorsBlock(block, color, dither);
554 }
555 else {
556 mask = 0;
557 break;
558 }
559 }
560
561 if (mask == lastmask)
562 break;
563 }
564 }
565
566 // write the color block
567 if (max16 < min16)
568 {
569 unsigned short t = min16;
570 min16 = max16;
571 max16 = t;
572 mask ^= 0x55555555;
573 }
574
575 dest[0] = (unsigned char)(max16);
576 dest[1] = (unsigned char)(max16 >> 8);
577 dest[2] = (unsigned char)(min16);
578 dest[3] = (unsigned char)(min16 >> 8);
579 dest[4] = (unsigned char)(mask);
580 dest[5] = (unsigned char)(mask >> 8);
581 dest[6] = (unsigned char)(mask >> 16);
582 dest[7] = (unsigned char)(mask >> 24);
583 }
584
// Alpha block compression (this is easy for a change).
//   dest   - receives 8 bytes: max value, min value, then 16 3-bit indices
//            packed low bits first into 6 bytes
//   src    - first of 16 samples
//   stride - distance in bytes between consecutive samples
static void stb__CompressAlphaBlock(unsigned char *dest, unsigned char *src, int stride)
{
   int lo = src[0], hi = src[0];
   int range, range2, range4, bias;
   int pending = 0, pendingBits = 0;
   int i;

   // find the extremes of the block
   for (i = 1; i < 16; i++) {
      int v = src[i * stride];
      if (v < lo)
         lo = v;
      else if (v > hi)
         hi = v;
   }

   // the endpoints go out first
   dest[0] = (unsigned char)hi;
   dest[1] = (unsigned char)lo;
   dest += 2;

   // determine bias and emit color indices
   // given the choice of mx/mn, these indices are optimal:
   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   range = hi - lo;
   range4 = range * 4;
   range2 = range * 2;
   bias = ((range < 8) ? (range - 1) : (range / 2 + 2)) - lo * 7;

   for (i = 0; i < 16; i++) {
      int a = src[i * stride] * 7 + bias;
      int level = 0;
      int ind;

      // select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max).
      if (a >= range4) {
         level = 4;
         a -= range4;
      }
      if (a >= range2) {
         level += 2;
         a -= range2;
      }
      if (a >= range)
         level += 1;

      // turn linear scale into DXT index (0/1 are extremal pts)
      ind = (8 - level) & 7;
      if (ind < 2)
         ind ^= 1;

      // queue the index and flush every completed byte
      pending |= ind << pendingBits;
      pendingBits += 3;
      if (pendingBits >= 8) {
         *dest++ = (unsigned char)pending;
         pending >>= 8;
         pendingBits -= 8;
      }
   }
}
637
638 static void stb__InitDXT()
639 {
640 int i;
641 for (i = 0; i<32; i++)
642 stb__Expand5[i] = (i << 3) | (i >> 2);
643
644 for (i = 0; i<64; i++)
645 stb__Expand6[i] = (i << 2) | (i >> 4);
646
647 for (i = 0; i<256 + 16; i++)
648 {
649 int v = i - 8 < 0 ? 0 : i - 8 > 255 ? 255 : i - 8;
650 stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v, 31)];
651 stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v, 63)];
652 }
653
654 stb__PrepareOptTable(&stb__OMatch5[0][0], stb__Expand5, 32);
655 stb__PrepareOptTable(&stb__OMatch6[0][0], stb__Expand6, 64);
656 }
657
// Compresses one 4x4 block of RGBA pixels (64 bytes, row-major).
//   dest  - receives 8 bytes (alpha == 0) or 16 bytes (alpha != 0: the alpha
//           block first, then the color block)
//   src   - the 16 source pixels, 4 bytes each
//   alpha - non-zero to also encode the fourth byte of every pixel
//   mode  - STB_DXT_* bitflags
// NOTE: the lookup tables are built on the first call, guarded by a plain
// static flag with no synchronization.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
{
   unsigned char data[16][4];
   static int init = 1;

   if (init) {
      stb__InitDXT();
      init = 0;
   }

   if (alpha) {
      int i;
      stb__CompressAlphaBlock(dest, (unsigned char*)src + 3, 4);
      dest += 8;

      // make a new copy of the data in which alpha is opaque,
      // because code uses a fast test for color constancy.
      // Copied with a plain loop so no libc memcpy is required: <string.h>
      // is only included when STBD_MEMSET is left at its default.
      for (i = 0; i < 16; ++i) {
         data[i][0] = src[i * 4 + 0];
         data[i][1] = src[i * 4 + 1];
         data[i][2] = src[i * 4 + 2];
         data[i][3] = 255;
      }
      src = &data[0][0];
   }

   stb__CompressColorBlock(dest, (unsigned char*)src, mode);
}
681
// Compresses 16 tightly packed single-channel samples into an 8-byte block.
void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *samples = (unsigned char*)src;

   stb__CompressAlphaBlock(dest, samples, 1);
}
686
// Compresses 16 interleaved two-channel samples into 16 bytes: the first
// channel's 8-byte block followed by the second channel's.
void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *samples = (unsigned char*)src;
   int ch;

   for (ch = 0; ch < 2; ++ch)
      stb__CompressAlphaBlock(dest + ch * 8, samples + ch, 2);
}
692 #endif // STB_DXT_IMPLEMENTATION
693
694 /*
695 ------------------------------------------------------------------------------
696 This software is available under 2 licenses -- choose whichever you prefer.
697 ------------------------------------------------------------------------------
698 ALTERNATIVE A - MIT License
699 Copyright (c) 2017 Sean Barrett
700 Permission is hereby granted, free of charge, to any person obtaining a copy of
701 this software and associated documentation files (the "Software"), to deal in
702 the Software without restriction, including without limitation the rights to
703 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
704 of the Software, and to permit persons to whom the Software is furnished to do
705 so, subject to the following conditions:
706 The above copyright notice and this permission notice shall be included in all
707 copies or substantial portions of the Software.
708 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
709 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
710 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
711 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
712 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
713 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
714 SOFTWARE.
715 ------------------------------------------------------------------------------
716 ALTERNATIVE B - Public Domain (www.unlicense.org)
717 This is free and unencumbered software released into the public domain.
718 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
719 software, either in source code form or as a compiled binary, for any purpose,
720 commercial or non-commercial, and by any means.
721 In jurisdictions that recognize copyright laws, the author or authors of this
722 software dedicate any and all copyright interest in the software to the public
723 domain. We make this dedication for the benefit of the public at large and to
724 the detriment of our heirs and successors. We intend this dedication to be an
725 overt act of relinquishment in perpetuity of all present and future rights to
726 this software under copyright law.
727 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
728 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
729 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
730 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
731 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
732 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
733 ------------------------------------------------------------------------------
734 */