init
[convexer.git] / src / nbvtf / stb / stb_dxt.h
1 // stb_dxt.h - v1.10 - DXT1/DXT5 compressor - public domain
2 // original by fabian "ryg" giesen - ported to C by stb
3 // use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
4 //
5 // USAGE:
6 // call stb_compress_dxt_block() for every block (you must pad)
7 // source should be a 4x4 block of RGBA data in row-major order;
8 // Alpha channel is not stored if you specify alpha=0 (but you
9 // must supply some constant alpha in the alpha channel).
10 // You can turn on dithering and "high quality" using mode.
11 //
12 // version history:
13 // v1.10 - (i.c) various small quality improvements
14 // v1.09 - (stb) update documentation re: surprising alpha channel requirement
15 // v1.08 - (stb) fix bug in dxt-with-alpha block
16 // v1.07 - (stb) bc4; allow not using libc; add STB_DXT_STATIC
17 // v1.06 - (stb) fix to known-broken 1.05
18 // v1.05 - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski)
19 // v1.04 - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
20 // single color match fix (allow for inexact color interpolation);
21 // optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
22 // v1.03 - (stb) endianness support
23 // v1.02 - (stb) fix alpha encoding bug
24 // v1.01 - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
25 // v1.00 - (stb) first release
26 //
27 // contributors:
28 // Rich Geldreich (more accurate index selection)
29 // Kevin Schmidt (#defines for "freestanding" compilation)
30 // github:ppiastucki (BC4 support)
31 // Ignacio Castano - improve DXT endpoint quantization
32 //
33 // LICENSE
34 //
35 // See end of file for license information.
36
37 #ifndef STB_INCLUDE_STB_DXT_H
38 #define STB_INCLUDE_STB_DXT_H
39
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43
44 #ifdef STB_DXT_STATIC
45 #define STBDDEF static
46 #else
47 #define STBDDEF extern
48 #endif
49
50 // compression mode (bitflags)
51 #define STB_DXT_NORMAL 0
52 #define STB_DXT_DITHER 1 // use dithering. dubious win. never use for normal maps and the like!
53 #define STB_DXT_HIGHQUAL 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
54
55 STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode);
56 STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel);
57 STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel);
58
59 #define STB_COMPRESS_DXT_BLOCK
60
61 #ifdef __cplusplus
62 }
63 #endif
64 #endif // STB_INCLUDE_STB_DXT_H
65
66 #ifdef STB_DXT_IMPLEMENTATION
67
68 // configuration options for DXT encoder. set them in the project/makefile or just define
69 // them at the top.
70
71 // STB_DXT_USE_ROUNDING_BIAS
72 // use a rounding bias during color interpolation. this is closer to what "ideal"
73 // interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
74 // implicitly had this turned on.
75 //
76 // in case you're targeting a specific type of hardware (e.g. console programmers):
77 // NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
78 // to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
79 // you also see "(a*5 + b*3) / 8" on some old GPU designs.
80 // #define STB_DXT_USE_ROUNDING_BIAS
81
#include <stdlib.h>

// <math.h> is pulled in whenever one of the two math helper macros is left at
// its default. (The guard previously tested STBI_FABS, a name nothing defines,
// so <math.h> was included even when both helpers had been overridden.)
#if !defined(STBD_ABS) || !defined(STBD_FABS)
#include <math.h>
#endif

// integer absolute value; define before including to override
#ifndef STBD_ABS
#define STBD_ABS(i) abs(i)
#endif

// floating-point absolute value; define before including to override
#ifndef STBD_FABS
#define STBD_FABS(x) fabs(x)
#endif

// memset hook; <string.h> is only included when the default is used
#ifndef STBD_MEMSET
#include <string.h>
#define STBD_MEMSET memset
#endif
100
// Lookup tables; all of them are filled in by stb__InitDXT().

// 5-bit and 6-bit channel value -> 8-bit channel value
static unsigned char stb__Expand5[32];
static unsigned char stb__Expand6[64];

// per 8-bit value: [0] = "max" endpoint index, [1] = "min" endpoint index
// (written by stb__PrepareOptTable)
static unsigned char stb__OMatch5[256][2];
static unsigned char stb__OMatch6[256][2];

// 8-bit value (biased by +8) -> quantized-and-re-expanded 8-bit value
static unsigned char stb__QuantRBTab[256+16];
static unsigned char stb__QuantGTab[256+16];
107
// Division-free stand-in for a*b/255: adds a rounding bias of 128, folds the
// high byte back in, then drops the low 8 bits.
static int stb__Mul8Bit(int a, int b)
{
   int scaled = a*b + 128;
   scaled += scaled >> 8;
   return scaled >> 8;
}
113
114 static void stb__From16Bit(unsigned char *out, unsigned short v)
115 {
116 int rv = (v & 0xf800) >> 11;
117 int gv = (v & 0x07e0) >> 5;
118 int bv = (v & 0x001f) >> 0;
119
120 out[0] = stb__Expand5[rv];
121 out[1] = stb__Expand6[gv];
122 out[2] = stb__Expand5[bv];
123 out[3] = 0;
124 }
125
// Packs three 8-bit channels into a 5:6:5 value: red and blue are scaled to
// 0..31, green to 0..63, using stb__Mul8Bit.
static unsigned short stb__As16Bit(int r, int g, int b)
{
   int r5 = stb__Mul8Bit(r,31);
   int g6 = stb__Mul8Bit(g,63);
   int b5 = stb__Mul8Bit(b,31);

   return (unsigned short)((r5 << 11) + (g6 << 5) + b5);
}
130
// Interpolates one third of the way from a towards b. The rounding behaviour
// is selected at compile time by STB_DXT_USE_ROUNDING_BIAS.
static int stb__Lerp13(int a, int b)
{
#ifdef STB_DXT_USE_ROUNDING_BIAS
   // biased: add (b-a) scaled by 0x55/255 using the rounded 8-bit multiply
   int delta = b - a;
   return a + stb__Mul8Bit(delta, 0x55);
#else
   // unbiased: plain truncating integer division
   // ("/ 3" can be replaced by "* 0xaaab) >> 17" if the compiler generates a slow divide)
   int weighted = a + a + b;
   return weighted / 3;
#endif
}
143
// Applies stb__Lerp13 to the first three bytes of p1/p2 and stores the
// results in out[0..2]. out[3] is not written.
static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
{
   int ch;
   for (ch = 0; ch < 3; ++ch)
      out[ch] = (unsigned char)stb__Lerp13(p1[ch], p2[ch]);
}
151
152 /****************************************************************************/
153
// Builds the single-color match table: for every 8-bit target value, search all
// (min,max) endpoint index pairs and keep the pair whose 1/3-point interpolation
// lands closest to the target.
//   Table  - receives 256 pairs; byte 0 is the max index, byte 1 the min index
//   expand - maps an endpoint index to its 8-bit value
//   size   - number of endpoint indices (entries in 'expand')
static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size)
{
   int target;

   for (target = 0; target < 256; ++target) {
      unsigned char *entry = Table + target*2;
      int lowest = 256;
      int lo, hi;

      for (lo = 0; lo < size; ++lo) {
         int loval = expand[lo];

         for (hi = 0; hi < size; ++hi) {
            int hival = expand[hi];
            int cost = STBD_ABS(stb__Lerp13(hival, loval) - target);

            // DX10 spec says that interpolation must be within 3% of "correct" result,
            // so charge 3% of the endpoint distance as extra error. (The spec does not
            // promise the error is unbiased, hence the pessimistic penalty.)
            cost += STBD_ABS(hival - loval) * 3 / 100;

            // strictly-better only: the first pair found wins ties
            if (cost >= lowest)
               continue;

            entry[0] = (unsigned char)hi;
            entry[1] = (unsigned char)lo;
            lowest = cost;
         }
      }
   }
}
182
// Expands two 5:6:5 endpoints into a 4-entry palette of 4-byte colors:
//   entry 0 = c0, entry 1 = c1,
//   entry 2 = 1/3 of the way from c0 to c1, entry 3 = 1/3 of the way from c1 to c0.
// Byte 3 of entries 2 and 3 is left unwritten.
static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1)
{
   unsigned char *first  = color;
   unsigned char *second = color + 4;

   stb__From16Bit(first,  c0);
   stb__From16Bit(second, c1);
   stb__Lerp13RGB(color +  8, first,  second);
   stb__Lerp13RGB(color + 12, second, first);
}
190
191 // Block dithering function. Simply dithers a block to 565 RGB.
192 // (Floyd-Steinberg)
193 static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
194 {
195 int err[8],*ep1 = err,*ep2 = err+4, *et;
196 int ch,y;
197
198 // process channels separately
199 for (ch=0; ch<3; ++ch) {
200 unsigned char *bp = block+ch, *dp = dest+ch;
201 unsigned char *quant = (ch == 1) ? stb__QuantGTab+8 : stb__QuantRBTab+8;
202 STBD_MEMSET(err, 0, sizeof(err));
203 for(y=0; y<4; ++y) {
204 dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
205 ep1[0] = bp[ 0] - dp[ 0];
206 dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
207 ep1[1] = bp[ 4] - dp[ 4];
208 dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
209 ep1[2] = bp[ 8] - dp[ 8];
210 dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
211 ep1[3] = bp[12] - dp[12];
212 bp += 16;
213 dp += 16;
214 et = ep1, ep1 = ep2, ep2 = et; // swap
215 }
216 }
217 }
218
// Maps a projected value to a 2-bit palette index using the three crossover
// points (all four arguments must be expressed at the same scale).
static int stb__SelectStep(int dot, int c0Point, int halfPoint, int c3Point)
{
   if (dot < halfPoint)
      return (dot < c0Point) ? 1 : 3;
   return (dot < c3Point) ? 2 : 0;
}

// The color matching function.
// Assigns each of the 16 pixels in 'block' (4 bytes per pixel, RGB in bytes 0..2)
// one of the four palette entries in 'color' (4 bytes per entry).
// Returns the 32-bit index mask: pixel i occupies bits 2*i and 2*i+1.
// If 'dither' is nonzero, the projection error is diffused Floyd-Steinberg style.
static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither)
{
   unsigned int mask = 0;
   int dirr = color[0*4+0] - color[1*4+0];
   int dirg = color[0*4+1] - color[1*4+1];
   int dirb = color[0*4+2] - color[1*4+2];
   int dots[16];
   int stops[4];
   int i;
   int c0Point, halfPoint, c3Point;

   for(i=0;i<16;i++)
      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;

   for(i=0;i<4;i++)
      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;

   // think of the colors as arranged on a line; project point onto that line, then choose
   // next color out of available ones. we compute the crossover points for "best color in top
   // half"/"best in bottom half" and then the same inside that subinterval.
   //
   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
   // but it's very close and a lot faster.
   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html

   // each crossover point is the sum of two stops, i.e. twice their midpoint
   c0Point   = (stops[1] + stops[3]);
   halfPoint = (stops[3] + stops[2]);
   c3Point   = (stops[2] + stops[0]);

   if(!dither) {
      // the version without dithering is straightforward:
      // walk pixels from last to first so pixel 0 ends up in the low bits
      for (i=15;i>=0;i--) {
         mask <<= 2;
         mask |= (unsigned int) stb__SelectStep(dots[i]*2, c0Point, halfPoint, c3Point);
      }
   } else {
      // with floyd-steinberg dithering
      int err[8],*ep1 = err,*ep2 = err+4;
      int *dp = dots, y;

      // Dots are scaled by 16 (the sum of the diffusion weights), so the
      // doubled crossover points are scaled by 8. Multiplication is used
      // instead of '<<' because these values can be negative and
      // left-shifting a negative int is undefined behavior.
      c0Point   *= 8;
      halfPoint *= 8;
      c3Point   *= 8;
      for(i=0;i<8;i++)
         err[i] = 0;

      for(y=0;y<4;y++)
      {
         int dot,step;
         unsigned int lmask;   // unsigned: row 3 shifts these bits up to bit 31

         dot = dp[0]*16 + (3*ep2[1] + 5*ep2[0]);
         step = stb__SelectStep(dot, c0Point, halfPoint, c3Point);
         ep1[0] = dp[0] - stops[step];
         lmask = (unsigned int) step;

         dot = dp[1]*16 + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]);
         step = stb__SelectStep(dot, c0Point, halfPoint, c3Point);
         ep1[1] = dp[1] - stops[step];
         lmask |= (unsigned int) step << 2;

         dot = dp[2]*16 + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]);
         step = stb__SelectStep(dot, c0Point, halfPoint, c3Point);
         ep1[2] = dp[2] - stops[step];
         lmask |= (unsigned int) step << 4;

         dot = dp[3]*16 + (7*ep1[2] + 5*ep2[3] + ep2[2]);
         step = stb__SelectStep(dot, c0Point, halfPoint, c3Point);
         ep1[3] = dp[3] - stops[step];
         lmask |= (unsigned int) step << 6;

         dp += 4;
         mask |= lmask << (y*8);
         { int *et = ep1; ep1 = ep2; ep2 = et; } // swap
      }
   }

   return mask;
}
315
// The color optimization function. (Clever code, part 1)
// Chooses initial endpoints for a block: estimates the principal axis of the
// pixel colors (covariance matrix + a few power iterations), projects every
// pixel onto that axis, and returns the two extreme pixels as 5:6:5 colors.
//   block  - 16 pixels, 4 bytes each; only bytes 0..2 are read
//   pmax16 - receives the pixel with the largest projection
//   pmin16 - receives the pixel with the smallest projection
static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
{
   enum { POWER_ITERATIONS = 4 };

   int mean[3], lo[3], hi[3];
   int cov[6];
   float covf[6], axr, axg, axb;
   double magn;
   int dir_r, dir_g, dir_b;
   int lowestDot = 0x7fffffff, highestDot = -0x7fffffff;
   unsigned char *lowestPix = block, *highestPix = block;
   int c, p, pass;

   // per-channel mean (rounded) and range
   for (c = 0; c < 3; ++c) {
      const unsigned char *chan = ((const unsigned char *) block) + c;
      int sum, least, most;

      sum = least = most = chan[0];
      for (p = 1; p < 16; ++p) {
         int v = chan[p*4];
         sum += v;
         if (v < least) least = v;
         else if (v > most) most = v;
      }

      mean[c] = (sum + 8) >> 4;
      lo[c] = least;
      hi[c] = most;
   }

   // covariance matrix; symmetric, so only the 6 unique entries are kept:
   // rr, rg, rb, gg, gb, bb
   for (p = 0; p < 6; ++p)
      cov[p] = 0;

   for (p = 0; p < 16; ++p) {
      const unsigned char *px = block + p*4;
      int dr = px[0] - mean[0];
      int dg = px[1] - mean[1];
      int db = px[2] - mean[2];

      cov[0] += dr*dr;
      cov[1] += dr*dg;
      cov[2] += dr*db;
      cov[3] += dg*dg;
      cov[4] += dg*db;
      cov[5] += db*db;
   }

   // power iteration in float, seeded with the per-channel range
   for (p = 0; p < 6; ++p)
      covf[p] = cov[p] / 255.0f;

   axr = (float) (hi[0] - lo[0]);
   axg = (float) (hi[1] - lo[1]);
   axb = (float) (hi[2] - lo[2]);

   for (pass = 0; pass < POWER_ITERATIONS; ++pass) {
      float nr = axr*covf[0] + axg*covf[1] + axb*covf[2];
      float ng = axr*covf[1] + axg*covf[3] + axb*covf[4];
      float nb = axr*covf[2] + axg*covf[4] + axb*covf[5];

      axr = nr;
      axg = ng;
      axb = nb;
   }

   // largest absolute component of the axis estimate
   magn = STBD_FABS(axr);
   if (STBD_FABS(axg) > magn) magn = STBD_FABS(axg);
   if (STBD_FABS(axb) > magn) magn = STBD_FABS(axb);

   if (magn < 4.0f) {
      // axis too small to be meaningful: default to luminance
      // (JPEG YCbCr luma coefs, scaled by 1000)
      dir_r = 299;
      dir_g = 587;
      dir_b = 114;
   } else {
      // rescale so the largest component has magnitude 512, then go integer
      magn = 512.0 / magn;
      dir_r = (int) (axr * magn);
      dir_g = (int) (axg * magn);
      dir_b = (int) (axb * magn);
   }

   // pick the pixels at the extreme points along the axis
   for (p = 0; p < 16; ++p) {
      unsigned char *px = block + p*4;
      int dot = px[0]*dir_r + px[1]*dir_g + px[2]*dir_b;

      if (dot < lowestDot) {
         lowestDot = dot;
         lowestPix = px;
      }

      if (dot > highestDot) {
         highestDot = dot;
         highestPix = px;
      }
   }

   *pmax16 = stb__As16Bit(highestPix[0],highestPix[1],highestPix[2]);
   *pmin16 = stb__As16Bit(lowestPix[0],lowestPix[1],lowestPix[2]);
}
420
// Rounding thresholds for stb__Quantize5: with q = floor(x*31), the result is
// bumped to q+1 when x exceeds midpoints5[q]. The final entry is 1.0, so the
// top level is never bumped.
static const float midpoints5[32] = {
   0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f,
   0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f,
   0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f,
   0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f
};
425
// Rounding thresholds for stb__Quantize6: with q = floor(x*63), the result is
// bumped to q+1 when x exceeds midpoints6[q]. The final entry is 1.0, so the
// top level is never bumped.
static const float midpoints6[64] = {
   0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f,
   0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f,
   0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f,
   0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f,
   0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f,
   0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f,
   0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f,
   0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f
};
432
433 static unsigned short stb__Quantize5(float x)
434 {
435 unsigned short q;
436 x = x < 0 ? 0 : x > 1 ? 1 : x; // saturate
437 q = (unsigned short)(x * 31);
438 q += (x > midpoints5[q]);
439 return q;
440 }
441
442 static unsigned short stb__Quantize6(float x)
443 {
444 unsigned short q;
445 x = x < 0 ? 0 : x > 1 ? 1 : x; // saturate
446 q = (unsigned short)(x * 63);
447 q += (x > midpoints6[q]);
448 return q;
449 }
450
451 // The refinement function. (Clever code, part 2)
452 // Tries to optimize colors to suit block contents better.
453 // (By solving a least squares system via normal equations+Cramer's rule)
454 static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
455 {
456 static const int w1Tab[4] = { 3,0,2,1 };
457 static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
458 // ^some magic to save a lot of multiplies in the accumulating loop...
459 // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
460
461 float f;
462 unsigned short oldMin, oldMax, min16, max16;
463 int i, akku = 0, xx,xy,yy;
464 int At1_r,At1_g,At1_b;
465 int At2_r,At2_g,At2_b;
466 unsigned int cm = mask;
467
468 oldMin = *pmin16;
469 oldMax = *pmax16;
470
471 if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
472 {
473 // yes, linear system would be singular; solve using optimal
474 // single-color match on average color
475 int r = 8, g = 8, b = 8;
476 for (i=0;i<16;++i) {
477 r += block[i*4+0];
478 g += block[i*4+1];
479 b += block[i*4+2];
480 }
481
482 r >>= 4; g >>= 4; b >>= 4;
483
484 max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
485 min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
486 } else {
487 At1_r = At1_g = At1_b = 0;
488 At2_r = At2_g = At2_b = 0;
489 for (i=0;i<16;++i,cm>>=2) {
490 int step = cm&3;
491 int w1 = w1Tab[step];
492 int r = block[i*4+0];
493 int g = block[i*4+1];
494 int b = block[i*4+2];
495
496 akku += prods[step];
497 At1_r += w1*r;
498 At1_g += w1*g;
499 At1_b += w1*b;
500 At2_r += r;
501 At2_g += g;
502 At2_b += b;
503 }
504
505 At2_r = 3*At2_r - At1_r;
506 At2_g = 3*At2_g - At1_g;
507 At2_b = 3*At2_b - At1_b;
508
509 // extract solutions and decide solvability
510 xx = akku >> 16;
511 yy = (akku >> 8) & 0xff;
512 xy = (akku >> 0) & 0xff;
513
514 f = 3.0f / 255.0f / (xx*yy - xy*xy);
515
516 max16 = stb__Quantize5((At1_r*yy - At2_r * xy) * f) << 11;
517 max16 |= stb__Quantize6((At1_g*yy - At2_g * xy) * f) << 5;
518 max16 |= stb__Quantize5((At1_b*yy - At2_b * xy) * f) << 0;
519
520 min16 = stb__Quantize5((At2_r*xx - At1_r * xy) * f) << 11;
521 min16 |= stb__Quantize6((At2_g*xx - At1_g * xy) * f) << 5;
522 min16 |= stb__Quantize5((At2_b*xx - At1_b * xy) * f) << 0;
523 }
524
525 *pmin16 = min16;
526 *pmax16 = max16;
527 return oldMin != min16 || oldMax != max16;
528 }
529
530 // Color block compression
531 static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
532 {
533 unsigned int mask;
534 int i;
535 int dither;
536 int refinecount;
537 unsigned short max16, min16;
538 unsigned char dblock[16*4],color[4*4];
539
540 dither = mode & STB_DXT_DITHER;
541 refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
542
543 // check if block is constant
544 for (i=1;i<16;i++)
545 if (((unsigned int *) block)[i] != ((unsigned int *) block)[0])
546 break;
547
548 if(i == 16) { // constant color
549 int r = block[0], g = block[1], b = block[2];
550 mask = 0xaaaaaaaa;
551 max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
552 min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
553 } else {
554 // first step: compute dithered version for PCA if desired
555 if(dither)
556 stb__DitherBlock(dblock,block);
557
558 // second step: pca+map along principal axis
559 stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16);
560 if (max16 != min16) {
561 stb__EvalColors(color,max16,min16);
562 mask = stb__MatchColorsBlock(block,color,dither);
563 } else
564 mask = 0;
565
566 // third step: refine (multiple times if requested)
567 for (i=0;i<refinecount;i++) {
568 unsigned int lastmask = mask;
569
570 if (stb__RefineBlock(dither ? dblock : block,&max16,&min16,mask)) {
571 if (max16 != min16) {
572 stb__EvalColors(color,max16,min16);
573 mask = stb__MatchColorsBlock(block,color,dither);
574 } else {
575 mask = 0;
576 break;
577 }
578 }
579
580 if(mask == lastmask)
581 break;
582 }
583 }
584
585 // write the color block
586 if(max16 < min16)
587 {
588 unsigned short t = min16;
589 min16 = max16;
590 max16 = t;
591 mask ^= 0x55555555;
592 }
593
594 dest[0] = (unsigned char) (max16);
595 dest[1] = (unsigned char) (max16 >> 8);
596 dest[2] = (unsigned char) (min16);
597 dest[3] = (unsigned char) (min16 >> 8);
598 dest[4] = (unsigned char) (mask);
599 dest[5] = (unsigned char) (mask >> 8);
600 dest[6] = (unsigned char) (mask >> 16);
601 dest[7] = (unsigned char) (mask >> 24);
602 }
603
// Alpha block compression (this is easy for a change).
// Encodes 16 single-byte samples into an 8-byte block:
//   dest[0] = maximum sample, dest[1] = minimum sample,
//   dest[2..7] = sixteen 3-bit indices, sample 0 in the lowest bits.
//   src    - first sample
//   stride - distance in bytes between consecutive samples
static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src, int stride)
{
   int lo, hi;
   int span, span2, span4, offset;
   int pending = 0;   // index bits not yet written out
   int nbits = 0;     // number of valid bits in 'pending'
   int p;

   // find the sample range
   lo = hi = src[0];
   for (p = 1; p < 16; ++p) {
      int v = src[p*stride];
      if (v < lo) lo = v;
      else if (v > hi) hi = v;
   }

   // the two endpoints go first
   dest[0] = (unsigned char)hi;
   dest[1] = (unsigned char)lo;
   dest += 2;

   // determine bias and emit indices
   // given the choice of hi/lo, these indices are optimal:
   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   span  = hi - lo;
   span4 = span*4;
   span2 = span*2;
   offset = (span < 8) ? (span - 1) : (span/2 + 2);
   offset -= lo * 7;

   for (p = 0; p < 16; ++p) {
      int a = src[p*stride]*7 + offset;
      int level = 0;

      // "linear scale" lerp factor between 0 (sample == lo) and 7 (sample == hi),
      // found by binary search over the three bits
      if (a >= span4) { level  = 4; a -= span4; }
      if (a >= span2) { level += 2; a -= span2; }
      if (a >= span)  { level += 1; }

      // turn linear scale into a DXT index (0/1 are the extremal points)
      level = (8 - level) & 7;
      if (level < 2)
         level ^= 1;

      // append the 3-bit index and flush whole bytes as they fill up
      pending |= level << nbits;
      nbits += 3;
      if (nbits >= 8) {
         *dest++ = (unsigned char)pending;
         pending >>= 8;
         nbits -= 8;
      }
   }
}
656
657 static void stb__InitDXT()
658 {
659 int i;
660 for(i=0;i<32;i++)
661 stb__Expand5[i] = (unsigned char)((i<<3)|(i>>2));
662
663 for(i=0;i<64;i++)
664 stb__Expand6[i] = (unsigned char)((i<<2)|(i>>4));
665
666 for(i=0;i<256+16;i++)
667 {
668 int v = i-8 < 0 ? 0 : i-8 > 255 ? 255 : i-8;
669 stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v,31)];
670 stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v,63)];
671 }
672
673 stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32);
674 stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64);
675 }
676
// Compresses one 4x4 block of RGBA pixels.
//   dest  - output; 8 bytes are written when alpha == 0, 16 bytes otherwise
//           (8-byte alpha block followed by the 8-byte color block)
//   src   - 16 pixels, 4 bytes each, row-major
//   alpha - nonzero to also encode byte 3 of each pixel as an alpha block
//   mode  - combination of the STB_DXT_* flags
// The lookup tables are built on the first call, guarded by a plain static
// flag with no synchronization.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
{
   unsigned char data[16][4];
   static int init=1;
   if (init) {
      stb__InitDXT();
      init=0;
   }

   if (alpha) {
      int i;
      stb__CompressAlphaBlock(dest,(unsigned char*) src+3, 4);
      dest += 8;
      // make a new copy of the data in which alpha is opaque,
      // because code uses a fast test for color constancy.
      // (copied by hand rather than with memcpy: <string.h> is only
      // included when STBD_MEMSET is left at its default)
      for (i=0; i < 16; ++i) {
         data[i][0] = src[i*4+0];
         data[i][1] = src[i*4+1];
         data[i][2] = src[i*4+2];
         data[i][3] = 255;
      }
      src = &data[0][0];
   }

   stb__CompressColorBlock(dest,(unsigned char*) src,mode);
}
700
// Compresses one block of 16 single-byte samples, stored contiguously
// (stride 1), into the 8 bytes at 'dest' using the alpha-block encoder.
void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *samples = (unsigned char *) src;

   stb__CompressAlphaBlock(dest, samples, 1);
}
705
// Compresses one block of 16 two-byte samples into 16 bytes at 'dest':
// the first byte of every sample goes to dest[0..7], the second byte to
// dest[8..15], each through the alpha-block encoder with stride 2.
void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *first = (unsigned char *) src;
   unsigned char *second = first + 1;

   stb__CompressAlphaBlock(dest, first, 2);
   stb__CompressAlphaBlock(dest + 8, second, 2);
}
711 #endif // STB_DXT_IMPLEMENTATION
712
713 /*
714 ------------------------------------------------------------------------------
715 This software is available under 2 licenses -- choose whichever you prefer.
716 ------------------------------------------------------------------------------
717 ALTERNATIVE A - MIT License
718 Copyright (c) 2017 Sean Barrett
719 Permission is hereby granted, free of charge, to any person obtaining a copy of
720 this software and associated documentation files (the "Software"), to deal in
721 the Software without restriction, including without limitation the rights to
722 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
723 of the Software, and to permit persons to whom the Software is furnished to do
724 so, subject to the following conditions:
725 The above copyright notice and this permission notice shall be included in all
726 copies or substantial portions of the Software.
727 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
728 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
729 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
730 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
731 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
732 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
733 SOFTWARE.
734 ------------------------------------------------------------------------------
735 ALTERNATIVE B - Public Domain (www.unlicense.org)
736 This is free and unencumbered software released into the public domain.
737 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
738 software, either in source code form or as a compiled binary, for any purpose,
739 commercial or non-commercial, and by any means.
740 In jurisdictions that recognize copyright laws, the author or authors of this
741 software dedicate any and all copyright interest in the software to the public
742 domain. We make this dedication for the benefit of the public at large and to
743 the detriment of our heirs and successors. We intend this dedication to be an
744 overt act of relinquishment in perpetuity of all present and future rights to
745 this software under copyright law.
746 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
747 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
748 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
749 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
750 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
751 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
752 ------------------------------------------------------------------------------
753 */