From dbd21c8ec5cbf8c3f9e48bc9af94192130053c74 Mon Sep 17 00:00:00 2001 From: Terri00 Date: Wed, 13 Mar 2019 11:49:11 +0000 Subject: [PATCH] DDS Writer (basic) --- MCDV.sln | 37 -- MCDV/MCDV.vcxproj | 18 +- MCDV/MCDV.vcxproj.filters | 47 +-- MCDV/dds.hpp | 209 +++++++++++ MCDV/main.cpp | 11 + MCDV/stb_dxt.h | 734 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 988 insertions(+), 68 deletions(-) create mode 100644 MCDV/dds.hpp create mode 100644 MCDV/stb_dxt.h diff --git a/MCDV.sln b/MCDV.sln index f9b9d19..b1be4ed 100644 --- a/MCDV.sln +++ b/MCDV.sln @@ -5,12 +5,6 @@ VisualStudioVersion = 15.0.27004.2006 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MCDV", "MCDV\MCDV.vcxproj", "{3F5631FE-0F0C-4285-B301-66DA219121EC}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MCDV_Lib", "MCDV_Lib\MCDV_Lib.vcxproj", "{21F22CE8-5445-44FA-8561-D3B8E94D55C5}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MCDV_Lib_Sharp", "MCDV_Lib_Sharp\MCDV_Lib_Sharp.csproj", "{71666EC8-527E-4C98-BD6F-2FC0AA104350}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MCDV_Processor", "MCDV_Processor\MCDV_Processor.csproj", "{B9C44160-6699-4DFF-AD66-AD39D83E8A21}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -31,37 +25,6 @@ Global {3F5631FE-0F0C-4285-B301-66DA219121EC}.Release|x64.Build.0 = Release|x64 {3F5631FE-0F0C-4285-B301-66DA219121EC}.Release|x86.ActiveCfg = Release|Win32 {3F5631FE-0F0C-4285-B301-66DA219121EC}.Release|x86.Build.0 = Release|Win32 - {21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|Any CPU.ActiveCfg = Debug|Win32 - {21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|x64.ActiveCfg = Debug|x64 - {21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|x64.Build.0 = Debug|x64 - {21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|x86.ActiveCfg = Debug|Win32 - {21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|x86.Build.0 = Debug|Win32 - 
{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Release|Any CPU.ActiveCfg = Release|Win32 - {21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Release|x64.ActiveCfg = Release|x64 - {21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Release|x64.Build.0 = Release|x64 - {21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Release|x86.ActiveCfg = Release|Win32 - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|Any CPU.Build.0 = Debug|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|x64.ActiveCfg = Debug|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|x64.Build.0 = Debug|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|x86.ActiveCfg = Debug|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|x86.Build.0 = Debug|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|Any CPU.ActiveCfg = Release|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|Any CPU.Build.0 = Release|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|x64.ActiveCfg = Release|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|x64.Build.0 = Release|Any CPU - {71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|x86.ActiveCfg = Release|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|Any CPU.Build.0 = Debug|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|x64.ActiveCfg = Debug|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|x64.Build.0 = Debug|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|x86.ActiveCfg = Debug|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|x86.Build.0 = Debug|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|Any CPU.ActiveCfg = Release|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|Any CPU.Build.0 = Release|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|x64.ActiveCfg = Release|Any CPU - {B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|x64.Build.0 = Release|Any CPU - 
{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|x86.ActiveCfg = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/MCDV/MCDV.vcxproj b/MCDV/MCDV.vcxproj index ab168a3..a3d13e1 100644 --- a/MCDV/MCDV.vcxproj +++ b/MCDV/MCDV.vcxproj @@ -126,6 +126,7 @@ + @@ -139,6 +140,7 @@ + @@ -158,24 +160,22 @@ - - - - + + + + + - - - - - + + diff --git a/MCDV/MCDV.vcxproj.filters b/MCDV/MCDV.vcxproj.filters index ae6bd95..13f3d0d 100644 --- a/MCDV/MCDV.vcxproj.filters +++ b/MCDV/MCDV.vcxproj.filters @@ -33,6 +33,9 @@ {1d19365b-710e-41b8-862f-283759638b5c} + + {57e9ec73-5c2f-4840-b186-2e1c3287cf65} + @@ -119,6 +122,12 @@ Header Files\math + + Header Files\direct3d + + + OpenGL\stb + @@ -132,54 +141,48 @@ - - OpenGL\Shader Files - - - OpenGL\Shader Files - - + OpenGL\Shader Files - + OpenGL\Shader Files - + OpenGL\Shader Files - + OpenGL\Shader Files - + OpenGL\Shader Files - + OpenGL\Shader Files - + OpenGL\Shader Files - - Source Files - - + OpenGL\Shader Files - + OpenGL\Shader Files - + OpenGL\Shader Files - + OpenGL\Shader Files - + OpenGL\Shader Files - + + Font Files + + Font Files diff --git a/MCDV/dds.hpp b/MCDV/dds.hpp new file mode 100644 index 0000000..bb3b67f --- /dev/null +++ b/MCDV/dds.hpp @@ -0,0 +1,209 @@ +#include +#include +#include + +#define STB_DXT_IMPLEMENTATION +#include "stb_dxt.h" + +#define __max(a,b) (((a) > (b)) ? (a) : (b)) +#define __min(a,b) (((a) < (b)) ? 
(a) : (b))
+#pragma once
+
+// On-disk DDS structures, packed so their size matches the file layout.
+#pragma pack(push, 1)
+struct DDS_PIXELFORMAT {
+    uint32_t dwSize;        // size of this struct, DDS_HEADER_PFSIZE
+    uint32_t dwFlags;       // DDPF_* bits
+    uint32_t dwFourCC;      // compression code, used with DDPF_FOURCC
+    uint32_t dwRGBBitCount; // bits per pixel, used with DDPF_RGB
+    uint32_t dwRBitMask;
+    uint32_t dwGBitMask;
+    uint32_t dwBBitMask;
+    uint32_t dwABitMask;
+};
+
+typedef struct {
+    uint32_t dwSize;              // size of this struct, DDS_HEADER_SIZE
+    uint32_t dwFlags;             // DDSD_* bits
+    uint32_t dwHeight;
+    uint32_t dwWidth;
+    uint32_t dwPitchOrLinearSize; // row pitch (DDSD_PITCH) or total bytes (DDSD_LINEARSIZE)
+    uint32_t dwDepth;
+    uint32_t dwMipMapCount;
+    uint32_t dwReserved1[11];
+    DDS_PIXELFORMAT ddspf;
+    uint32_t dwCaps;              // DDSCAPS_* bits
+    uint32_t dwCaps2;
+    uint32_t dwCaps3;
+    uint32_t dwCaps4;
+    uint32_t dwReserved2;
+} DDS_HEADER;
+#pragma pack(pop)
+
+// Pixel encodings accepted by dds_write().
+enum IMG {
+    MODE_RGB888,
+    MODE_RGBA8888,
+    MODE_DXT1,
+    MODE_DXT5
+};
+
+// Reverses the byte order of a 32-bit value (0x11223344 -> 0x44332211).
+inline uint32_t SwapEndian(uint32_t val)
+{
+    return (val << 24) | ((val << 8) & 0x00ff0000) |
+        ((val >> 8) & 0x0000ff00) | (val >> 24);
+}
+
+// DDS_HEADER::dwFlags bits
+#define DDSD_CAPS 0x1
+#define DDSD_HEIGHT 0x2
+#define DDSD_WIDTH 0x4
+#define DDSD_PITCH 0x8
+#define DDSD_PIXELFORMAT 0x1000
+#define DDSD_MIPMAPCOUNT 0x20000
+#define DDSD_LINEARSIZE 0x80000
+#define DDSD_DEPTH 0x800000
+
+// DDS_PIXELFORMAT::dwFlags bits
+#define DDPF_ALPHAPIXELS 0x1
+#define DDPF_ALPHA 0x2
+#define DDPF_FOURCC 0x4
+#define DDPF_RGB 0x40
+#define DDPF_YUV 0x200
+#define DDPF_LUMINANCE 0x20000
+
+// DDS_HEADER::dwCaps bits
+#define DDSCAPS_COMPLEX 0x8
+#define DDSCAPS_MIPMAP 0x400000
+#define DDSCAPS_TEXTURE 0x1000
+
+// Bytes per compressed 4x4 block
+#define BLOCK_SIZE_DXT1 8
+#define BLOCK_SIZE_DXT5 16
+
+// Bits per pixel of the uncompressed formats
+#define BBP_RGB888 24
+#define BBP_RGBA8888 32
+
+#define DDS_HEADER_SIZE 124
+#define DDS_HEADER_PFSIZE 32
+#define DDS_MAGICNUM 0x20534444 // "DDS " read as a little-endian DWORD
+
+/*
+Compresses a tightly packed RGB888 image into DXT1 blocks.
+buf_RGB: source pixels, 3 bytes each, w pixels per row, no row padding
+w, h: image size in pixels; a size that is not a multiple of 4 is padded
+      by repeating the last column/row
+cSize: receives the size of the returned buffer in bytes (0 on failure)
+Returns a malloc()ed buffer the caller must free(), or nullptr on bad
+arguments, output larger than 4 GiB or allocation failure.
+*/
+inline uint8_t* compressImageDXT1(uint8_t* buf_RGB, uint32_t w, uint32_t h, uint32_t* cSize) {
+    if (cSize == nullptr) return nullptr;
+    *cSize = 0;
+    if (buf_RGB == nullptr || w == 0 || h == 0) return nullptr;
+
+    // Round up so partial edge blocks are encoded instead of dropped.
+    const uint64_t blocks_x = (static_cast<uint64_t>(w) + 3) / 4;
+    const uint64_t blocks_y = (static_cast<uint64_t>(h) + 3) / 4;
+    const uint64_t total = blocks_x * blocks_y * BLOCK_SIZE_DXT1;
+    if (total > 0xFFFFFFFFull) return nullptr; // must fit the uint32_t out-param
+    uint8_t* outBuffer = static_cast<uint8_t*>(malloc(static_cast<size_t>(total)));
+    if (outBuffer == nullptr) return nullptr;
+
+    uint8_t src[4 * 4 * 4]; // one 4x4 RGBA block, reused for every block
+    for (uint64_t by = 0; by < blocks_y; by++) {
+        for (uint64_t bx = 0; bx < blocks_x; bx++) {
+            for (uint64_t _y = 0; _y < 4; _y++) {
+                // Clamp to the last row/column when the block overhangs the image.
+                uint64_t py = by * 4 + _y;
+                if (py >= h) py = h - 1;
+                for (uint64_t _x = 0; _x < 4; _x++) {
+                    uint64_t px = bx * 4 + _x;
+                    if (px >= w) px = w - 1;
+                    const uint8_t* in = buf_RGB + (py * w + px) * 3;
+                    uint8_t* out = src + (_y * 4 + _x) * 4;
+                    out[0] = in[0];
+                    out[1] = in[1];
+                    out[2] = in[2];
+                    out[3] = 0xFF; // stb ignores alpha when called with alpha=0
+                }
+            }
+            stb_compress_dxt_block(outBuffer + (by * blocks_x + bx) * BLOCK_SIZE_DXT1, src, 0, STB_DXT_HIGHQUAL);
+        }
+    }
+
+    *cSize = static_cast<uint32_t>(total);
+    return outBuffer;
+}
+
+/*
+Writes an image to `filename` as a DDS file.
+imageData: tightly packed RGB888 pixels (both implemented modes read RGB)
+w, h: image size in pixels
+mode: MODE_RGB888 stores the pixels as-is, MODE_DXT1 compresses them first
+Returns true once the whole file has been written; false for bad arguments,
+modes that are not implemented (MODE_DXT5, MODE_RGBA8888), a compression
+failure or a file error. Unsupported modes never create a file.
+The header is written as raw memory, so a little-endian host is assumed.
+*/
+inline bool dds_write(uint8_t* imageData, const char* filename, uint32_t w, uint32_t h, IMG mode) {
+    static_assert(sizeof(DDS_HEADER) == DDS_HEADER_SIZE, "DDS_HEADER must match the on-disk layout");
+    if (imageData == nullptr || filename == nullptr || w == 0 || h == 0) return false;
+
+    DDS_HEADER header = DDS_HEADER();
+    header.dwSize = DDS_HEADER_SIZE;
+    header.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT;
+    header.dwHeight = h;
+    header.dwWidth = w;
+    header.dwMipMapCount = 0;
+    header.dwCaps = DDSCAPS_TEXTURE;
+    header.ddspf.dwSize = DDS_HEADER_PFSIZE;
+    uint8_t* compressed = nullptr; // owned here, released before returning
+    const uint8_t* payload = imageData;
+    uint64_t payloadSize = 0;
+
+    switch (mode) {
+    case IMG::MODE_DXT1: {
+        uint32_t size = 0;
+        compressed = compressImageDXT1(imageData, w, h, &size);
+        if (compressed == nullptr) return false;
+        payload = compressed;
+        payloadSize = size;
+        // With DDSD_LINEARSIZE this field is the total size of the top-level image.
+        header.dwFlags |= DDSD_LINEARSIZE;
+        header.dwPitchOrLinearSize = size;
+        header.ddspf.dwFlags |= DDPF_FOURCC;
+        header.ddspf.dwFourCC = 0x31545844; // "DXT1" read as a little-endian DWORD
+        break;
+    }
+    case IMG::MODE_RGB888:
+        if (w > 0xFFFFFFFFu / (BBP_RGB888 / 8)) return false; // row pitch must fit 32 bits
+        payloadSize = static_cast<uint64_t>(w) * h * (BBP_RGB888 / 8);
+        header.dwFlags |= DDSD_PITCH;
+        header.dwPitchOrLinearSize = w * (BBP_RGB888 / 8);
+        header.ddspf.dwFlags |= DDPF_RGB;
+        header.ddspf.dwRGBBitCount = BBP_RGB888;
+        // Bytes are R,G,B in memory, so red is the low byte of each pixel.
+        header.ddspf.dwRBitMask = 0x000000ff;
+        header.ddspf.dwGBitMask = 0x0000ff00;
+        header.ddspf.dwBBitMask = 0x00ff0000;
+        break;
+    default: // MODE_DXT5 and MODE_RGBA8888 are not implemented yet
+        return false;
+    }
+
+    std::ofstream output(filename, std::ios::out | std::ios::binary);
+    bool ok = output.is_open();
+    if (ok) {
+        const uint32_t magic = DDS_MAGICNUM;
+        output.write(reinterpret_cast<const char*>(&magic), sizeof(magic));
+        output.write(reinterpret_cast<const char*>(&header), DDS_HEADER_SIZE);
+        output.write(reinterpret_cast<const char*>(payload), static_cast<std::streamsize>(payloadSize));
+        output.close();
+        ok = !output.fail();
+    }
+    free(compressed); // free(nullptr) is a no-op on the uncompressed path
+    return ok;
+}
\ No newline at end of file
diff --git a/MCDV/main.cpp b/MCDV/main.cpp
index fa39f73..66d5b94 100644
--- a/MCDV/main.cpp
+++ b/MCDV/main.cpp
@@ -25,6 +25,8 @@
 #define STBI_MSC_SECURE_CRT
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include "stb_image_write.h"
+//#include "stb_image.h"
+#include "dds.hpp"
 
 void render_to_png(int x, int y, const char* filepath){
     void* data = malloc(4 * x * y);
@@ -38,6 +40,15 @@ void render_to_png(int x, int y, const char* filepath){
 }
 
 int main(int argc, char* argv[]) {
+    int _w, _h, _nrc;
+    unsigned char* data = stbi_load("textures/test.jpg", &_w, &_h, &_nrc, 3);
+
+    dds_write(data, "output.dds", _w, _h, IMG::MODE_DXT1);
+
+    stbi_image_free(data);
+
+    return 0;
+
std::cout << "Initializing OpenGL\n"; #pragma region init_opengl diff --git a/MCDV/stb_dxt.h b/MCDV/stb_dxt.h new file mode 100644 index 0000000..c887cc4 --- /dev/null +++ b/MCDV/stb_dxt.h @@ -0,0 +1,734 @@ +// stb_dxt.h - v1.08b - DXT1/DXT5 compressor - public domain +// original by fabian "ryg" giesen - ported to C by stb +// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation +// +// USAGE: +// call stb_compress_dxt_block() for every block (you must pad) +// source should be a 4x4 block of RGBA data in row-major order; +// A is ignored if you specify alpha=0; you can turn on dithering +// and "high quality" using mode. +// +// version history: +// v1.08 - (sbt) fix bug in dxt-with-alpha block +// v1.07 - (stb) bc4; allow not using libc; add STB_DXT_STATIC +// v1.06 - (stb) fix to known-broken 1.05 +// v1.05 - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski) +// v1.04 - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec); +// single color match fix (allow for inexact color interpolation); +// optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps. +// v1.03 - (stb) endianness support +// v1.02 - (stb) fix alpha encoding bug +// v1.01 - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom +// v1.00 - (stb) first release +// +// contributors: +// Kevin Schmidt (#defines for "freestanding" compilation) +// github:ppiastucki (BC4 support) +// +// LICENSE +// +// See end of file for license information. + +#ifndef STB_INCLUDE_STB_DXT_H +#define STB_INCLUDE_STB_DXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef STB_DXT_STATIC +#define STBDDEF static +#else +#define STBDDEF extern +#endif + + // compression mode (bitflags) +#define STB_DXT_NORMAL 0 +#define STB_DXT_DITHER 1 // use dithering. dubious win. never use for normal maps and the like! 
+#define STB_DXT_HIGHQUAL 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower. + + STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode); + STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel); + STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel); + +#define STB_COMPRESS_DXT_BLOCK + +#ifdef __cplusplus +} +#endif +#endif // STB_INCLUDE_STB_DXT_H + +#ifdef STB_DXT_IMPLEMENTATION + +// configuration options for DXT encoder. set them in the project/makefile or just define +// them at the top. + +// STB_DXT_USE_ROUNDING_BIAS +// use a rounding bias during color interpolation. this is closer to what "ideal" +// interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03) +// implicitly had this turned on. +// +// in case you're targeting a specific type of hardware (e.g. console programmers): +// NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer +// to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias. +// you also see "(a*5 + b*3) / 8" on some old GPU designs. 
+// #define STB_DXT_USE_ROUNDING_BIAS + +#include + +#if !defined(STBD_ABS) || !defined(STBI_FABS) +#include +#endif + +#ifndef STBD_ABS +#define STBD_ABS(i) abs(i) +#endif + +#ifndef STBD_FABS +#define STBD_FABS(x) fabs(x) +#endif + +#ifndef STBD_MEMSET +#include +#define STBD_MEMSET memset +#endif + +static unsigned char stb__Expand5[32]; +static unsigned char stb__Expand6[64]; +static unsigned char stb__OMatch5[256][2]; +static unsigned char stb__OMatch6[256][2]; +static unsigned char stb__QuantRBTab[256 + 16]; +static unsigned char stb__QuantGTab[256 + 16]; + +static int stb__Mul8Bit(int a, int b) +{ + int t = a * b + 128; + return (t + (t >> 8)) >> 8; +} + +static void stb__From16Bit(unsigned char *out, unsigned short v) +{ + int rv = (v & 0xf800) >> 11; + int gv = (v & 0x07e0) >> 5; + int bv = (v & 0x001f) >> 0; + + out[0] = stb__Expand5[rv]; + out[1] = stb__Expand6[gv]; + out[2] = stb__Expand5[bv]; + out[3] = 0; +} + +static unsigned short stb__As16Bit(int r, int g, int b) +{ + return (stb__Mul8Bit(r, 31) << 11) + (stb__Mul8Bit(g, 63) << 5) + stb__Mul8Bit(b, 31); +} + +// linear interpolation at 1/3 point between a and b, using desired rounding type +static int stb__Lerp13(int a, int b) +{ +#ifdef STB_DXT_USE_ROUNDING_BIAS + // with rounding bias + return a + stb__Mul8Bit(b - a, 0x55); +#else + // without rounding bias + // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really need every ounce of speed. 
+ return (2 * a + b) / 3; +#endif +} + +// lerp RGB color +static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2) +{ + out[0] = stb__Lerp13(p1[0], p2[0]); + out[1] = stb__Lerp13(p1[1], p2[1]); + out[2] = stb__Lerp13(p1[2], p2[2]); +} + +/****************************************************************************/ + +// compute table to reproduce constant colors as accurately as possible +static void stb__PrepareOptTable(unsigned char *Table, const unsigned char *expand, int size) +{ + int i, mn, mx; + for (i = 0; i<256; i++) { + int bestErr = 256; + for (mn = 0; mn> 4)]; + ep1[0] = bp[0] - dp[0]; + dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)]; + ep1[1] = bp[4] - dp[4]; + dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)]; + ep1[2] = bp[8] - dp[8]; + dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)]; + ep1[3] = bp[12] - dp[12]; + bp += 16; + dp += 16; + et = ep1, ep1 = ep2, ep2 = et; // swap + } + } +} + +// The color matching function +static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color, int dither) +{ + unsigned int mask = 0; + int dirr = color[0 * 4 + 0] - color[1 * 4 + 0]; + int dirg = color[0 * 4 + 1] - color[1 * 4 + 1]; + int dirb = color[0 * 4 + 2] - color[1 * 4 + 2]; + int dots[16]; + int stops[4]; + int i; + int c0Point, halfPoint, c3Point; + + for (i = 0; i<16; i++) + dots[i] = block[i * 4 + 0] * dirr + block[i * 4 + 1] * dirg + block[i * 4 + 2] * dirb; + + for (i = 0; i<4; i++) + stops[i] = color[i * 4 + 0] * dirr + color[i * 4 + 1] * dirg + color[i * 4 + 2] * dirb; + + // think of the colors as arranged on a line; project point onto that line, then choose + // next color out of available ones. we compute the crossover points for "best color in top + // half"/"best in bottom half" and then the same inside that subinterval. 
+ // + // relying on this 1d approximation isn't always optimal in terms of euclidean distance, + // but it's very close and a lot faster. + // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html + + c0Point = (stops[1] + stops[3]) >> 1; + halfPoint = (stops[3] + stops[2]) >> 1; + c3Point = (stops[2] + stops[0]) >> 1; + + if (!dither) { + // the version without dithering is straightforward + for (i = 15; i >= 0; i--) { + int dot = dots[i]; + mask <<= 2; + + if (dot < halfPoint) + mask |= (dot < c0Point) ? 1 : 3; + else + mask |= (dot < c3Point) ? 2 : 0; + } + } + else { + // with floyd-steinberg dithering + int err[8], *ep1 = err, *ep2 = err + 4; + int *dp = dots, y; + + c0Point <<= 4; + halfPoint <<= 4; + c3Point <<= 4; + for (i = 0; i<8; i++) + err[i] = 0; + + for (y = 0; y<4; y++) + { + int dot, lmask, step; + + dot = (dp[0] << 4) + (3 * ep2[1] + 5 * ep2[0]); + if (dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + ep1[0] = dp[0] - stops[step]; + lmask = step; + + dot = (dp[1] << 4) + (7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]); + if (dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + ep1[1] = dp[1] - stops[step]; + lmask |= step << 2; + + dot = (dp[2] << 4) + (7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]); + if (dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + ep1[2] = dp[2] - stops[step]; + lmask |= step << 4; + + dot = (dp[3] << 4) + (7 * ep1[2] + 5 * ep2[3] + ep2[2]); + if (dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + ep1[3] = dp[3] - stops[step]; + lmask |= step << 6; + + dp += 4; + mask |= lmask << (y * 8); + { int *et = ep1; ep1 = ep2; ep2 = et; } // swap + } + } + + return mask; +} + +// The color optimization function. 
(Clever code, part 1) +static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16) +{ + int mind = 0x7fffffff, maxd = -0x7fffffff; + unsigned char *minp, *maxp; + double magn; + int v_r, v_g, v_b; + static const int nIterPower = 4; + float covf[6], vfr, vfg, vfb; + + // determine color distribution + int cov[6]; + int mu[3], min[3], max[3]; + int ch, i, iter; + + for (ch = 0; ch<3; ch++) + { + const unsigned char *bp = ((const unsigned char *)block) + ch; + int muv, minv, maxv; + + muv = minv = maxv = bp[0]; + for (i = 4; i<64; i += 4) + { + muv += bp[i]; + if (bp[i] < minv) minv = bp[i]; + else if (bp[i] > maxv) maxv = bp[i]; + } + + mu[ch] = (muv + 8) >> 4; + min[ch] = minv; + max[ch] = maxv; + } + + // determine covariance matrix + for (i = 0; i<6; i++) + cov[i] = 0; + + for (i = 0; i<16; i++) + { + int r = block[i * 4 + 0] - mu[0]; + int g = block[i * 4 + 1] - mu[1]; + int b = block[i * 4 + 2] - mu[2]; + + cov[0] += r * r; + cov[1] += r * g; + cov[2] += r * b; + cov[3] += g * g; + cov[4] += g * b; + cov[5] += b * b; + } + + // convert covariance matrix to float, find principal axis via power iter + for (i = 0; i<6; i++) + covf[i] = cov[i] / 255.0f; + + vfr = (float)(max[0] - min[0]); + vfg = (float)(max[1] - min[1]); + vfb = (float)(max[2] - min[2]); + + for (iter = 0; iter magn) magn = STBD_FABS(vfg); + if (STBD_FABS(vfb) > magn) magn = STBD_FABS(vfb); + + if (magn < 4.0f) { // too small, default to luminance + v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000. 
+ v_g = 587; + v_b = 114; + } + else { + magn = 512.0 / magn; + v_r = (int)(vfr * magn); + v_g = (int)(vfg * magn); + v_b = (int)(vfb * magn); + } + + // Pick colors at extreme points + for (i = 0; i<16; i++) + { + int dot = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + block[i * 4 + 2] * v_b; + + if (dot < mind) { + mind = dot; + minp = block + i * 4; + } + + if (dot > maxd) { + maxd = dot; + maxp = block + i * 4; + } + } + + *pmax16 = stb__As16Bit(maxp[0], maxp[1], maxp[2]); + *pmin16 = stb__As16Bit(minp[0], minp[1], minp[2]); +} + +static int stb__sclamp(float y, int p0, int p1) +{ + int x = (int)y; + if (x < p0) return p0; + if (x > p1) return p1; + return x; +} + +// The refinement function. (Clever code, part 2) +// Tries to optimize colors to suit block contents better. +// (By solving a least squares system via normal equations+Cramer's rule) +static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask) +{ + static const int w1Tab[4] = { 3,0,2,1 }; + static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 }; + // ^some magic to save a lot of multiplies in the accumulating loop... + // (precomputed products of weights for least squares system, accumulated inside one 32-bit register) + + float frb, fg; + unsigned short oldMin, oldMax, min16, max16; + int i, akku = 0, xx, xy, yy; + int At1_r, At1_g, At1_b; + int At2_r, At2_g, At2_b; + unsigned int cm = mask; + + oldMin = *pmin16; + oldMax = *pmax16; + + if ((mask ^ (mask << 2)) < 4) // all pixels have the same index? 
+ { + // yes, linear system would be singular; solve using optimal + // single-color match on average color + int r = 8, g = 8, b = 8; + for (i = 0; i<16; ++i) { + r += block[i * 4 + 0]; + g += block[i * 4 + 1]; + b += block[i * 4 + 2]; + } + + r >>= 4; g >>= 4; b >>= 4; + + max16 = (stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) | stb__OMatch5[b][0]; + min16 = (stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) | stb__OMatch5[b][1]; + } + else { + At1_r = At1_g = At1_b = 0; + At2_r = At2_g = At2_b = 0; + for (i = 0; i<16; ++i, cm >>= 2) { + int step = cm & 3; + int w1 = w1Tab[step]; + int r = block[i * 4 + 0]; + int g = block[i * 4 + 1]; + int b = block[i * 4 + 2]; + + akku += prods[step]; + At1_r += w1 * r; + At1_g += w1 * g; + At1_b += w1 * b; + At2_r += r; + At2_g += g; + At2_b += b; + } + + At2_r = 3 * At2_r - At1_r; + At2_g = 3 * At2_g - At1_g; + At2_b = 3 * At2_b - At1_b; + + // extract solutions and decide solvability + xx = akku >> 16; + yy = (akku >> 8) & 0xff; + xy = (akku >> 0) & 0xff; + + frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy * xy); + fg = frb * 63.0f / 31.0f; + + // solve. + max16 = stb__sclamp((At1_r*yy - At2_r * xy)*frb + 0.5f, 0, 31) << 11; + max16 |= stb__sclamp((At1_g*yy - At2_g * xy)*fg + 0.5f, 0, 63) << 5; + max16 |= stb__sclamp((At1_b*yy - At2_b * xy)*frb + 0.5f, 0, 31) << 0; + + min16 = stb__sclamp((At2_r*xx - At1_r * xy)*frb + 0.5f, 0, 31) << 11; + min16 |= stb__sclamp((At2_g*xx - At1_g * xy)*fg + 0.5f, 0, 63) << 5; + min16 |= stb__sclamp((At2_b*xx - At1_b * xy)*frb + 0.5f, 0, 31) << 0; + } + + *pmin16 = min16; + *pmax16 = max16; + return oldMin != min16 || oldMax != max16; +} + +// Color block compression +static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode) +{ + unsigned int mask; + int i; + int dither; + int refinecount; + unsigned short max16, min16; + unsigned char dblock[16 * 4], color[4 * 4]; + + dither = mode & STB_DXT_DITHER; + refinecount = (mode & STB_DXT_HIGHQUAL) ? 
2 : 1; + + // check if block is constant + for (i = 1; i<16; i++) + if (((unsigned int *)block)[i] != ((unsigned int *)block)[0]) + break; + + if (i == 16) { // constant color + int r = block[0], g = block[1], b = block[2]; + mask = 0xaaaaaaaa; + max16 = (stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) | stb__OMatch5[b][0]; + min16 = (stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) | stb__OMatch5[b][1]; + } + else { + // first step: compute dithered version for PCA if desired + if (dither) + stb__DitherBlock(dblock, block); + + // second step: pca+map along principal axis + stb__OptimizeColorsBlock(dither ? dblock : block, &max16, &min16); + if (max16 != min16) { + stb__EvalColors(color, max16, min16); + mask = stb__MatchColorsBlock(block, color, dither); + } + else + mask = 0; + + // third step: refine (multiple times if requested) + for (i = 0; i> 8); + dest[2] = (unsigned char)(min16); + dest[3] = (unsigned char)(min16 >> 8); + dest[4] = (unsigned char)(mask); + dest[5] = (unsigned char)(mask >> 8); + dest[6] = (unsigned char)(mask >> 16); + dest[7] = (unsigned char)(mask >> 24); +} + +// Alpha block compression (this is easy for a change) +static void stb__CompressAlphaBlock(unsigned char *dest, unsigned char *src, int stride) +{ + int i, dist, bias, dist4, dist2, bits, mask; + + // find min/max color + int mn, mx; + mn = mx = src[0]; + + for (i = 1; i<16; i++) + { + if (src[i*stride] < mn) mn = src[i*stride]; + else if (src[i*stride] > mx) mx = src[i*stride]; + } + + // encode them + ((unsigned char *)dest)[0] = mx; + ((unsigned char *)dest)[1] = mn; + dest += 2; + + // determine bias and emit color indices + // given the choice of mx/mn, these indices are optimal: + // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/ + dist = mx - mn; + dist4 = dist * 4; + dist2 = dist * 2; + bias = (dist < 8) ? 
(dist - 1) : (dist / 2 + 2); + bias -= mn * 7; + bits = 0, mask = 0; + + for (i = 0; i<16; i++) { + int a = src[i*stride] * 7 + bias; + int ind, t; + + // select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max). + t = (a >= dist4) ? -1 : 0; ind = t & 4; a -= dist4 & t; + t = (a >= dist2) ? -1 : 0; ind += t & 2; a -= dist2 & t; + ind += (a >= dist); + + // turn linear scale into DXT index (0/1 are extremal pts) + ind = -ind & 7; + ind ^= (2 > ind); + + // write index + mask |= ind << bits; + if ((bits += 3) >= 8) { + *dest++ = mask; + mask >>= 8; + bits -= 8; + } + } +} + +static void stb__InitDXT() +{ + int i; + for (i = 0; i<32; i++) + stb__Expand5[i] = (i << 3) | (i >> 2); + + for (i = 0; i<64; i++) + stb__Expand6[i] = (i << 2) | (i >> 4); + + for (i = 0; i<256 + 16; i++) + { + int v = i - 8 < 0 ? 0 : i - 8 > 255 ? 255 : i - 8; + stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v, 31)]; + stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v, 63)]; + } + + stb__PrepareOptTable(&stb__OMatch5[0][0], stb__Expand5, 32); + stb__PrepareOptTable(&stb__OMatch6[0][0], stb__Expand6, 64); +} + +void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode) +{ + unsigned char data[16][4]; + static int init = 1; + if (init) { + stb__InitDXT(); + init = 0; + } + + if (alpha) { + int i; + stb__CompressAlphaBlock(dest, (unsigned char*)src + 3, 4); + dest += 8; + // make a new copy of the data in which alpha is opaque, + // because code uses a fast test for color constancy + memcpy(data, src, 4 * 16); + for (i = 0; i < 16; ++i) + data[i][3] = 255; + src = &data[0][0]; + } + + stb__CompressColorBlock(dest, (unsigned char*)src, mode); +} + +void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src) +{ + stb__CompressAlphaBlock(dest, (unsigned char*)src, 1); +} + +void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src) +{ + stb__CompressAlphaBlock(dest, (unsigned char*)src, 2); + 
stb__CompressAlphaBlock(dest + 8, (unsigned char*)src + 1, 2); +} +#endif // STB_DXT_IMPLEMENTATION + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. 
+In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ \ No newline at end of file -- 2.25.1