From dbd21c8ec5cbf8c3f9e48bc9af94192130053c74 Mon Sep 17 00:00:00 2001
From: Terri00 <thrustmediaproductions@gmail.com>
Date: Wed, 13 Mar 2019 11:49:11 +0000
Subject: [PATCH] DDS Writer (basic)

---
 MCDV.sln                  |  37 --
 MCDV/MCDV.vcxproj         |  18 +-
 MCDV/MCDV.vcxproj.filters |  47 +--
 MCDV/dds.hpp              | 209 +++++++++++
 MCDV/main.cpp             |  11 +
 MCDV/stb_dxt.h            | 734 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 988 insertions(+), 68 deletions(-)
 create mode 100644 MCDV/dds.hpp
 create mode 100644 MCDV/stb_dxt.h

diff --git a/MCDV.sln b/MCDV.sln
index f9b9d19..b1be4ed 100644
--- a/MCDV.sln
+++ b/MCDV.sln
@@ -5,12 +5,6 @@ VisualStudioVersion = 15.0.27004.2006
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MCDV", "MCDV\MCDV.vcxproj", "{3F5631FE-0F0C-4285-B301-66DA219121EC}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MCDV_Lib", "MCDV_Lib\MCDV_Lib.vcxproj", "{21F22CE8-5445-44FA-8561-D3B8E94D55C5}"
-EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MCDV_Lib_Sharp", "MCDV_Lib_Sharp\MCDV_Lib_Sharp.csproj", "{71666EC8-527E-4C98-BD6F-2FC0AA104350}"
-EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MCDV_Processor", "MCDV_Processor\MCDV_Processor.csproj", "{B9C44160-6699-4DFF-AD66-AD39D83E8A21}"
-EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -31,37 +25,6 @@ Global
 		{3F5631FE-0F0C-4285-B301-66DA219121EC}.Release|x64.Build.0 = Release|x64
 		{3F5631FE-0F0C-4285-B301-66DA219121EC}.Release|x86.ActiveCfg = Release|Win32
 		{3F5631FE-0F0C-4285-B301-66DA219121EC}.Release|x86.Build.0 = Release|Win32
-		{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|Any CPU.ActiveCfg = Debug|Win32
-		{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|x64.ActiveCfg = Debug|x64
-		{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|x64.Build.0 = Debug|x64
-		{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|x86.ActiveCfg = Debug|Win32
-		{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Debug|x86.Build.0 = Debug|Win32
-		{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Release|Any CPU.ActiveCfg = Release|Win32
-		{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Release|x64.ActiveCfg = Release|x64
-		{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Release|x64.Build.0 = Release|x64
-		{21F22CE8-5445-44FA-8561-D3B8E94D55C5}.Release|x86.ActiveCfg = Release|Win32
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|x64.ActiveCfg = Debug|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|x64.Build.0 = Debug|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|x86.ActiveCfg = Debug|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Debug|x86.Build.0 = Debug|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|Any CPU.Build.0 = Release|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|x64.ActiveCfg = Release|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|x64.Build.0 = Release|Any CPU
-		{71666EC8-527E-4C98-BD6F-2FC0AA104350}.Release|x86.ActiveCfg = Release|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|x64.ActiveCfg = Debug|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|x64.Build.0 = Debug|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|x86.ActiveCfg = Debug|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Debug|x86.Build.0 = Debug|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|Any CPU.Build.0 = Release|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|x64.ActiveCfg = Release|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|x64.Build.0 = Release|Any CPU
-		{B9C44160-6699-4DFF-AD66-AD39D83E8A21}.Release|x86.ActiveCfg = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/MCDV/MCDV.vcxproj b/MCDV/MCDV.vcxproj
index ab168a3..a3d13e1 100644
--- a/MCDV/MCDV.vcxproj
+++ b/MCDV/MCDV.vcxproj
@@ -126,6 +126,7 @@
     <ClInclude Include="Camera.hpp" />
     <ClInclude Include="Console.hpp" />
     <ClInclude Include="convexPolytope.h" />
+    <ClInclude Include="dds.hpp" />
     <ClInclude Include="FrameBuffer.hpp" />
     <ClInclude Include="fuzzy_select.h" />
     <ClInclude Include="gamelump.hpp" />
@@ -139,6 +140,7 @@
     <ClInclude Include="plane.h" />
     <ClInclude Include="radar.hpp" />
     <ClInclude Include="Shader.hpp" />
+    <ClInclude Include="stb_dxt.h" />
     <ClInclude Include="stb_image.h" />
     <ClInclude Include="stb_image_write.h" />
     <ClInclude Include="tbsp.hpp" />
@@ -158,24 +160,22 @@
     <ClCompile Include="main.cpp" />
   </ItemGroup>
   <ItemGroup>
-    <None Include="depth.vs" />
-    <None Include="lit.fs" />
-    <None Include="lit.vs" />
-    <None Include="main.cpp.backup.temp" />
     <None Include="shaders\depth.fs" />
+    <None Include="shaders\depth.vs" />
     <None Include="shaders\fullscreenbase.vs" />
     <None Include="shaders\ss_precomp_objectives.fs" />
     <None Include="shaders\ss_precomp_playspace.fs" />
     <None Include="shaders\ss_test.fs" />
+    <None Include="shaders\textfont.fs" />
+    <None Include="shaders\textfont.vs" />
+    <None Include="shaders\unlit.fs" />
+    <None Include="shaders\unlit.vs" />
     <None Include="shaders\worldgrid.fs" />
     <None Include="shaders\worldgrid.vs" />
-    <None Include="textfont.fs" />
-    <None Include="textfont.vs" />
-    <None Include="unlit.fs" />
-    <None Include="unlit.vs" />
   </ItemGroup>
   <ItemGroup>
-    <Image Include="dina-r.png" />
+    <Image Include="fonts\dina-r-background.png" />
+    <Image Include="fonts\dina-r.png" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
diff --git a/MCDV/MCDV.vcxproj.filters b/MCDV/MCDV.vcxproj.filters
index ae6bd95..13f3d0d 100644
--- a/MCDV/MCDV.vcxproj.filters
+++ b/MCDV/MCDV.vcxproj.filters
@@ -33,6 +33,9 @@
     <Filter Include="Font Files">
       <UniqueIdentifier>{1d19365b-710e-41b8-862f-283759638b5c}</UniqueIdentifier>
     </Filter>
+    <Filter Include="Header Files\direct3d">
+      <UniqueIdentifier>{57e9ec73-5c2f-4840-b186-2e1c3287cf65}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="interpolation.h">
@@ -119,6 +122,12 @@
     <ClInclude Include="fuzzy_select.h">
       <Filter>Header Files\math</Filter>
     </ClInclude>
+    <ClInclude Include="dds.hpp">
+      <Filter>Header Files\direct3d</Filter>
+    </ClInclude>
+    <ClInclude Include="stb_dxt.h">
+      <Filter>OpenGL\stb</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="main.cpp">
@@ -132,54 +141,48 @@
     </ClCompile>
   </ItemGroup>
   <ItemGroup>
-    <None Include="lit.fs">
-      <Filter>OpenGL\Shader Files</Filter>
-    </None>
-    <None Include="lit.vs">
-      <Filter>OpenGL\Shader Files</Filter>
-    </None>
-    <None Include="unlit.fs">
+    <None Include="shaders\worldgrid.fs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="unlit.vs">
+    <None Include="shaders\worldgrid.vs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="depth.vs">
+    <None Include="shaders\depth.fs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="textfont.vs">
+    <None Include="shaders\fullscreenbase.vs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="textfont.fs">
+    <None Include="shaders\ss_test.fs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="shaders\worldgrid.fs">
+    <None Include="shaders\ss_precomp_playspace.fs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="shaders\worldgrid.vs">
+    <None Include="shaders\ss_precomp_objectives.fs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="main.cpp.backup.temp">
-      <Filter>Source Files</Filter>
-    </None>
-    <None Include="shaders\depth.fs">
+    <None Include="shaders\textfont.fs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="shaders\fullscreenbase.vs">
+    <None Include="shaders\textfont.vs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="shaders\ss_test.fs">
+    <None Include="shaders\unlit.fs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="shaders\ss_precomp_playspace.fs">
+    <None Include="shaders\unlit.vs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
-    <None Include="shaders\ss_precomp_objectives.fs">
+    <None Include="shaders\depth.vs">
       <Filter>OpenGL\Shader Files</Filter>
     </None>
   </ItemGroup>
   <ItemGroup>
-    <Image Include="dina-r.png">
+    <Image Include="fonts\dina-r.png">
+      <Filter>Font Files</Filter>
+    </Image>
+    <Image Include="fonts\dina-r-background.png">
       <Filter>Font Files</Filter>
     </Image>
   </ItemGroup>
diff --git a/MCDV/dds.hpp b/MCDV/dds.hpp
new file mode 100644
index 0000000..bb3b67f
--- /dev/null
+++ b/MCDV/dds.hpp
@@ -0,0 +1,209 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h> 
+
+#define STB_DXT_IMPLEMENTATION
+#include "stb_dxt.h"
+
+#define __max(a,b)            (((a) > (b)) ? (a) : (b))
+#define __min(a,b)            (((a) < (b)) ? (a) : (b))
+#pragma once
+
+#pragma pack(push, 1) // byte-pack both structs: dds_write dumps DDS_HEADER to the file verbatim
+struct DDS_PIXELFORMAT { // 8 x uint32_t = 32 bytes (matches DDS_HEADER_PFSIZE)
+	uint32_t dwSize; // dds_write stores DDS_HEADER_PFSIZE here
+	uint32_t dwFlags; // DDPF_* bits
+	uint32_t dwFourCC; // 'DXT1' / 'DXT5' tag, set for the compressed modes
+	uint32_t dwRGBBitCount; // bits per pixel, set for the uncompressed modes
+	uint32_t dwRBitMask;
+	uint32_t dwGBitMask;
+	uint32_t dwBBitMask;
+	uint32_t dwABitMask;
+};
+
+typedef struct { // 31 x uint32_t = 124 bytes (matches DDS_HEADER_SIZE); excludes the magic number
+	uint32_t           dwSize; // dds_write stores DDS_HEADER_SIZE here
+	uint32_t           dwFlags; // DDSD_* bits
+	uint32_t           dwHeight;
+	uint32_t           dwWidth;
+	uint32_t           dwPitchOrLinearSize; // meaning is selected by DDSD_PITCH / DDSD_LINEARSIZE
+	uint32_t           dwDepth;
+	uint32_t           dwMipMapCount;
+	uint32_t           dwReserved1[11];
+	DDS_PIXELFORMAT	   ddspf;
+	uint32_t           dwCaps; // DDSCAPS_* bits
+	uint32_t           dwCaps2;
+	uint32_t           dwCaps3;
+	uint32_t           dwCaps4;
+	uint32_t           dwReserved2;
+} DDS_HEADER;
+#pragma pack(pop)
+
+enum IMG { // output formats accepted by dds_write
+	MODE_RGB888, // uncompressed, 24 bits per pixel
+	MODE_RGBA8888, // uncompressed, 32 bits per pixel (dds_write currently throws for it)
+	MODE_DXT1, // block-compressed through compressImageDXT1
+	MODE_DXT5 // block-compressed (dds_write currently throws for it)
+};
+
+// Reverses the byte order of a 32-bit value, e.g. 0x11223344 -> 0x44332211.
+inline uint32_t SwapEndian(uint32_t val)
+{
+	return (val << 24) | ((val << 8) & 0x00ff0000) | ((val >> 8) & 0x0000ff00) | (val >> 24);
+}
+
+#define DDSD_CAPS 0x1 // DDSD_*: bits for DDS_HEADER::dwFlags
+#define DDSD_HEIGHT 0x2
+#define DDSD_WIDTH 0x4
+#define DDSD_PITCH 0x8
+#define DDSD_PIXELFORMAT 0x1000
+#define DDSD_MIPMAPCOUNT 0x20000
+#define DDSD_LINEARSIZE 0x80000
+#define DDSD_DEPTH 0x800000
+
+#define DDPF_ALPHAPIXELS 0x1 // DDPF_*: bits for DDS_PIXELFORMAT::dwFlags
+#define DDPF_ALPHA 0x2
+#define DDPF_FOURCC 0x4
+#define DDPF_RGB 0x40
+#define DDPF_YUV 0x200
+#define DDPF_LUMINANCE 0x20000
+
+#define DDSCAPS_COMPLEX 0x8 // DDSCAPS_*: bits for DDS_HEADER::dwCaps
+#define DDSCAPS_MIPMAP 0x400000
+#define DDSCAPS_TEXTURE 0x1000
+
+#define BLOCK_SIZE_DXT1 8 // bytes per compressed 4x4 block
+#define BLOCK_SIZE_DXT5 16
+
+#define BBP_RGB888 24 // bits per pixel
+#define BBP_RGBA8888 32
+
+#define DDS_HEADER_SIZE 124 // sizeof(DDS_HEADER)
+#define DDS_HEADER_PFSIZE 32 // sizeof(DDS_PIXELFORMAT)
+#define DDS_MAGICNUM 0x20534444 // the bytes 'D','D','S',' ' when stored little-endian; no ';' so it works in expressions
+
+/*
+Compresses a tightly packed 24-bit RGB image into DXT1 blocks (8 bytes per 4x4 block).
+buf_RGB: source pixels, 3 bytes per pixel, rows of w pixels with no padding
+w, h:    image size in pixels; partial edge blocks repeat the last column/row
+cSize:   receives the size in bytes of the returned buffer
+Returns a malloc'd buffer that the caller must free(), or NULL if allocation fails.
+*/
+uint8_t* compressImageDXT1(uint8_t* buf_RGB, uint32_t w, uint32_t h, uint32_t* cSize) {
+	// Round up so images whose size is not a multiple of 4 keep their edge pixels
+	const uint32_t blocks_x = (w + 3) / 4;
+	const uint32_t blocks_y = (h + 3) / 4;
+	*cSize = blocks_x * blocks_y * BLOCK_SIZE_DXT1;
+
+	uint8_t* outBuffer = (uint8_t*)malloc(*cSize);
+	if (outBuffer == NULL) return NULL;
+
+	uint8_t src[64]; // one 4x4 block expanded to RGBA, reused for every block
+
+	// Compress block by block, in row-major block order
+	for (uint32_t by = 0; by < blocks_y; by++) {
+		for (uint32_t bx = 0; bx < blocks_x; bx++) {
+			for (uint32_t py = 0; py < 4; py++) {
+				// Clamp to the last valid row/column to pad partial blocks
+				const uint32_t sy = (by * 4 + py < h) ? (by * 4 + py) : (h - 1);
+				for (uint32_t px = 0; px < 4; px++) {
+					const uint32_t sx = (bx * 4 + px < w) ? (bx * 4 + px) : (w - 1);
+					const uint8_t* in = buf_RGB + ((size_t)sy * w + sx) * 3;
+					uint8_t* out = src + (py * 4 + px) * 4;
+					out[0] = in[0];
+					out[1] = in[1];
+					out[2] = in[2];
+					out[3] = 0xFF; // alpha is ignored because alpha = 0 is passed below
+				}
+			}
+
+			uint8_t* dst = outBuffer + ((size_t)by * blocks_x + bx) * BLOCK_SIZE_DXT1;
+			stb_compress_dxt_block(dst, src, 0, STB_DXT_HIGHQUAL);
+		}
+	}
+
+	return outBuffer;
+}
+
+// Writes an image to `filename` as a .dds file: magic number, DDS_HEADER, then pixel data.
+// imageData holds 24-bit RGB for MODE_RGB888 and MODE_DXT1 (DXT1 is compressed here).
+// Returns false for null arguments, an unsupported mode, or an allocation/file failure.
+// MODE_DXT5 and MODE_RGBA8888 are not implemented yet and throw a std::exception*.
+bool dds_write(uint8_t* imageData, const char* filename, uint32_t w, uint32_t h, IMG mode) {
+	if (imageData == NULL || filename == NULL) return false;
+
+	DDS_HEADER header = DDS_HEADER(); // value-initialised, so unset fields are zero
+	header.dwSize = DDS_HEADER_SIZE;
+	header.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT;
+	header.dwHeight = h;
+	header.dwWidth = w;
+	header.dwMipMapCount = 0;
+	header.dwCaps = DDSCAPS_TEXTURE;
+	header.ddspf.dwSize = DDS_HEADER_PFSIZE;
+
+	uint8_t* compressed = NULL; // owned DXT payload; stays NULL for uncompressed modes
+	uint32_t payload_size = 0;  // number of pixel-data bytes written after the header
+
+	switch (mode) {
+	case IMG::MODE_DXT1:
+		// Compress up front: the linear size is the byte count of the whole
+		// compressed image and is stored as-is (no byte swap).
+		compressed = compressImageDXT1(imageData, w, h, &payload_size);
+		if (compressed == NULL) return false;
+
+		header.dwPitchOrLinearSize = payload_size;
+		header.dwFlags |= DDSD_LINEARSIZE;
+		header.ddspf.dwFlags |= DDPF_FOURCC;
+		header.ddspf.dwFourCC = SwapEndian((uint32_t)'DXT1');
+		break;
+	case IMG::MODE_DXT5:
+		header.dwPitchOrLinearSize = __max(1, ((w + 3) / 4)) * __max(1, ((h + 3) / 4)) * BLOCK_SIZE_DXT5;
+		header.dwFlags |= DDSD_LINEARSIZE;
+		header.ddspf.dwFlags |= DDPF_FOURCC;
+		header.ddspf.dwFourCC = SwapEndian((uint32_t)'DXT5');
+		throw new std::exception("DXT5 Not implemented");
+		break;
+	case IMG::MODE_RGB888:
+		header.dwPitchOrLinearSize = w * (BBP_RGB888 / 8);
+		payload_size = w * h * (BBP_RGB888 / 8);
+		header.dwFlags |= DDSD_PITCH;
+		header.ddspf.dwFlags |= DDPF_RGB;
+		header.ddspf.dwRGBBitCount = BBP_RGB888;
+		// SwapEndian(0xff000000) == 0x000000ff: red is the lowest byte of each pixel
+		header.ddspf.dwRBitMask = SwapEndian(0xff000000);
+		header.ddspf.dwGBitMask = SwapEndian(0x00ff0000);
+		header.ddspf.dwBBitMask = SwapEndian(0x0000ff00);
+		break;
+	case IMG::MODE_RGBA8888:
+		header.dwPitchOrLinearSize = w * (BBP_RGBA8888 / 8);
+		payload_size = w * h * (BBP_RGBA8888 / 8);
+		header.dwFlags |= DDSD_PITCH;
+		header.ddspf.dwFlags |= DDPF_RGB | DDPF_ALPHAPIXELS;
+		header.ddspf.dwRGBBitCount = BBP_RGBA8888;
+		header.ddspf.dwRBitMask = SwapEndian(0xff000000);
+		header.ddspf.dwGBitMask = SwapEndian(0x00ff0000);
+		header.ddspf.dwBBitMask = SwapEndian(0x0000ff00);
+		header.ddspf.dwABitMask = SwapEndian(0x000000ff);
+		throw new std::exception("RGBA8888 Not implemented");
+		break;
+	default: return false; //Mode not supported
+	}
+
+	std::fstream output;
+	output.open(filename, std::ios::out | std::ios::binary);
+	if (!output.is_open()) {
+		free(compressed);
+		return false;
+	}
+
+	const uint32_t magic = DDS_MAGICNUM;
+	output.write((const char*)&magic, sizeof(uint32_t));
+	output.write((const char*)&header, DDS_HEADER_SIZE);
+	output.write((const char*)(compressed != NULL ? compressed : imageData), payload_size);
+	output.close();
+
+	// Any failed write or close leaves failbit/badbit set on the stream
+	const bool ok = !output.fail();
+	free(compressed);
+	return ok;
+}
\ No newline at end of file
diff --git a/MCDV/main.cpp b/MCDV/main.cpp
index fa39f73..66d5b94 100644
--- a/MCDV/main.cpp
+++ b/MCDV/main.cpp
@@ -25,6 +25,8 @@
 #define STBI_MSC_SECURE_CRT
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include "stb_image_write.h"
+//#include "stb_image.h"
+#include "dds.hpp"
 
 void render_to_png(int x, int y, const char* filepath){
 	void* data = malloc(4 * x * y);
@@ -38,6 +40,15 @@ void render_to_png(int x, int y, const char* filepath){
 }
 
 int main(int argc, char* argv[]) {
+	int _w, _h, _nrc;
+	unsigned char* data = stbi_load("textures/test.jpg", &_w, &_h, &_nrc, 3);
+	
+	dds_write(data, "output.dds", _w, _h, IMG::MODE_DXT1);
+
+	stbi_image_free(data);
+
+	return 0;
+
 	std::cout << "Initializing OpenGL\n";
 
 #pragma region init_opengl
diff --git a/MCDV/stb_dxt.h b/MCDV/stb_dxt.h
new file mode 100644
index 0000000..c887cc4
--- /dev/null
+++ b/MCDV/stb_dxt.h
@@ -0,0 +1,734 @@
+// stb_dxt.h - v1.08b - DXT1/DXT5 compressor - public domain
+// original by fabian "ryg" giesen - ported to C by stb
+// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
+//
+// USAGE:
+//   call stb_compress_dxt_block() for every block (you must pad)
+//     source should be a 4x4 block of RGBA data in row-major order;
+//     A is ignored if you specify alpha=0; you can turn on dithering
+//     and "high quality" using mode.
+//
+// version history:
+//   v1.08  - (stb) fix bug in dxt-with-alpha block
+//   v1.07  - (stb) bc4; allow not using libc; add STB_DXT_STATIC
+//   v1.06  - (stb) fix to known-broken 1.05
+//   v1.05  - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski)
+//   v1.04  - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
+//            single color match fix (allow for inexact color interpolation);
+//            optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
+//   v1.03  - (stb) endianness support
+//   v1.02  - (stb) fix alpha encoding bug
+//   v1.01  - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
+//   v1.00  - (stb) first release
+//
+// contributors: 
+//   Kevin Schmidt (#defines for "freestanding" compilation)
+//   github:ppiastucki (BC4 support)
+// 
+// LICENSE
+//
+//   See end of file for license information.
+
+#ifndef STB_INCLUDE_STB_DXT_H
+#define STB_INCLUDE_STB_DXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef STB_DXT_STATIC
+#define STBDDEF static
+#else
+#define STBDDEF extern
+#endif
+
+	// compression mode (bitflags)
+#define STB_DXT_NORMAL    0
+#define STB_DXT_DITHER    1   // use dithering. dubious win. never use for normal maps and the like!
+#define STB_DXT_HIGHQUAL  2   // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
+
+	STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode);
+	STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel);
+	STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel);
+
+#define STB_COMPRESS_DXT_BLOCK
+
+#ifdef __cplusplus
+}
+#endif
+#endif // STB_INCLUDE_STB_DXT_H
+
+#ifdef STB_DXT_IMPLEMENTATION
+
+// configuration options for DXT encoder. set them in the project/makefile or just define
+// them at the top.
+
+// STB_DXT_USE_ROUNDING_BIAS
+//     use a rounding bias during color interpolation. this is closer to what "ideal"
+//     interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
+//     implicitly had this turned on. 
+//
+//     in case you're targeting a specific type of hardware (e.g. console programmers):
+//     NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
+//     to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
+//     you also see "(a*5 + b*3) / 8" on some old GPU designs.
+// #define STB_DXT_USE_ROUNDING_BIAS
+
+#include <stdlib.h>
+
+#if !defined(STBD_ABS) || !defined(STBD_FABS) // math.h is only needed for the default abs/fabs
+#include <math.h>
+#endif
+
+#ifndef STBD_ABS
+#define STBD_ABS(i)           abs(i)
+#endif
+
+#ifndef STBD_FABS
+#define STBD_FABS(x)          fabs(x)
+#endif
+
+#ifndef STBD_MEMSET
+#include <string.h>
+#define STBD_MEMSET           memset
+#endif
+
+static unsigned char stb__Expand5[32];
+static unsigned char stb__Expand6[64];
+static unsigned char stb__OMatch5[256][2];
+static unsigned char stb__OMatch6[256][2];
+static unsigned char stb__QuantRBTab[256 + 16];
+static unsigned char stb__QuantGTab[256 + 16];
+
+// Shift-only stand-in for a rounded a*b/255, used with 8-bit operands.
+static int stb__Mul8Bit(int a, int b) {
+	const int biased = a * b + 128;
+	return (biased + (biased >> 8)) >> 8;
+}
+
+// Unpacks a 5:6:5 colour into four bytes: expanded R, G, B and a zero fourth byte.
+static void stb__From16Bit(unsigned char *out, unsigned short v)
+{
+	const int red5 = (v >> 11) & 0x1f;
+	const int green6 = (v >> 5) & 0x3f;
+	const int blue5 = v & 0x1f;
+	out[0] = stb__Expand5[red5];
+	out[1] = stb__Expand6[green6];
+	out[2] = stb__Expand5[blue5];
+	out[3] = 0;
+}
+
+// Packs 8-bit r, g, b into one 5:6:5 value (red in the top bits).
+static unsigned short stb__As16Bit(int r, int g, int b) {
+	const int r5 = stb__Mul8Bit(r, 31), g6 = stb__Mul8Bit(g, 63), b5 = stb__Mul8Bit(b, 31);
+	return (r5 << 11) + (g6 << 5) + b5; }
+
+// Returns the point one third of the way from a to b; rounding depends on the build option.
+static int stb__Lerp13(int a, int b)
+{
+#ifndef STB_DXT_USE_ROUNDING_BIAS
+	// no rounding bias; "/ 3" can be swapped for "* 0xaaab) >> 17" if the
+	// compiler does not strength-reduce the division and speed matters.
+	return (a + a + b) / 3;
+#else
+	// with rounding bias
+	return a + stb__Mul8Bit(b - a, 0x55);
+#endif
+}
+
+// Applies stb__Lerp13 to the first three channels of p1/p2 and stores them in out.
+static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
+{
+	int ch;
+	for (ch = 0; ch < 3; ++ch)
+		out[ch] = stb__Lerp13(p1[ch], p2[ch]);
+}
+
+/****************************************************************************/
+
+// Builds the single-colour lookup: for each target value 0..255, stores in Table[2*i]
+// and Table[2*i+1] the max/min endpoint indices whose Lerp13 blend lands closest to it.
+static void stb__PrepareOptTable(unsigned char *Table, const unsigned char *expand, int size)
+{
+	int target, lo, hi;
+	for (target = 0; target < 256; target++) {
+		int best = 256;
+		for (lo = 0; lo < size; lo++) {
+			for (hi = 0; hi < size; hi++) {
+				const int loVal = expand[lo];
+				const int hiVal = expand[hi];
+				// distance of the blended value from the target...
+				int penalty = STBD_ABS(stb__Lerp13(hiVal, loVal) - target);
+
+				// ...plus 3% of the endpoint spread: the DX10 spec allows interpolation to be
+				// that far off, and nowhere says the deviation has to be unbiased.
+				penalty += STBD_ABS(hiVal - loVal) * 3 / 100;
+
+				if (penalty >= best)
+					continue; // only a strictly better candidate replaces the current one
+
+				Table[target * 2 + 0] = hi;
+				Table[target * 2 + 1] = lo;
+				best = penalty;
+			}
+		}
+	}
+}
+
+// Expands c0/c1 into color[0..3]/color[4..7], then writes their two 1/3 blends after them.
+static void stb__EvalColors(unsigned char *color, unsigned short c0, unsigned short c1)
+{
+	unsigned char *end0 = color, *end1 = color + 4;
+	stb__From16Bit(end0, c0); stb__From16Bit(end1, c1);
+	stb__Lerp13RGB(color + 8, end0, end1); stb__Lerp13RGB(color + 12, end1, end0);
+}
+
+// Block dithering function. Quantizes the R, G, B bytes of a 4x4 RGBA block into dest
+// through the quant tables, diffusing the error Floyd-Steinberg style; dest[3 + 4*k] is not written.
+static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
+{
+	int err[8], *ep1 = err, *ep2 = err + 4, *et; // ep1: errors of the current row, ep2: of the previous row
+	int ch, y;
+
+	// process channels separately
+	for (ch = 0; ch<3; ++ch) {
+		unsigned char *bp = block + ch, *dp = dest + ch;
+		unsigned char *quant = (ch == 1) ? stb__QuantGTab + 8 : stb__QuantRBTab + 8; // +8: tables have 16 spare entries, so the index may go slightly below 0 or above 255
+		STBD_MEMSET(err, 0, sizeof(err));
+		for (y = 0; y<4; ++y) {
+			dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)]; // weights are sixteenths (>> 4)
+			ep1[0] = bp[0] - dp[0];
+			dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)]; // offsets 0/4/8/12 are the 4 pixels of a row
+			ep1[1] = bp[4] - dp[4];
+			dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)];
+			ep1[2] = bp[8] - dp[8];
+			dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)];
+			ep1[3] = bp[12] - dp[12];
+			bp += 16; // next row: 4 pixels x 4 bytes
+			dp += 16;
+			et = ep1, ep1 = ep2, ep2 = et; // swap
+		}
+	}
+}
+
+// The color matching function: returns a 32-bit mask with one 2-bit palette index per pixel (pixel 0 in the lowest bits).
+static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color, int dither)
+{
+	unsigned int mask = 0;
+	int dirr = color[0 * 4 + 0] - color[1 * 4 + 0]; // direction from palette entry 1 to palette entry 0
+	int dirg = color[0 * 4 + 1] - color[1 * 4 + 1];
+	int dirb = color[0 * 4 + 2] - color[1 * 4 + 2];
+	int dots[16];
+	int stops[4];
+	int i;
+	int c0Point, halfPoint, c3Point;
+
+	for (i = 0; i<16; i++) // project every pixel onto that direction
+		dots[i] = block[i * 4 + 0] * dirr + block[i * 4 + 1] * dirg + block[i * 4 + 2] * dirb;
+
+	for (i = 0; i<4; i++) // project the four palette entries the same way
+		stops[i] = color[i * 4 + 0] * dirr + color[i * 4 + 1] * dirg + color[i * 4 + 2] * dirb;
+
+	// think of the colors as arranged on a line; project point onto that line, then choose
+	// next color out of available ones. we compute the crossover points for "best color in top
+	// half"/"best in bottom half" and then the same inside that subinterval.
+	//
+	// relying on this 1d approximation isn't always optimal in terms of euclidean distance,
+	// but it's very close and a lot faster.
+	// http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
+
+	c0Point = (stops[1] + stops[3]) >> 1; // midpoint between entries 1 and 3
+	halfPoint = (stops[3] + stops[2]) >> 1; // midpoint between entries 3 and 2
+	c3Point = (stops[2] + stops[0]) >> 1; // midpoint between entries 2 and 0
+
+	if (!dither) {
+		// the version without dithering is straightforward
+		for (i = 15; i >= 0; i--) { // walk backwards so pixel 0 ends up in the lowest two bits
+			int dot = dots[i];
+			mask <<= 2;
+
+			if (dot < halfPoint)
+				mask |= (dot < c0Point) ? 1 : 3;
+			else
+				mask |= (dot < c3Point) ? 2 : 0;
+		}
+	}
+	else {
+		// with floyd-steinberg dithering
+		int err[8], *ep1 = err, *ep2 = err + 4; // ep1: errors of the current row, ep2: of the previous row
+		int *dp = dots, y;
+
+		c0Point <<= 4; // thresholds are scaled by 16 to match the (dot << 4) + weighted error sums below
+		halfPoint <<= 4;
+		c3Point <<= 4;
+		for (i = 0; i<8; i++)
+			err[i] = 0;
+
+		for (y = 0; y<4; y++)
+		{
+			int dot, lmask, step;
+
+			dot = (dp[0] << 4) + (3 * ep2[1] + 5 * ep2[0]);
+			if (dot < halfPoint)
+				step = (dot < c0Point) ? 1 : 3;
+			else
+				step = (dot < c3Point) ? 2 : 0;
+			ep1[0] = dp[0] - stops[step];
+			lmask = step;
+
+			dot = (dp[1] << 4) + (7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]);
+			if (dot < halfPoint)
+				step = (dot < c0Point) ? 1 : 3;
+			else
+				step = (dot < c3Point) ? 2 : 0;
+			ep1[1] = dp[1] - stops[step];
+			lmask |= step << 2;
+
+			dot = (dp[2] << 4) + (7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]);
+			if (dot < halfPoint)
+				step = (dot < c0Point) ? 1 : 3;
+			else
+				step = (dot < c3Point) ? 2 : 0;
+			ep1[2] = dp[2] - stops[step];
+			lmask |= step << 4;
+
+			dot = (dp[3] << 4) + (7 * ep1[2] + 5 * ep2[3] + ep2[2]);
+			if (dot < halfPoint)
+				step = (dot < c0Point) ? 1 : 3;
+			else
+				step = (dot < c3Point) ? 2 : 0;
+			ep1[3] = dp[3] - stops[step];
+			lmask |= step << 6;
+
+			dp += 4;
+			mask |= lmask << (y * 8); // each row contributes 8 bits (4 pixels x 2 bits)
+			{ int *et = ep1; ep1 = ep2; ep2 = et; } // swap
+		}
+	}
+
+	return mask;
+}
+
+// The color optimization function. (Clever code, part 1) Picks the two block pixels that are extreme along the block's main color axis and returns them as 5:6:5 values.
+static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
+{
+	int mind = 0x7fffffff, maxd = -0x7fffffff;
+	unsigned char *minp, *maxp; // assigned on the first pass of the final loop, where both comparisons succeed
+	double magn;
+	int v_r, v_g, v_b;
+	static const int nIterPower = 4;
+	float covf[6], vfr, vfg, vfb;
+
+	// determine color distribution
+	int cov[6];
+	int mu[3], min[3], max[3];
+	int ch, i, iter;
+
+	for (ch = 0; ch<3; ch++)
+	{
+		const unsigned char *bp = ((const unsigned char *)block) + ch;
+		int muv, minv, maxv;
+
+		muv = minv = maxv = bp[0];
+		for (i = 4; i<64; i += 4) // stride 4: one channel of each of the remaining 15 pixels
+		{
+			muv += bp[i];
+			if (bp[i] < minv) minv = bp[i];
+			else if (bp[i] > maxv) maxv = bp[i];
+		}
+
+		mu[ch] = (muv + 8) >> 4; // rounded mean over 16 pixels
+		min[ch] = minv;
+		max[ch] = maxv;
+	}
+
+	// determine covariance matrix
+	for (i = 0; i<6; i++)
+		cov[i] = 0;
+
+	for (i = 0; i<16; i++)
+	{
+		int r = block[i * 4 + 0] - mu[0];
+		int g = block[i * 4 + 1] - mu[1];
+		int b = block[i * 4 + 2] - mu[2];
+
+		cov[0] += r * r; // symmetric 3x3 matrix stored as rr, rg, rb, gg, gb, bb
+		cov[1] += r * g;
+		cov[2] += r * b;
+		cov[3] += g * g;
+		cov[4] += g * b;
+		cov[5] += b * b;
+	}
+
+	// convert covariance matrix to float, find principal axis via power iter
+	for (i = 0; i<6; i++)
+		covf[i] = cov[i] / 255.0f;
+
+	vfr = (float)(max[0] - min[0]); // start vector: per-channel value range
+	vfg = (float)(max[1] - min[1]);
+	vfb = (float)(max[2] - min[2]);
+
+	for (iter = 0; iter<nIterPower; iter++) // repeatedly multiply the vector by the covariance matrix
+	{
+		float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
+		float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
+		float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
+
+		vfr = r;
+		vfg = g;
+		vfb = b;
+	}
+
+	magn = STBD_FABS(vfr); // largest absolute component of the axis
+	if (STBD_FABS(vfg) > magn) magn = STBD_FABS(vfg);
+	if (STBD_FABS(vfb) > magn) magn = STBD_FABS(vfb);
+
+	if (magn < 4.0f) { // too small, default to luminance
+		v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
+		v_g = 587;
+		v_b = 114;
+	}
+	else {
+		magn = 512.0 / magn; // rescale so the largest component becomes +-512 before the int conversion
+		v_r = (int)(vfr * magn);
+		v_g = (int)(vfg * magn);
+		v_b = (int)(vfb * magn);
+	}
+
+	// Pick colors at extreme points
+	for (i = 0; i<16; i++)
+	{
+		int dot = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + block[i * 4 + 2] * v_b;
+
+		if (dot < mind) {
+			mind = dot;
+			minp = block + i * 4;
+		}
+
+		if (dot > maxd) {
+			maxd = dot;
+			maxp = block + i * 4;
+		}
+	}
+
+	*pmax16 = stb__As16Bit(maxp[0], maxp[1], maxp[2]);
+	*pmin16 = stb__As16Bit(minp[0], minp[1], minp[2]);
+}
+
+// Truncates y to int and clamps the result to [p0, p1].
+static int stb__sclamp(float y, int p0, int p1) {
+	const int truncated = (int)y;
+	if (truncated < p0)
+		return p0;
+	return (truncated > p1) ? p1 : truncated;
+}
+
+// The refinement function. (Clever code, part 2)
+// Tries to optimize colors to suit block contents better; returns nonzero when either endpoint changed.
+// (By solving a least squares system via normal equations+Cramer's rule)
+static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
+{
+	static const int w1Tab[4] = { 3,0,2,1 }; // weight (in thirds) of the max endpoint for each 2-bit index
+	static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 }; // w1*w1 << 16 | w2*w2 << 8 | w1*w2, with w2 = 3 - w1
+	// ^some magic to save a lot of multiplies in the accumulating loop...
+	// (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
+
+	float frb, fg;
+	unsigned short oldMin, oldMax, min16, max16;
+	int i, akku = 0, xx, xy, yy;
+	int At1_r, At1_g, At1_b;
+	int At2_r, At2_g, At2_b;
+	unsigned int cm = mask;
+
+	oldMin = *pmin16;
+	oldMax = *pmax16;
+
+	if ((mask ^ (mask << 2)) < 4) // all pixels have the same index?
+	{
+		// yes, linear system would be singular; solve using optimal
+		// single-color match on average color
+		int r = 8, g = 8, b = 8; // start at 8 so the >> 4 below rounds to nearest
+		for (i = 0; i<16; ++i) {
+			r += block[i * 4 + 0];
+			g += block[i * 4 + 1];
+			b += block[i * 4 + 2];
+		}
+
+		r >>= 4; g >>= 4; b >>= 4;
+
+		max16 = (stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) | stb__OMatch5[b][0];
+		min16 = (stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) | stb__OMatch5[b][1];
+	}
+	else {
+		At1_r = At1_g = At1_b = 0;
+		At2_r = At2_g = At2_b = 0;
+		for (i = 0; i<16; ++i, cm >>= 2) { // consume one 2-bit index per pixel, lowest bits first
+			int step = cm & 3;
+			int w1 = w1Tab[step];
+			int r = block[i * 4 + 0];
+			int g = block[i * 4 + 1];
+			int b = block[i * 4 + 2];
+
+			akku += prods[step];
+			At1_r += w1 * r;
+			At1_g += w1 * g;
+			At1_b += w1 * b;
+			At2_r += r;
+			At2_g += g;
+			At2_b += b;
+		}
+
+		At2_r = 3 * At2_r - At1_r; // turns the plain sums into sums weighted by (3 - w1)
+		At2_g = 3 * At2_g - At1_g;
+		At2_b = 3 * At2_b - At1_b;
+
+		// extract solutions and decide solvability
+		xx = akku >> 16;
+		yy = (akku >> 8) & 0xff;
+		xy = (akku >> 0) & 0xff;
+
+		frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy * xy); // also rescales 0..255 to the 5-bit range
+		fg = frb * 63.0f / 31.0f; // same factor for the 6-bit green range
+
+		// solve.
+		max16 = stb__sclamp((At1_r*yy - At2_r * xy)*frb + 0.5f, 0, 31) << 11;
+		max16 |= stb__sclamp((At1_g*yy - At2_g * xy)*fg + 0.5f, 0, 63) << 5;
+		max16 |= stb__sclamp((At1_b*yy - At2_b * xy)*frb + 0.5f, 0, 31) << 0;
+
+		min16 = stb__sclamp((At2_r*xx - At1_r * xy)*frb + 0.5f, 0, 31) << 11;
+		min16 |= stb__sclamp((At2_g*xx - At1_g * xy)*fg + 0.5f, 0, 63) << 5;
+		min16 |= stb__sclamp((At2_b*xx - At1_b * xy)*frb + 0.5f, 0, 31) << 0;
+	}
+
+	*pmin16 = min16;
+	*pmax16 = max16;
+	return oldMin != min16 || oldMax != max16;
+}
+
+// Color block compression: encodes a 4x4 RGBA block into 8 bytes at dest (two 5:6:5 endpoints, then 32 index bits).
+static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
+{
+	unsigned int mask;
+	int i;
+	int dither;
+	int refinecount;
+	unsigned short max16, min16;
+	unsigned char dblock[16 * 4], color[4 * 4];
+
+	dither = mode & STB_DXT_DITHER;
+	refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
+
+	// check if block is constant
+	for (i = 1; i<16; i++) // NOTE(review): compares whole pixels as unsigned int, so the 4th byte takes part; assumes a 4-byte unsigned int
+		if (((unsigned int *)block)[i] != ((unsigned int *)block)[0])
+			break;
+
+	if (i == 16) { // constant color
+		int r = block[0], g = block[1], b = block[2];
+		mask = 0xaaaaaaaa; // index 2 for every pixel
+		max16 = (stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) | stb__OMatch5[b][0];
+		min16 = (stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) | stb__OMatch5[b][1];
+	}
+	else {
+		// first step: compute dithered version for PCA if desired
+		if (dither)
+			stb__DitherBlock(dblock, block);
+
+		// second step: pca+map along principal axis
+		stb__OptimizeColorsBlock(dither ? dblock : block, &max16, &min16);
+		if (max16 != min16) {
+			stb__EvalColors(color, max16, min16);
+			mask = stb__MatchColorsBlock(block, color, dither);
+		}
+		else
+			mask = 0;
+
+		// third step: refine (multiple times if requested)
+		for (i = 0; i<refinecount; i++) {
+			unsigned int lastmask = mask;
+
+			if (stb__RefineBlock(dither ? dblock : block, &max16, &min16, mask)) {
+				if (max16 != min16) {
+					stb__EvalColors(color, max16, min16);
+					mask = stb__MatchColorsBlock(block, color, dither);
+				}
+				else {
+					mask = 0;
+					break;
+				}
+			}
+
+			if (mask == lastmask) // indices did not change, so another pass would start from the same mask
+				break;
+		}
+	}
+
+	// write the color block
+	if (max16 < min16) // keep the larger endpoint first; swapping endpoints means remapping the indices
+	{
+		unsigned short t = min16;
+		min16 = max16;
+		max16 = t;
+		mask ^= 0x55555555; // flips the low bit of every 2-bit index (0<->1, 2<->3)
+	}
+
+	dest[0] = (unsigned char)(max16); // all three fields are stored low byte first
+	dest[1] = (unsigned char)(max16 >> 8);
+	dest[2] = (unsigned char)(min16);
+	dest[3] = (unsigned char)(min16 >> 8);
+	dest[4] = (unsigned char)(mask);
+	dest[5] = (unsigned char)(mask >> 8);
+	dest[6] = (unsigned char)(mask >> 16);
+	dest[7] = (unsigned char)(mask >> 24);
+}
+
+// Alpha block compression (this is easy for a change): reads 16 one-byte samples spaced 'stride' bytes apart from 'src' and writes 8 bytes to 'dest' (max, min, then 16 three-bit indices).
+static void stb__CompressAlphaBlock(unsigned char *dest, unsigned char *src, int stride)
+{
+	int i, dist, bias, dist4, dist2, bits, mask; // 'mask' accumulates index bits, 'bits' counts how many of them are pending
+
+	// find min/max color (the smallest and largest of the 16 samples)
+	int mn, mx;
+	mn = mx = src[0];
+
+	for (i = 1; i<16; i++)
+	{
+		if (src[i*stride] < mn) mn = src[i*stride];
+		else if (src[i*stride] > mx) mx = src[i*stride]; // else-if suffices: mn <= mx always holds, so a sample below mn cannot exceed mx
+	}
+
+	// encode them: byte 0 is the maximum, byte 1 is the minimum
+	((unsigned char *)dest)[0] = mx;
+	((unsigned char *)dest)[1] = mn;
+	dest += 2; // the index bits follow the two endpoint bytes
+
+	// determine bias and emit color indices
+	// given the choice of mx/mn, these indices are optimal:
+	// http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
+	dist = mx - mn;
+	dist4 = dist * 4;
+	dist2 = dist * 2;
+	bias = (dist < 8) ? (dist - 1) : (dist / 2 + 2); // rounding term; see the article linked above for the derivation
+	bias -= mn * 7; // folds the '- mn' offset (scaled by 7) into the bias so the loop needs one multiply-add
+	bits = 0, mask = 0;
+
+	for (i = 0; i<16; i++) {
+		int a = src[i*stride] * 7 + bias; // equals (sample - mn) * 7 + rounding term
+		int ind, t;
+
+		// select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max). t is all-ones when the test holds, so '& t' keeps or drops the operand without branching.
+		t = (a >= dist4) ? -1 : 0; ind = t & 4; a -= dist4 & t;
+		t = (a >= dist2) ? -1 : 0; ind += t & 2; a -= dist2 & t;
+		ind += (a >= dist); // the comparison contributes 0 or 1
+
+		// turn linear scale into DXT index (0/1 are extremal pts)
+		ind = -ind & 7; // 0->0, 1->7, 2->6, ... 7->1
+		ind ^= (2 > ind); // swap 0 and 1, so linear 7 (max) ends up as 0 and linear 0 (min) as 1
+
+		// write index: append 3 bits and flush each completed byte (16 * 3 bits = 6 bytes in total)
+		mask |= ind << bits;
+		if ((bits += 3) >= 8) {
+			*dest++ = mask; // the low 8 bits are stored
+			mask >>= 8;
+			bits -= 8;
+		}
+	}
+}
+
+static void stb__InitDXT() // fills the stb__Expand5/6, stb__QuantRBTab/stb__QuantGTab and stb__OMatch5/6 lookup tables
+{
+	int i;
+	for (i = 0; i<32; i++) // 5-bit value -> 8-bit value: shift up by 3 and repeat the top 3 bits in the low bits
+		stb__Expand5[i] = (i << 3) | (i >> 2);
+
+	for (i = 0; i<64; i++) // 6-bit value -> 8-bit value: shift up by 2 and repeat the top 2 bits in the low bits
+		stb__Expand6[i] = (i << 2) | (i >> 4);
+
+	for (i = 0; i<256 + 16; i++) // table index i stands for the value i - 8, clamped to 0..255
+	{
+		int v = i - 8 < 0 ? 0 : i - 8 > 255 ? 255 : i - 8;
+		stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v, 31)]; // presumably v rescaled to 0..31, then expanded back to 8 bits - stb__Mul8Bit is defined elsewhere, confirm
+		stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v, 63)]; // same with the 6-bit range
+	}
+
+	stb__PrepareOptTable(&stb__OMatch5[0][0], stb__Expand5, 32); // fills stb__OMatch5 (read as [value][0 or 1] by the color compressor) from the 5-bit expansion table
+	stb__PrepareOptTable(&stb__OMatch6[0][0], stb__Expand6, 64); // same for the 6-bit table
+}
+
+void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode) // src: 16 pixels x 4 bytes; dest receives 8 bytes, or 16 when alpha != 0
+{
+	unsigned char data[16][4]; // scratch copy of the block, used only on the alpha path
+	static int init = 1;
+	if (init) { // first call builds the lookup tables; NOTE(review): the flag is not synchronized - concurrent first calls look racy, confirm
+		stb__InitDXT();
+		init = 0;
+	}
+
+	if (alpha) {
+		int i;
+		stb__CompressAlphaBlock(dest, (unsigned char*)src + 3, 4); // samples are the 4th byte of each pixel, 4 bytes apart
+		dest += 8; // the color block is written after the 8-byte alpha block
+		// make a new copy of the data in which alpha is opaque,
+		// because code uses a fast test for color constancy
+		memcpy(data, src, 4 * 16);
+		for (i = 0; i < 16; ++i)
+			data[i][3] = 255;
+		src = &data[0][0]; // from here on the color compressor sees the opaque copy
+	}
+
+	stb__CompressColorBlock(dest, (unsigned char*)src, mode); // 'mode' is passed through unchanged
+}
+
+void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src) // src: 16 consecutive one-byte samples; dest receives 8 bytes
+{
+	stb__CompressAlphaBlock(dest, (unsigned char*)src, 1); // stride 1: the samples are tightly packed
+}
+
+void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src) // src: 16 two-byte pixels; dest receives 16 bytes
+{
+	stb__CompressAlphaBlock(dest, (unsigned char*)src, 2); // first byte of each pixel -> dest[0..7]
+	stb__CompressAlphaBlock(dest + 8, (unsigned char*)src + 1, 2); // second byte of each pixel -> dest[8..15]
+}
+#endif // STB_DXT_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
\ No newline at end of file
-- 
2.25.1