MCDV/stb_image.h

   1 /* stb_image - v2.16 - public domain image loader - http://nothings.org/stb_image.h
   2 no warranty implied; use at your own risk
   3
   4 Do this:
   5 #define STB_IMAGE_IMPLEMENTATION
   6 before you include this file in *one* C or C++ file to create the implementation.
   7
   8 // i.e. it should look like this:
   9 #include ...
  10 #include ...
  11 #include ...
  12 #define STB_IMAGE_IMPLEMENTATION
  13 #include "stb_image.h"
  14
  15 You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
  16 And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
  17
  18
  19 QUICK NOTES:
  20 Primarily of interest to game developers and other people who can
  21 avoid problematic images and only need the trivial interface
  22
  23 JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
  24 PNG 1/2/4/8/16-bit-per-channel
  25
  26 TGA (not sure what subset, if a subset)
  27 BMP non-1bpp, non-RLE
  28 PSD (composited view only, no extra channels, 8/16 bit-per-channel)
  29
  30 GIF (*comp always reports as 4-channel)
  31 HDR (radiance rgbE format)
  32 PIC (Softimage PIC)
  33 PNM (PPM and PGM binary only)
  34
  35 Animated GIF still needs a proper API, but here's one way to do it:
  36 http://gist.github.com/urraka/685d9a6340b26b830d49
  37
  38 - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
  39 - decode from arbitrary I/O callbacks
  40 - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
  41
  42 Full documentation under "DOCUMENTATION" below.
  43
  44
  45 LICENSE
  46
  47 See end of file for license information.
  48
  49 RECENT REVISION HISTORY:
  50
  51 2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
  52 2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
  53 2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
  54 2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
  55 2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
  56 2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
  57 RGB-format JPEG; remove white matting in PSD;
  58 allocate large structures on the stack;
  59 correct channel count for PNG & BMP
  60 2.10  (2016-01-22) avoid warning introduced in 2.09
  61 2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
  62
  63 See end of file for full revision history.
  64
  65
  66 ============================    Contributors    =========================
  67
  68 Image formats                          Extensions, features
  69 Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
  70 Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
  71 Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
  72 Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
  73 Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
  74 Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
  75 Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
  76 github:urraka (animated gif)           Junggon Kim (PNM comments)
  77 Daniel Gibson (16-bit TGA)
  78 socks-the-fox (16-bit PNG)
  79 Jeremy Sawicki (handle all ImageNet JPGs)
  80 Optimizations & bugfixes
  81 Fabian "ryg" Giesen
  82 Arseny Kapoulkine
  83 John-Mark Allen
  84
  85 Bug & warning fixes
  86 Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
  87 Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
  88 Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
  89 Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
  90 the Horde3D community   Thomas Ruf         Ronny Chevalier    Baldur Karlsson
  91 Janez Zemva             John Bartholomew   Michal Cichon      github:rlyeh
  92 Jonathan Blow           Ken Hamada         Tero Hanninen      github:romigrou
  93 Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:svdijk
  94 Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:snagar
  95 Ryamond Barbiero        Paul Du Bois       Engin Manap        github:Zelex
  96 Michaelangel007@github  Philipp Wiesemann  Dale Weiler        github:grim210
  97 Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:sammyhw
  98 Blazej Dariusz Roszkowski                  Gregory Mullen     github:phprus
  99 Christian Floisand      Kevin Schmidt                         github:poppolopoppo
 100 */
 101
 102 #ifndef STBI_INCLUDE_STB_IMAGE_H
 103 #define STBI_INCLUDE_STB_IMAGE_H
 104
 105 // DOCUMENTATION
 106 //
 107 // Limitations:
 108 //    - no 16-bit-per-channel output from the 8-bit interface (16-bit PNG: use stbi_load_16 etc.)
 109 //    - no 12-bit-per-channel JPEG
 110 //    - no JPEGs with arithmetic coding
 111 //    - no 1-bit BMP
 112 //    - GIF always returns *comp=4
 113 //
 114 // Basic usage (see HDR discussion below for HDR usage):
 115 //    int x,y,n;
 116 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
 117 //    // ... process data if not NULL ...
 118 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
 119 //    // ... replace '0' with '1'..'4' to force that many components per pixel
 120 //    // ... but 'n' will always be the number that it would have been if you said 0
 121 //    stbi_image_free(data)
 122 //
 123 // Standard parameters:
 124 //    int *x                 -- outputs image width in pixels
 125 //    int *y                 -- outputs image height in pixels
 126 //    int *channels_in_file  -- outputs # of image components in image file
 127 //    int desired_channels   -- if non-zero, # of image components requested in result
 128 //
 129 // The return value from an image loader is an 'unsigned char *' which points
 130 // to the pixel data, or NULL on an allocation failure or if the image is
 131 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
 132 // with each pixel consisting of N interleaved 8-bit components; the first
 133 // pixel pointed to is top-left-most in the image. There is no padding between
 134 // image scanlines or between pixels, regardless of format. The number of
 135 // components N is 'desired_channels' if desired_channels is non-zero, or
 136 // *channels_in_file otherwise. If desired_channels is non-zero,
 137 // *channels_in_file has the number of components that _would_ have been
 138 // output otherwise. E.g. if you set desired_channels to 4, you will always
 139 // get RGBA output, but you can check *channels_in_file to see if it's trivially
 140 // opaque because e.g. there were only 3 channels in the source image.
 141 //
 142 // An output image with N components has the following components interleaved
 143 // in this order in each pixel:
 144 //
 145 //     N=#comp     components
 146 //       1           grey
 147 //       2           grey, alpha
 148 //       3           red, green, blue
 149 //       4           red, green, blue, alpha
 150 //
 151 // If image loading fails for any reason, the return value will be NULL,
 152 // and *x, *y, *channels_in_file will be unchanged. The function
 153 // stbi_failure_reason() can be queried for an extremely brief, end-user
 154 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
 155 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
 156 // more user-friendly ones.
 157 //
 158 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
 159 //
 160 // ===========================================================================
 161 //
 162 // Philosophy
 163 //
 164 // stb libraries are designed with the following priorities:
 165 //
 166 //    1. easy to use
 167 //    2. easy to maintain
 168 //    3. good performance
 169 //
 170 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
 171 // and for best performance I may provide less-easy-to-use APIs that give higher
 172 // performance, in addition to the easy to use ones. Nevertheless, it's important
 173 // to keep in mind that from the standpoint of you, a client of this library,
 174 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
 175 //
 176 // Some secondary priorities arise directly from the first two, some of which
 177 // make more explicit reasons why performance can't be emphasized.
 178 //
 179 //    - Portable ("ease of use")
 180 //    - Small source code footprint ("easy to maintain")
 181 //    - No dependencies ("ease of use")
 182 //
 183 // ===========================================================================
 184 //
 185 // I/O callbacks
 186 //
 187 // I/O callbacks allow you to read from arbitrary sources, like packaged
 188 // files or some other source. Data read from callbacks are processed
 189 // through a small internal buffer (currently 128 bytes) to try to reduce
 190 // overhead.
 191 //
 192 // The three functions you must define are "read" (reads some bytes of data),
 193 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
 194 //
 195 // ===========================================================================
 196 //
 197 // SIMD support
 198 //
 199 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
 200 // supported by the compiler. For ARM Neon support, you must explicitly
 201 // request it.
 202 //
 203 // (The old do-it-yourself SIMD API is no longer supported in the current
 204 // code.)
 205 //
 206 // On x86, SSE2 is used when available (MSVC: run-time test; GCC/Clang: only if built
 207 // with -msse2); if not, the generic C versions are used as a fall-back. On ARM targets,
 208 // the typical path is to have separate builds for NEON and non-NEON devices
 209 // (at least this is true for iOS and Android). Therefore, the NEON support is
 210 // toggled by a build flag: define STBI_NEON to get NEON loops.
 211 //
 212 // If for some reason you do not want to use any of SIMD code, or if
 213 // you have issues compiling it, you can disable it entirely by
 214 // defining STBI_NO_SIMD.
 215 //
 216 // ===========================================================================
 217 //
 218 // HDR image support   (disable by defining STBI_NO_HDR)
 219 //
 220 // stb_image now supports loading HDR images in general, and currently
 221 // the Radiance .HDR file format, although the support is provided
 222 // generically. You can still load any file through the existing interface;
 223 // if you attempt to load an HDR file, it will be automatically remapped to
 224 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
 225 // both of these constants can be reconfigured through this interface:
 226 //
 227 //     stbi_hdr_to_ldr_gamma(2.2f);
 228 //     stbi_hdr_to_ldr_scale(1.0f);
 229 //
 230 // (note, do not use _inverse_ constants; stb_image will invert them
 231 // appropriately).
 232 //
 233 // Additionally, there is a new, parallel interface for loading files as
 234 // (linear) floats to preserve the full dynamic range:
 235 //
 236 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
 237 //
 238 // If you load LDR images through this interface, those images will
 239 // be promoted to floating point values, run through the inverse of
 240 // constants corresponding to the above:
 241 //
 242 //     stbi_ldr_to_hdr_scale(1.0f);
 243 //     stbi_ldr_to_hdr_gamma(2.2f);
 244 //
 245 // Finally, given a filename (or an open file or memory block--see header
 246 // file for details) containing image data, you can query for the "most
 247 // appropriate" interface to use (that is, whether the image is HDR or
 248 // not), using:
 249 //
 250 //     stbi_is_hdr(char const *filename);
 251 //
 252 // ===========================================================================
 253 //
 254 // iPhone PNG support:
 255 //
 256 // By default we convert iphone-formatted PNGs back to RGB, even though
 257 // they are internally encoded differently. You can disable this conversion
 258 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
 259 // you will always just get the native iphone "format" through (which
 260 // is BGR stored in RGB).
 261 //
 262 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
 263 // pixel to remove any premultiplied alpha *only* if the image file explicitly
 264 // says there's premultiplied data (currently only happens in iPhone images,
 265 // and only if iPhone convert-to-rgb processing is on).
 266 //
 267 // ===========================================================================
 268 //
 269 // ADDITIONAL CONFIGURATION
 270 //
 271 //  - You can suppress implementation of any of the decoders to reduce
 272 //    your code footprint by #defining one or more of the following
 273 //    symbols before creating the implementation.
 274 //
 275 //        STBI_NO_JPEG
 276 //        STBI_NO_PNG
 277 //        STBI_NO_BMP
 278 //        STBI_NO_PSD
 279 //        STBI_NO_TGA
 280 //        STBI_NO_GIF
 281 //        STBI_NO_HDR
 282 //        STBI_NO_PIC
 283 //        STBI_NO_PNM   (.ppm and .pgm)
 284 //
 285 //  - You can request *only* certain decoders and suppress all other ones
 286 //    (this will be more forward-compatible, as addition of new decoders
 287 //    doesn't require you to disable them explicitly):
 288 //
 289 //        STBI_ONLY_JPEG
 290 //        STBI_ONLY_PNG
 291 //        STBI_ONLY_BMP
 292 //        STBI_ONLY_PSD
 293 //        STBI_ONLY_TGA
 294 //        STBI_ONLY_GIF
 295 //        STBI_ONLY_HDR
 296 //        STBI_ONLY_PIC
 297 //        STBI_ONLY_PNM   (.ppm and .pgm)
 298 //
 299 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
 300 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
 301 //
 302
 303
 304 #ifndef STBI_NO_STDIO
 305 #include <stdio.h>
 306 #endif // STBI_NO_STDIO
 307
 308 #define STBI_VERSION 1
 309
 310 enum
 311 {
             // Symbolic names for the 'desired_channels' argument of the loaders.
             // Each non-zero value equals the number of interleaved components per
             // pixel, matching the "N=#comp" table in the documentation above.
 312         STBI_default = 0, // only used for desired_channels: keep the channel count found in the file
 313
 314         STBI_grey = 1,       // grey
 315         STBI_grey_alpha = 2, // grey, alpha
 316         STBI_rgb = 3,        // red, green, blue
 317         STBI_rgb_alpha = 4   // red, green, blue, alpha
 318 };
 319
 320 typedef unsigned char stbi_uc;  // component type returned by the 8-bits-per-channel loaders; also the raw input byte type
 321 typedef unsigned short stbi_us; // component type returned by the 16-bits-per-channel loaders
 322
 323 #ifdef __cplusplus
 324 extern "C" {
 325 #endif
 326
 327 #ifdef STB_IMAGE_STATIC // define before including to give every public function internal linkage
 328 #define STBIDEF static
 329 #else
 330 #define STBIDEF extern // default: public functions are visible to other translation units
 331 #endif
 332
 333         //////////////////////////////////////////////////////////////////////////////
 334         //
 335         // PRIMARY API - works on images of any type
 336         //
 337
 338         //
 339         // load image by filename, open file, or memory buffer
 340         //
 341
 342         typedef struct
 343         {
                     // Table of three caller-supplied functions used by the *_from_callbacks
                     // entry points to read image data from an arbitrary source.
                     // 'user' is presumably the opaque pointer the caller passed alongside
                     // this table -- TODO confirm against the code that invokes the callbacks.
 344                 int(*read)  (void *user, char *data, int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
 345                 void(*skip)  (void *user, int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
 346                 int(*eof)   (void *user);                       // returns nonzero if we are at end of file/data
 347         } stbi_io_callbacks;
 348
 349         ////////////////////////////////////
 350         //
 351         // 8-bits-per-channel interface
 352         //
 353
             // All four loaders share the "standard parameters" described in the
             // documentation above: *x / *y receive the size in pixels,
             // *channels_in_file the component count stored in the file, and
             // desired_channels (0 or 1..4) selects the component count of the result.
             // They return a pointer to 8-bit pixel data, or NULL on failure; release
             // the result with stbi_image_free().
 354         STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc           const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
 355         STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
 356
 357 #ifndef STBI_NO_STDIO
             // FILE-based variants; compiled out when STBI_NO_STDIO is defined.
 358         STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
 359         STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
 360         // for stbi_load_from_file, file pointer is left pointing immediately after image
 361 #endif
 362
 363         ////////////////////////////////////
 364         //
 365         // 16-bits-per-channel interface
 366         //
 367
             // Same parameters as the 8-bit loaders, but the result is an array of
             // stbi_us (unsigned short) components.
 368         STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
 369         STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
 370
 371 #ifndef STBI_NO_STDIO
 372         STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
             // note: here the "_16" suffix comes last, unlike the other 16-bit entry points
 373         STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
 374 #endif
 375
 376         ////////////////////////////////////
 377         //
 378         // float-per-channel interface
 379         //
 380 #ifndef STBI_NO_LINEAR
             // Same parameters as the 8-bit loaders, but the result is an array of
             // float components (see "HDR image support" in the documentation above).
             // The whole float interface is compiled out when STBI_NO_LINEAR is defined.
 381         STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
 382         STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
 383
 384 #ifndef STBI_NO_STDIO
 385         STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
 386         STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
 387 #endif
 388 #endif
 389
 390 #ifndef STBI_NO_HDR
             // Constants used when an HDR file is loaded through the 8-bit interface
             // (documented defaults above: gamma 2.2, scale 1.0).
 391         STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
 392         STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
 393 #endif // STBI_NO_HDR
 394
 395 #ifndef STBI_NO_LINEAR
             // Constants used when an LDR file is loaded through the float interface.
 396         STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
 397         STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
 398 #endif // STBI_NO_LINEAR
 399
 400         // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
 401         STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); // query a callback-driven stream
 402         STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);               // query an in-memory image of 'len' bytes
 403 #ifndef STBI_NO_STDIO
 404         STBIDEF int      stbi_is_hdr(char const *filename); // query a file by name
 405         STBIDEF int      stbi_is_hdr_from_file(FILE *f);    // query an already-open file
 406 #endif // STBI_NO_STDIO
 407
 408
 409         // get a VERY brief reason for failure
 410         // NOT THREADSAFE
             // (the returned pointer comes from a single file-scope variable shared by all callers)
 411         STBIDEF const char *stbi_failure_reason(void);
 412
 413         // free the loaded image -- this is just free()
 414         STBIDEF void     stbi_image_free(void *retval_from_stbi_load);
 415
 416         // get image dimensions & components without fully decoding
             // *x, *y and *comp are outputs; the int result is presumably a
             // success flag (nonzero on success) -- TODO confirm against the definitions.
 417         STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
 418         STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
 419
 420 #ifndef STBI_NO_STDIO
 421         STBIDEF int      stbi_info(char const *filename, int *x, int *y, int *comp);
 422         STBIDEF int      stbi_info_from_file(FILE *f, int *x, int *y, int *comp);
 423
 424 #endif
 425
 426
 427
 428         // for image formats that explicitly notate that they have premultiplied alpha,
 429         // we just return the colors as stored in the file. set this flag to force
 430         // unpremultiplication. results are undefined if the unpremultiply overflows.
 431         STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
 432
 433         // indicate whether we should process iphone images back to canonical format,
 434         // or just pass them through "as-is"
 435         STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
 436
 437         // flip the image vertically, so the first pixel in the output array is the bottom left
             // (without this flag the first pixel is the top-left one, as documented above)
 438         STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
 439
 440         // ZLIB client - used by PNG, available for other purposes
             // Naming pattern visible in the signatures: "_malloc" variants return a
             // buffer and report its length through *outlen, "_buffer" variants write
             // into the caller's 'obuffer' of 'olen' bytes, and "_noheader" variants
             // presumably expect a raw deflate stream without the zlib header
             // -- TODO confirm against the definitions.
 441
 442         STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
 443         STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
 444         STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
 445         STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
 446
 447         STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
 448         STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
 449
 450
 451 #ifdef __cplusplus
 452 }
 453 #endif
 454
 455 //
 456 //
 457 ////   end header file   /////////////////////////////////////////////////////
 458 #endif // STBI_INCLUDE_STB_IMAGE_H
 459
 460 #ifdef STB_IMAGE_IMPLEMENTATION
 461
 462 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
 463   || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
 464   || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
 465   || defined(STBI_ONLY_ZLIB)
     // Translate the opt-in STBI_ONLY_* switches into opt-out STBI_NO_* switches:
     // as soon as any STBI_ONLY_* is defined, every decoder that was not
     // explicitly requested is disabled. STBI_ONLY_ZLIB has no decoder of its
     // own below, so defining it alone disables all nine image decoders.
 466 #ifndef STBI_ONLY_JPEG
 467 #define STBI_NO_JPEG
 468 #endif
 469 #ifndef STBI_ONLY_PNG
 470 #define STBI_NO_PNG
 471 #endif
 472 #ifndef STBI_ONLY_BMP
 473 #define STBI_NO_BMP
 474 #endif
 475 #ifndef STBI_ONLY_PSD
 476 #define STBI_NO_PSD
 477 #endif
 478 #ifndef STBI_ONLY_TGA
 479 #define STBI_NO_TGA
 480 #endif
 481 #ifndef STBI_ONLY_GIF
 482 #define STBI_NO_GIF
 483 #endif
 484 #ifndef STBI_ONLY_HDR
 485 #define STBI_NO_HDR
 486 #endif
 487 #ifndef STBI_ONLY_PIC
 488 #define STBI_NO_PIC
 489 #endif
 490 #ifndef STBI_ONLY_PNM
 491 #define STBI_NO_PNM
 492 #endif
 493 #endif
 494
 495 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
     // With PNG disabled, the zlib decoder is dropped as well unless the
     // client asked to keep it by defining STBI_SUPPORT_ZLIB.
 496 #define STBI_NO_ZLIB
 497 #endif
 498
 499
 500 #include <stdarg.h>
 501 #include <stddef.h> // ptrdiff_t on osx
 502 #include <stdlib.h>
 503 #include <string.h>
 504 #include <limits.h>
 505
 506 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
 507 #include <math.h>  // ldexp
 508 #endif
 509
 510 #ifndef STBI_NO_STDIO
 511 #include <stdio.h>
 512 #endif
 513
 514 #ifndef STBI_ASSERT // clients may pre-define STBI_ASSERT(x) to avoid pulling in assert.h
 515 #include <assert.h>
 516 #define STBI_ASSERT(x) assert(x)
 517 #endif
 518
 519
 520 #ifndef _MSC_VER
     // stbi_inline: inlining hint placed on small helpers.
     // Non-MSVC C++ gets 'inline'; non-MSVC C gets nothing at all.
 521 #ifdef __cplusplus
 522 #define stbi_inline inline
 523 #else
 524 #define stbi_inline
 525 #endif
 526 #else
 527 #define stbi_inline __forceinline // MSVC
 528 #endif
 529
 530
 531 #ifdef _MSC_VER
     // Fixed-width integer aliases. Under MSVC they are spelled with the
     // built-in types instead of including <stdint.h>.
 532 typedef unsigned short stbi__uint16;
 533 typedef   signed short stbi__int16;
 534 typedef unsigned int   stbi__uint32;
 535 typedef   signed int   stbi__int32;
 536 #else
 537 #include <stdint.h>
 538 typedef uint16_t stbi__uint16;
 539 typedef int16_t  stbi__int16;
 540 typedef uint32_t stbi__uint32;
 541 typedef int32_t  stbi__int32;
 542 #endif
 543
 544 // should produce compiler error if size is wrong
     // (the array size evaluates to -1, which is ill-formed, unless stbi__uint32 is exactly 4 bytes)
 545 typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];
 546
 547 #ifdef _MSC_VER
     // STBI_NOTUSED(v): mark 'v' as deliberately unused.
     // The non-MSVC form wraps 'v' in sizeof, so 'v' is not evaluated there.
 548 #define STBI_NOTUSED(v)  (void)(v)
 549 #else
 550 #define STBI_NOTUSED(v)  (void)sizeof(v)
 551 #endif
 552
 553 #ifdef _MSC_VER
 554 #define STBI_HAS_LROTL // MSVC provides the _lrotl intrinsic
 555 #endif
 556
     // stbi_lrot(x,y): rotate x left by y bits.
     // The portable fallback expands both arguments twice, so they must be free
     // of side effects.
     // NOTE(review): the fallback assumes x is a 32-bit unsigned value and
     // 0 < y < 32; y == 0 would shift right by 32 -- confirm callers never pass 0.
 557 #ifdef STBI_HAS_LROTL
 558 #define stbi_lrot(x,y)  _lrotl(x,y)
 559 #else
 560 #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
 561 #endif
 562
 563 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
 564 // ok
 565 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
 566 // ok
 567 #else
     // A partial override would mix allocators, so it is rejected at compile time.
 568 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
 569 #endif
 570
     // Default allocator: the C library.
 571 #ifndef STBI_MALLOC
 572 #define STBI_MALLOC(sz)           malloc(sz)
 573 #define STBI_REALLOC(p,newsz)     realloc(p,newsz)
 574 #define STBI_FREE(p)              free(p)
 575 #endif
 576
     // Default sized realloc: drop the old size and forward to STBI_REALLOC.
 577 #ifndef STBI_REALLOC_SIZED
 578 #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
 579 #endif
 580
 581 // x86/x64 detection
 582 #if defined(__x86_64__) || defined(_M_X64)
 583 #define STBI__X64_TARGET // 64-bit x86 (GCC/Clang or MSVC predefined macro)
 584 #elif defined(__i386) || defined(_M_IX86)
 585 #define STBI__X86_TARGET // 32-bit x86; at most one of the two target macros is ever defined
 586 #endif
 587
 588 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
 589 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
 590 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
 591 // but previous attempts to provide the SSE2 functions with runtime
 592 // detection caused numerous issues. The way architecture extensions are
 593 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
 594 // New behavior: if compiled with -msse2, we use SSE2 without any
 595 // detection; if not, we don't use it at all.
     // (this test only applies to 32-bit x86 targets: it checks STBI__X86_TARGET, not STBI__X64_TARGET)
 596 #define STBI_NO_SIMD
 597 #endif
 598
 599 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
 600 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
 601 //
 602 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
 603 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
 604 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
 605 // simultaneously enabling "-mstackrealign".
 606 //
 607 // See https://github.com/nothings/stb/issues/81 for more information.
 608 //
 609 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
 610 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
 611 #define STBI_NO_SIMD
 612 #endif
 613
 614 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
 615 #define STBI_SSE2
 616 #include <emmintrin.h>
 617
 618 #ifdef _MSC_VER
 619
     // stbi__cpuid3: run CPUID with EAX = 1 and return the fourth result word
     // (EDX, as the inline-assembly variant below makes explicit).
 620 #if _MSC_VER >= 1400  // not VC6
 621 #include <intrin.h> // __cpuid
 622 static int stbi__cpuid3(void)
 623 {
 624         int info[4];
 625         __cpuid(info, 1);
 626         return info[3];
 627 }
 628 #else
 629 static int stbi__cpuid3(void)
 630 {
 631         int res;
 632         __asm {
 633                 mov  eax, 1
 634                 cpuid
 635                 mov  res, edx
 636         }
 637         return res;
 638 }
 639 #endif
 640
     // Declare 'name' of type 'type' with 16-byte alignment (MSVC spelling).
 641 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
 642
     // Run-time SSE2 check for MSVC builds: returns 1 when bit 26 of the
     // word from stbi__cpuid3() is set, 0 otherwise.
 643 static int stbi__sse2_available(void)
 644 {
 645         int info3 = stbi__cpuid3();
 646         return ((info3 >> 26) & 1) != 0;
 647 }
 648 #else // assume GCC-style if not VC++
     // Declare 'name' of type 'type' with 16-byte alignment (GCC/Clang spelling).
 649 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 650
     // Non-MSVC builds do no run-time detection: always reports SSE2 as available.
 651 static int stbi__sse2_available(void)
 652 {
 653         // If we're even attempting to compile this on GCC/Clang, that means
 654         // -msse2 is on, which means the compiler is allowed to use SSE2
 655         // instructions at will, and so are we.
 656         return 1;
 657 }
 658 #endif
 659 #endif
 660
 661 // ARM NEON
 662 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
 663 #undef STBI_NEON // STBI_NO_SIMD wins over an explicit STBI_NEON request
 664 #endif
 665
 666 #ifdef STBI_NEON
 667 #include <arm_neon.h>
 668 // assume GCC or Clang on ARM targets
 669 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 670 #endif
 671
     // No SIMD path selected above: plain declaration without an alignment request.
 672 #ifndef STBI_SIMD_ALIGN
 673 #define STBI_SIMD_ALIGN(type, name) type name
 674 #endif
 675
 676 ///////////////////////////////////////////////
 677 //
 678 //  stbi__context struct and start_xxx functions
 679
 680 // stbi__context structure is our basic context used by all images, so it
 681 // contains all the IO context, plus some basic image information
 682 typedef struct
 683 {
 684         stbi__uint32 img_x, img_y; // image dimensions (presumably width and height in pixels -- confirm in the decoders)
 685         int img_n, img_out_n;      // channel counts (presumably in-file vs. output -- confirm in the decoders)
 686
 687         stbi_io_callbacks io;      // copy of the caller's callback table; io.read is NULL for memory sources
 688         void *io_user_data;        // opaque 'user' pointer stored by stbi__start_callbacks
 689
 690         int read_from_callbacks;   // 0 for a memory source, 1 for a callback source
 691         int buflen;                // set to sizeof(buffer_start) for callback sources; not set for memory sources
 692         stbi_uc buffer_start[128]; // staging buffer for callback sources
 693
 694         stbi_uc *img_buffer, *img_buffer_end;                   // current read position and end of the readable data
 695         stbi_uc *img_buffer_original, *img_buffer_original_end; // initial window, restored by stbi__rewind
 696 } stbi__context;
 697
 698
 699 static void stbi__refill_buffer(stbi__context *s); // forward declaration: stbi__start_callbacks below calls it
 700
 701 // initialize a memory-decode context
 702 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
 703 {
             // Set up 's' to read straight from the caller's memory block.
             //   s      - context to initialise
             //   buffer - start of the encoded image data (not copied)
             //   len    - number of bytes available at 'buffer'
             // Only the fields assigned below are written; io_user_data, buflen and
             // buffer_start are left untouched.
 704         s->io.read = NULL;
 705         s->read_from_callbacks = 0;
             // const is cast away because the context stores non-const stbi_uc pointers
 706         s->img_buffer = s->img_buffer_original = (stbi_uc *)buffer;
 707         s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *)buffer + len;
 708 }
 709
 710 // initialize a callback-based context
 711 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
 712 {
             // Set up 's' to read through user callbacks.
             //   s    - context to initialise
             //   c    - callback table; copied by value, so it need not outlive this call
             //   user - opaque pointer stored in s->io_user_data
 713         s->io = *c;
 714         s->io_user_data = user;
 715         s->buflen = sizeof(s->buffer_start);
 716         s->read_from_callbacks = 1;
 717         s->img_buffer_original = s->buffer_start;
             // presumably performs the first read into buffer_start and sets
             // img_buffer / img_buffer_end -- its body is not shown here
 718         stbi__refill_buffer(s);
             // remember where the first chunk ends so stbi__rewind can restore it
 719         s->img_buffer_original_end = s->img_buffer_end;
 720 }
 721
 722 #ifndef STBI_NO_STDIO
 723
 724 static int stbi__stdio_read(void *user, char *data, int size)
 725 {
             // 'read' callback for FILE-backed contexts: 'user' is the FILE*.
             // Returns the number of bytes fread() actually stored in 'data', which
             // can be less than 'size'.
             // NOTE(review): 'size' is converted to size_t by fread; assumes callers
             // never pass a negative size.
 726         return (int)fread(data, 1, size, (FILE*)user);
 727 }
 728
 729 static void stbi__stdio_skip(void *user, int n)
 730 {
             // 'skip' callback for FILE-backed contexts: move the file position by
             // 'n' bytes relative to the current position (a negative 'n' moves back).
             // The result of fseek() is not checked, so a failed seek goes unreported.
 731         fseek((FILE*)user, n, SEEK_CUR);
 732 }
 733
 734 static int stbi__stdio_eof(void *user)
 735 {
             // 'eof' callback for FILE-backed contexts: 'user' is the FILE* that
             // stbi__start_file passed as user data.
             // Returns nonzero when no more data can be read from the stream: either
             // the end-of-file indicator or the error indicator is set. A failed read
             // sets only the error indicator, so checking feof() alone would report
             // "more data available" forever on a broken stream.
 736         return feof((FILE*)user) || ferror((FILE*)user);
 737 }
 738
 739 static stbi_io_callbacks stbi__stdio_callbacks =
 740 {
             // Callback table that adapts a stdio FILE* to the stbi_io_callbacks
             // interface; entries are in the struct's field order: read, skip, eof.
 741         stbi__stdio_read,
 742         stbi__stdio_skip,
 743         stbi__stdio_eof,
 744 };
 745
 746 static void stbi__start_file(stbi__context *s, FILE *f)
 747 {
             // Initialise 's' to read from the open stream 'f' by routing it through
             // the stdio callback table, with 'f' itself as the callbacks' user pointer.
 748         stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *)f);
 749 }
 750
 751 //static void stop_file(stbi__context *s) { }
 752
 753 #endif // !STBI_NO_STDIO
 754
 755 static void stbi__rewind(stbi__context *s)
 756 {
             // Reset the read window of 's' to the one recorded when the context was
             // started. No callback is invoked, so the underlying source itself is
             // not repositioned.
 757         // conceptually rewind SHOULD rewind to the beginning of the stream,
 758         // but we just rewind to the beginning of the initial buffer, because
 759         // we only use it after doing 'test', which never looks at more than 92 bytes
 760         s->img_buffer = s->img_buffer_original;
 761         s->img_buffer_end = s->img_buffer_original_end;
 762 }
 763
 764 enum
 765 {
             // Values for stbi__result_info.channel_order (presumably the order in
             // which a decoder emitted the colour channels -- confirm in the decoders).
 766         STBI_ORDER_RGB, // = 0
 767         STBI_ORDER_BGR  // = 1
 768 };
 769
 770 typedef struct
 771 {
             // Extra information about decoded pixel data; every stbi__*_load function
             // declared below takes a pointer to one of these as its 'ri' parameter.
 772         int bits_per_channel; // presumably 8 or 16 -- confirm in the decoders
 773         int num_channels;
 774         int channel_order;    // presumably STBI_ORDER_RGB or STBI_ORDER_BGR -- confirm in the decoders
 775 } stbi__result_info;
 776
     // Forward declarations of the per-format decoder entry points. Each format
     // contributes a test / load / info triple, compiled in only when the
     // matching STBI_NO_* macro is not defined.
 777 #ifndef STBI_NO_JPEG
 778 static int      stbi__jpeg_test(stbi__context *s);
 779 static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 780 static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
 781 #endif
 782
 783 #ifndef STBI_NO_PNG
 784 static int      stbi__png_test(stbi__context *s);
 785 static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 786 static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
 787 #endif
 788
 789 #ifndef STBI_NO_BMP
 790 static int      stbi__bmp_test(stbi__context *s);
 791 static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 792 static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
 793 #endif
 794
 795 #ifndef STBI_NO_TGA
 796 static int      stbi__tga_test(stbi__context *s);
 797 static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 798 static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
 799 #endif
 800
 801 #ifndef STBI_NO_PSD
 802 static int      stbi__psd_test(stbi__context *s);
     // the PSD loader is the only one that takes an extra 'bpc' argument
 803 static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
 804 static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
 805 #endif
 806
 807 #ifndef STBI_NO_HDR
 808 static int      stbi__hdr_test(stbi__context *s);
     // the HDR loader is the only one that returns float* rather than void*
 809 static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 810 static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
 811 #endif
 812
 813 #ifndef STBI_NO_PIC
 814 static int      stbi__pic_test(stbi__context *s);
 815 static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 816 static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
 817 #endif
 818
 819 #ifndef STBI_NO_GIF
 820 static int      stbi__gif_test(stbi__context *s);
 821 static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 822 static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
 823 #endif
 824
 825 #ifndef STBI_NO_PNM
 826 static int      stbi__pnm_test(stbi__context *s);
 827 static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 828 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
 829 #endif
 830
 831 // this is not threadsafe
 832 static const char *stbi__g_failure_reason;
 833
 834 STBIDEF const char *stbi_failure_reason(void)
 835 {
 836         return stbi__g_failure_reason;
 837 }
 838
 839 static int stbi__err(const char *str)
 840 {
 841         stbi__g_failure_reason = str;
 842         return 0;
 843 }
 844
 845 static void *stbi__malloc(size_t size)
 846 {
 847         return STBI_MALLOC(size);
 848 }
 849
 850 // stb_image uses ints pervasively, including for offset calculations.
 851 // therefore the largest decoded image size we can support with the
 852 // current code, even on 64-bit targets, is INT_MAX. this is not a
 853 // significant limitation for the intended use case.
 854 //
 855 // we do, however, need to make sure our size calculations don't
 856 // overflow. hence a few helper functions for size calculations that
 857 // multiply integers together, making sure that they're non-negative
 858 // and no overflow occurs.
 859
 860 // return 1 if the sum is valid, 0 on overflow.
 861 // negative terms are considered invalid.
 862 static int stbi__addsizes_valid(int a, int b)
 863 {
 864         if (b < 0) return 0;
 865         // now 0 <= b <= INT_MAX, hence also
 866         // 0 <= INT_MAX - b <= INTMAX.
 867         // And "a + b <= INT_MAX" (which might overflow) is the
 868         // same as a <= INT_MAX - b (no overflow)
 869         return a <= INT_MAX - b;
 870 }
 871
 872 // returns 1 if the product is valid, 0 on overflow.
 873 // negative factors are considered invalid.
 874 static int stbi__mul2sizes_valid(int a, int b)
 875 {
 876         if (a < 0 || b < 0) return 0;
 877         if (b == 0) return 1; // mul-by-0 is always safe
 878                                                   // portable way to check for no overflows in a*b
 879         return a <= INT_MAX / b;
 880 }
 881
 882 // returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
 883 static int stbi__mad2sizes_valid(int a, int b, int add)
 884 {
 885         return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
 886 }
 887
 888 // returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
 889 static int stbi__mad3sizes_valid(int a, int b, int c, int add)
 890 {
 891         return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
 892                 stbi__addsizes_valid(a*b*c, add);
 893 }
 894
 895 // returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
 896 static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
 897 {
 898         return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
 899                 stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
 900 }
 901
 902 // mallocs with size overflow checking
 903 static void *stbi__malloc_mad2(int a, int b, int add)
 904 {
 905         if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
 906         return stbi__malloc(a*b + add);
 907 }
 908
 909 static void *stbi__malloc_mad3(int a, int b, int c, int add)
 910 {
 911         if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
 912         return stbi__malloc(a*b*c + add);
 913 }
 914
 915 static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
 916 {
 917         if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
 918         return stbi__malloc(a*b*c*d + add);
 919 }
 920
 921 // stbi__err - error
 922 // stbi__errpf - error returning pointer to float
 923 // stbi__errpuc - error returning pointer to unsigned char
 924
 925 #ifdef STBI_NO_FAILURE_STRINGS
 926 #define stbi__err(x,y)  0
 927 #elif defined(STBI_FAILURE_USERMSG)
 928 #define stbi__err(x,y)  stbi__err(y)
 929 #else
 930 #define stbi__err(x,y)  stbi__err(x)
 931 #endif
 932
 933 #define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
 934 #define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
 935
 936 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
 937 {
 938         STBI_FREE(retval_from_stbi_load);
 939 }
 940
 941 #ifndef STBI_NO_LINEAR
 942 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
 943 #endif
 944
 945 #ifndef STBI_NO_HDR
 946 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
 947 #endif
 948
 949 static int stbi__vertically_flip_on_load = 0;
 950
 951 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
 952 {
 953         stbi__vertically_flip_on_load = flag_true_if_should_flip;
 954 }
 955
 956 static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
 957 {
 958         memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
 959         ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
 960         ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
 961         ri->num_channels = 0;
 962
 963 #ifndef STBI_NO_JPEG
 964         if (stbi__jpeg_test(s)) return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
 965 #endif
 966 #ifndef STBI_NO_PNG
 967         if (stbi__png_test(s))  return stbi__png_load(s, x, y, comp, req_comp, ri);
 968 #endif
 969 #ifndef STBI_NO_BMP
 970         if (stbi__bmp_test(s))  return stbi__bmp_load(s, x, y, comp, req_comp, ri);
 971 #endif
 972 #ifndef STBI_NO_GIF
 973         if (stbi__gif_test(s))  return stbi__gif_load(s, x, y, comp, req_comp, ri);
 974 #endif
 975 #ifndef STBI_NO_PSD
 976         if (stbi__psd_test(s))  return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);
 977 #endif
 978 #ifndef STBI_NO_PIC
 979         if (stbi__pic_test(s))  return stbi__pic_load(s, x, y, comp, req_comp, ri);
 980 #endif
 981 #ifndef STBI_NO_PNM
 982         if (stbi__pnm_test(s))  return stbi__pnm_load(s, x, y, comp, req_comp, ri);
 983 #endif
 984
 985 #ifndef STBI_NO_HDR
 986         if (stbi__hdr_test(s)) {
 987                 float *hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);
 988                 return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
 989         }
 990 #endif
 991
 992 #ifndef STBI_NO_TGA
 993         // test tga last because it's a crappy test!
 994         if (stbi__tga_test(s))
 995                 return stbi__tga_load(s, x, y, comp, req_comp, ri);
 996 #endif
 997
 998         return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
 999 }
1000
1001 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1002 {
1003         int i;
1004         int img_len = w * h * channels;
1005         stbi_uc *reduced;
1006
1007         reduced = (stbi_uc *)stbi__malloc(img_len);
1008         if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1009
1010         for (i = 0; i < img_len; ++i)
1011                 reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1012
1013         STBI_FREE(orig);
1014         return reduced;
1015 }
1016
1017 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1018 {
1019         int i;
1020         int img_len = w * h * channels;
1021         stbi__uint16 *enlarged;
1022
1023         enlarged = (stbi__uint16 *)stbi__malloc(img_len * 2);
1024         if (enlarged == NULL) return (stbi__uint16 *)stbi__errpuc("outofmem", "Out of memory");
1025
1026         for (i = 0; i < img_len; ++i)
1027                 enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1028
1029         STBI_FREE(orig);
1030         return enlarged;
1031 }
1032
1033 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1034 {
1035         int row;
1036         size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1037         stbi_uc temp[2048];
1038         stbi_uc *bytes = (stbi_uc *)image;
1039
1040         for (row = 0; row < (h >> 1); row++) {
1041                 stbi_uc *row0 = bytes + row*bytes_per_row;
1042                 stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1043                 // swap row0 with row1
1044                 size_t bytes_left = bytes_per_row;
1045                 while (bytes_left) {
1046                         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1047                         memcpy(temp, row0, bytes_copy);
1048                         memcpy(row0, row1, bytes_copy);
1049                         memcpy(row1, temp, bytes_copy);
1050                         row0 += bytes_copy;
1051                         row1 += bytes_copy;
1052                         bytes_left -= bytes_copy;
1053                 }
1054         }
1055 }
1056
1057 static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1058 {
1059         stbi__result_info ri;
1060         void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
1061
1062         if (result == NULL)
1063                 return NULL;
1064
1065         if (ri.bits_per_channel != 8) {
1066                 STBI_ASSERT(ri.bits_per_channel == 16);
1067                 result = stbi__convert_16_to_8((stbi__uint16 *)result, *x, *y, req_comp == 0 ? *comp : req_comp);
1068                 ri.bits_per_channel = 8;
1069         }
1070
1071         // @TODO: move stbi__convert_format to here
1072
1073         if (stbi__vertically_flip_on_load) {
1074                 int channels = req_comp ? req_comp : *comp;
1075                 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
1076         }
1077
1078         return (unsigned char *)result;
1079 }
1080
1081 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1082 {
1083         stbi__result_info ri;
1084         void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1085
1086         if (result == NULL)
1087                 return NULL;
1088
1089         if (ri.bits_per_channel != 16) {
1090                 STBI_ASSERT(ri.bits_per_channel == 8);
1091                 result = stbi__convert_8_to_16((stbi_uc *)result, *x, *y, req_comp == 0 ? *comp : req_comp);
1092                 ri.bits_per_channel = 16;
1093         }
1094
1095         // @TODO: move stbi__convert_format16 to here
1096         // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1097
1098         if (stbi__vertically_flip_on_load) {
1099                 int channels = req_comp ? req_comp : *comp;
1100                 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1101         }
1102
1103         return (stbi__uint16 *)result;
1104 }
1105
1106 #ifndef STBI_NO_HDR
1107 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1108 {
1109         if (stbi__vertically_flip_on_load && result != NULL) {
1110                 int channels = req_comp ? req_comp : *comp;
1111                 stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1112         }
1113 }
1114 #endif
1115
1116 #ifndef STBI_NO_STDIO
1117
// Open 'filename' with the given stdio 'mode'.
// Uses fopen_s on MSVC 2005 and later (_MSC_VER >= 1400), plain fopen
// elsewhere. Returns the stream, or a null pointer if the file could not be
// opened.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
        FILE *f;
        if (fopen_s(&f, filename, mode) != 0)
                return 0;
        return f;
#else
        return fopen(filename, mode);
#endif
}
1129
1130
1131 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1132 {
1133         FILE *f = stbi__fopen(filename, "rb");
1134         unsigned char *result;
1135         if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1136         result = stbi_load_from_file(f, x, y, comp, req_comp);
1137         fclose(f);
1138         return result;
1139 }
1140
1141 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1142 {
1143         unsigned char *result;
1144         stbi__context s;
1145         stbi__start_file(&s, f);
1146         result = stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
1147         if (result) {
1148                 // need to 'unget' all the characters in the IO buffer
1149                 fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
1150         }
1151         return result;
1152 }
1153
1154 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1155 {
1156         stbi__uint16 *result;
1157         stbi__context s;
1158         stbi__start_file(&s, f);
1159         result = stbi__load_and_postprocess_16bit(&s, x, y, comp, req_comp);
1160         if (result) {
1161                 // need to 'unget' all the characters in the IO buffer
1162                 fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
1163         }
1164         return result;
1165 }
1166
1167 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1168 {
1169         FILE *f = stbi__fopen(filename, "rb");
1170         stbi__uint16 *result;
1171         if (!f) return (stbi_us *)stbi__errpuc("can't fopen", "Unable to open file");
1172         result = stbi_load_from_file_16(f, x, y, comp, req_comp);
1173         fclose(f);
1174         return result;
1175 }
1176
1177
1178 #endif //!STBI_NO_STDIO
1179
1180 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1181 {
1182         stbi__context s;
1183         stbi__start_mem(&s, buffer, len);
1184         return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);
1185 }
1186
1187 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1188 {
1189         stbi__context s;
1190         stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1191         return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);
1192 }
1193
1194 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1195 {
1196         stbi__context s;
1197         stbi__start_mem(&s, buffer, len);
1198         return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
1199 }
1200
1201 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1202 {
1203         stbi__context s;
1204         stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1205         return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
1206 }
1207
1208 #ifndef STBI_NO_LINEAR
1209 static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1210 {
1211         unsigned char *data;
1212 #ifndef STBI_NO_HDR
1213         if (stbi__hdr_test(s)) {
1214                 stbi__result_info ri;
1215                 float *hdr_data = stbi__hdr_load(s, x, y, comp, req_comp, &ri);
1216                 if (hdr_data)
1217                         stbi__float_postprocess(hdr_data, x, y, comp, req_comp);
1218                 return hdr_data;
1219         }
1220 #endif
1221         data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
1222         if (data)
1223                 return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
1224         return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
1225 }
1226
1227 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1228 {
1229         stbi__context s;
1230         stbi__start_mem(&s, buffer, len);
1231         return stbi__loadf_main(&s, x, y, comp, req_comp);
1232 }
1233
1234 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1235 {
1236         stbi__context s;
1237         stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1238         return stbi__loadf_main(&s, x, y, comp, req_comp);
1239 }
1240
1241 #ifndef STBI_NO_STDIO
1242 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1243 {
1244         float *result;
1245         FILE *f = stbi__fopen(filename, "rb");
1246         if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1247         result = stbi_loadf_from_file(f, x, y, comp, req_comp);
1248         fclose(f);
1249         return result;
1250 }
1251
1252 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1253 {
1254         stbi__context s;
1255         stbi__start_file(&s, f);
1256         return stbi__loadf_main(&s, x, y, comp, req_comp);
1257 }
1258 #endif // !STBI_NO_STDIO
1259
1260 #endif // !STBI_NO_LINEAR
1261
// these is-hdr-or-not queries are defined independent of whether
// STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is defined,
// they always report false!
1265
1266 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1267 {
1268 #ifndef STBI_NO_HDR
1269         stbi__context s;
1270         stbi__start_mem(&s, buffer, len);
1271         return stbi__hdr_test(&s);
1272 #else
1273         STBI_NOTUSED(buffer);
1274         STBI_NOTUSED(len);
1275         return 0;
1276 #endif
1277 }
1278
1279 #ifndef STBI_NO_STDIO
1280 STBIDEF int      stbi_is_hdr(char const *filename)
1281 {
1282         FILE *f = stbi__fopen(filename, "rb");
1283         int result = 0;
1284         if (f) {
1285                 result = stbi_is_hdr_from_file(f);
1286                 fclose(f);
1287         }
1288         return result;
1289 }
1290
1291 STBIDEF int      stbi_is_hdr_from_file(FILE *f)
1292 {
1293 #ifndef STBI_NO_HDR
1294         stbi__context s;
1295         stbi__start_file(&s, f);
1296         return stbi__hdr_test(&s);
1297 #else
1298         STBI_NOTUSED(f);
1299         return 0;
1300 #endif
1301 }
1302 #endif // !STBI_NO_STDIO
1303
1304 STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1305 {
1306 #ifndef STBI_NO_HDR
1307         stbi__context s;
1308         stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1309         return stbi__hdr_test(&s);
1310 #else
1311         STBI_NOTUSED(clbk);
1312         STBI_NOTUSED(user);
1313         return 0;
1314 #endif
1315 }
1316
1317 #ifndef STBI_NO_LINEAR
1318 static float stbi__l2h_gamma = 2.2f, stbi__l2h_scale = 1.0f;
1319
1320 STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
1321 STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1322 #endif
1323
1324 static float stbi__h2l_gamma_i = 1.0f / 2.2f, stbi__h2l_scale_i = 1.0f;
1325
1326 STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1 / gamma; }
1327 STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1 / scale; }
1328
1329
1330 //////////////////////////////////////////////////////////////////////////////
1331 //
1332 // Common code used by all image loaders
1333 //
1334
// Scan modes shared by the image loaders.
enum
{
        STBI__SCAN_load   = 0,
        STBI__SCAN_type   = 1,
        STBI__SCAN_header = 2
};
1341
1342 static void stbi__refill_buffer(stbi__context *s)
1343 {
1344         int n = (s->io.read)(s->io_user_data, (char*)s->buffer_start, s->buflen);
1345         if (n == 0) {
1346                 // at end of file, treat same as if from memory, but need to handle case
1347                 // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1348                 s->read_from_callbacks = 0;
1349                 s->img_buffer = s->buffer_start;
1350                 s->img_buffer_end = s->buffer_start + 1;
1351                 *s->img_buffer = 0;
1352         }
1353         else {
1354                 s->img_buffer = s->buffer_start;
1355                 s->img_buffer_end = s->buffer_start + n;
1356         }
1357 }
1358
1359 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1360 {
1361         if (s->img_buffer < s->img_buffer_end)
1362                 return *s->img_buffer++;
1363         if (s->read_from_callbacks) {
1364                 stbi__refill_buffer(s);
1365                 return *s->img_buffer++;
1366         }
1367         return 0;
1368 }
1369
1370 stbi_inline static int stbi__at_eof(stbi__context *s)
1371 {
1372         if (s->io.read) {
1373                 if (!(s->io.eof)(s->io_user_data)) return 0;
1374                 // if feof() is true, check if buffer = end
1375                 // special case: we've only got the special 0 character at the end
1376                 if (s->read_from_callbacks == 0) return 1;
1377         }
1378
1379         return s->img_buffer >= s->img_buffer_end;
1380 }
1381
1382 static void stbi__skip(stbi__context *s, int n)
1383 {
1384         if (n < 0) {
1385                 s->img_buffer = s->img_buffer_end;
1386                 return;
1387         }
1388         if (s->io.read) {
1389                 int blen = (int)(s->img_buffer_end - s->img_buffer);
1390                 if (blen < n) {
1391                         s->img_buffer = s->img_buffer_end;
1392                         (s->io.skip)(s->io_user_data, n - blen);
1393                         return;
1394                 }
1395         }
1396         s->img_buffer += n;
1397 }
1398
1399 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1400 {
1401         if (s->io.read) {
1402                 int blen = (int)(s->img_buffer_end - s->img_buffer);
1403                 if (blen < n) {
1404                         int res, count;
1405
1406                         memcpy(buffer, s->img_buffer, blen);
1407
1408                         count = (s->io.read)(s->io_user_data, (char*)buffer + blen, n - blen);
1409                         res = (count == (n - blen));
1410                         s->img_buffer = s->img_buffer_end;
1411                         return res;
1412                 }
1413         }
1414
1415         if (s->img_buffer + n <= s->img_buffer_end) {
1416                 memcpy(buffer, s->img_buffer, n);
1417                 s->img_buffer += n;
1418                 return 1;
1419         }
1420         else
1421                 return 0;
1422 }
1423
1424 static int stbi__get16be(stbi__context *s)
1425 {
1426         int z = stbi__get8(s);
1427         return (z << 8) + stbi__get8(s);
1428 }
1429
1430 static stbi__uint32 stbi__get32be(stbi__context *s)
1431 {
1432         stbi__uint32 z = stbi__get16be(s);
1433         return (z << 16) + stbi__get16be(s);
1434 }
1435
1436 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1437 // nothing
1438 #else
1439 static int stbi__get16le(stbi__context *s)
1440 {
1441         int z = stbi__get8(s);
1442         return z + (stbi__get8(s) << 8);
1443 }
1444 #endif
1445
1446 #ifndef STBI_NO_BMP
1447 static stbi__uint32 stbi__get32le(stbi__context *s)
1448 {
1449         stbi__uint32 z = stbi__get16le(s);
1450         return z + (stbi__get16le(s) << 16);
1451 }
1452 #endif
1453
// truncate int to byte without warnings: keep only the low 8 bits
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 0xff))
1455
1456
1457 //////////////////////////////////////////////////////////////////////////////
1458 //
1459 //  generic converter from built-in img_n to req_comp
1460 //    individual types do this automatically as much as possible (e.g. jpeg
1461 //    does all cases internally since it needs to colorspace convert anyway,
1462 //    and it never has alpha, so very few cases ). png can automatically
1463 //    interleave an alpha=255 channel, but falls back to this for other cases
1464 //
1465 //  assume data buffer is malloced, so malloc a new one and free that one
1466 //  only failure mode is malloc failing
1467
1468 static stbi_uc stbi__compute_y(int r, int g, int b)
1469 {
1470         return (stbi_uc)(((r * 77) + (g * 150) + (29 * b)) >> 8);
1471 }
1472
// Convert an 8-bit image from img_n components per pixel to req_comp.
//   data     - source pixels (x*y pixels); freed here unless it is returned
//   img_n    - components per pixel in 'data'
//   req_comp - components per pixel wanted (asserted to be 1..4)
//   x, y     - image width and height
// Returns 'data' itself when no conversion is needed, otherwise a newly
// allocated buffer; on allocation failure 'data' is freed and NULL is
// returned with "outofmem" recorded.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
        int i, row;
        unsigned char *converted;

        if (req_comp == img_n) return data;
        STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

        converted = (unsigned char *)stbi__malloc_mad3(req_comp, x, y, 0);
        if (converted == NULL) {
                STBI_FREE(data);
                return stbi__errpuc("outofmem", "Out of memory");
        }

        for (row = 0; row < (int)y; ++row) {
                unsigned char *src = data + row * x * img_n;
                unsigned char *dest = converted + row * x * req_comp;

#define STBI__COMBO(a,b)  ((a)*8+(b))
#define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
                // one switch per scanline rather than per pixel; each case
                // expands to a loop over the pixels of the row
                switch (STBI__COMBO(img_n, req_comp)) {
                        STBI__CASE(1, 2) { dest[0] = src[0]; dest[1] = 255; } break;
                        STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
                        STBI__CASE(1, 4) { dest[0] = dest[1] = dest[2] = src[0]; dest[3] = 255; } break;
                        STBI__CASE(2, 1) { dest[0] = src[0]; } break;
                        STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
                        STBI__CASE(2, 4) { dest[0] = dest[1] = dest[2] = src[0]; dest[3] = src[1]; } break;
                        STBI__CASE(3, 4) { dest[0] = src[0]; dest[1] = src[1]; dest[2] = src[2]; dest[3] = 255; } break;
                        STBI__CASE(3, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); } break;
                        STBI__CASE(3, 2) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); dest[1] = 255; } break;
                        STBI__CASE(4, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); } break;
                        STBI__CASE(4, 2) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); dest[1] = src[3]; } break;
                        STBI__CASE(4, 3) { dest[0] = src[0]; dest[1] = src[1]; dest[2] = src[2]; } break;
                default: STBI_ASSERT(0);
                }
#undef STBI__CASE
        }

        STBI_FREE(data);
        return converted;
}
1516
1517 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1518 {
1519         return (stbi__uint16)(((r * 77) + (g * 150) + (29 * b)) >> 8);
1520 }
1521
1522 static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1523 {
1524         int i, j;
1525         stbi__uint16 *good;
1526
1527         if (req_comp == img_n) return data;
1528         STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1529
1530         good = (stbi__uint16 *)stbi__malloc(req_comp * x * y * 2);
1531         if (good == NULL) {
1532                 STBI_FREE(data);
1533                 return (stbi__uint16 *)stbi__errpuc("outofmem", "Out of memory");
1534         }
1535
1536         for (j = 0; j < (int)y; ++j) {
1537                 stbi__uint16 *src = data + j * x * img_n;
1538                 stbi__uint16 *dest = good + j * x * req_comp;
1539
1540 #define STBI__COMBO(a,b)  ((a)*8+(b))
1541 #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1542                 // convert source image with img_n components to one with req_comp components;
1543                 // avoid switch per pixel, so use switch per scanline and massive macros
1544                 switch (STBI__COMBO(img_n, req_comp)) {
1545                         STBI__CASE(1, 2) { dest[0] = src[0], dest[1] = 0xffff; } break;
1546                         STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
1547                         STBI__CASE(1, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = 0xffff; } break;
1548                         STBI__CASE(2, 1) { dest[0] = src[0]; } break;
1549                         STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
1550                         STBI__CASE(2, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1]; } break;
1551                         STBI__CASE(3, 4) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2], dest[3] = 0xffff; } break;
1552                         STBI__CASE(3, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); } break;
1553                         STBI__CASE(3, 2) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]), dest[1] = 0xffff; } break;
1554                         STBI__CASE(4, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); } break;
1555                         STBI__CASE(4, 2) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]), dest[1] = src[3]; } break;
1556                         STBI__CASE(4, 3) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2]; } break;
1557                 default: STBI_ASSERT(0);
1558                 }
1559 #undef STBI__CASE
1560         }
1561
1562         STBI_FREE(data);
1563         return good;
1564 }
1565
1566 #ifndef STBI_NO_LINEAR
1567 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1568 {
1569         int i, k, n;
1570         float *output;
1571         if (!data) return NULL;
1572         output = (float *)stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1573         if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1574         // compute number of non-alpha components
1575         if (comp & 1) n = comp; else n = comp - 1;
1576         for (i = 0; i < x*y; ++i) {
1577                 for (k = 0; k < n; ++k) {
1578                         output[i*comp + k] = (float)(pow(data[i*comp + k] / 255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1579                 }
1580                 if (k < comp) output[i*comp + k] = data[i*comp + k] / 255.0f;
1581         }
1582         STBI_FREE(data);
1583         return output;
1584 }
1585 #endif
1586
1587 #ifndef STBI_NO_HDR
1588 #define stbi__float2int(x)   ((int) (x))
1589 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
1590 {
1591         int i, k, n;
1592         stbi_uc *output;
1593         if (!data) return NULL;
1594         output = (stbi_uc *)stbi__malloc_mad3(x, y, comp, 0);
1595         if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1596         // compute number of non-alpha components
1597         if (comp & 1) n = comp; else n = comp - 1;
1598         for (i = 0; i < x*y; ++i) {
1599                 for (k = 0; k < n; ++k) {
1600                         float z = (float)pow(data[i*comp + k] * stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1601                         if (z < 0) z = 0;
1602                         if (z > 255) z = 255;
1603                         output[i*comp + k] = (stbi_uc)stbi__float2int(z);
1604                 }
1605                 if (k < comp) {
1606                         float z = data[i*comp + k] * 255 + 0.5f;
1607                         if (z < 0) z = 0;
1608                         if (z > 255) z = 255;
1609                         output[i*comp + k] = (stbi_uc)stbi__float2int(z);
1610                 }
1611         }
1612         STBI_FREE(data);
1613         return output;
1614 }
1615 #endif
1616
1617 //////////////////////////////////////////////////////////////////////////////
1618 //
1619 //  "baseline" JPEG/JFIF decoder
1620 //
1621 //    simple implementation
1622 //      - doesn't support delayed output of y-dimension
1623 //      - simple interface (only one output format: 8-bit interleaved RGB)
1624 //      - doesn't try to recover corrupt jpegs
1625 //      - doesn't allow partial loading, loading multiple at once
1626 //      - still fast on x86 (copying globals into locals doesn't help x86)
1627 //      - allocates lots of intermediate memory (full size of all components)
1628 //        - non-interleaved case requires this anyway
1629 //        - allows good upsampling (see next)
1630 //    high-quality
1631 //      - upsampled channels are bilinearly interpolated, even across blocks
1632 //      - quality integer IDCT derived from IJG's 'slow'
1633 //    performance
1634 //      - fast huffman; reasonable integer IDCT
1635 //      - some SIMD kernels for common paths on targets with SSE2/NEON
1636 //      - uses a lot of intermediate memory, could cache poorly
1637
1638 #ifndef STBI_NO_JPEG
1639
1640 // huffman decoding acceleration
1641 #define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
1642
1643 typedef struct
1644 {
1645         stbi_uc  fast[1 << FAST_BITS];
1646         // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1647         stbi__uint16 code[256];
1648         stbi_uc  values[256];
1649         stbi_uc  size[257];
1650         unsigned int maxcode[18];
1651         int    delta[17];   // old 'firstsymbol' - old 'firstcode'
1652 } stbi__huffman;
1653
1654 typedef struct
1655 {
1656         stbi__context *s;
1657         stbi__huffman huff_dc[4];
1658         stbi__huffman huff_ac[4];
1659         stbi__uint16 dequant[4][64];
1660         stbi__int16 fast_ac[4][1 << FAST_BITS];
1661
1662         // sizes for components, interleaved MCUs
1663         int img_h_max, img_v_max;
1664         int img_mcu_x, img_mcu_y;
1665         int img_mcu_w, img_mcu_h;
1666
1667         // definition of jpeg image component
1668         struct
1669         {
1670                 int id;
1671                 int h, v;
1672                 int tq;
1673                 int hd, ha;
1674                 int dc_pred;
1675
1676                 int x, y, w2, h2;
1677                 stbi_uc *data;
1678                 void *raw_data, *raw_coeff;
1679                 stbi_uc *linebuf;
1680                 short   *coeff;   // progressive only
1681                 int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
1682         } img_comp[4];
1683
1684         stbi__uint32   code_buffer; // jpeg entropy-coded buffer
1685         int            code_bits;   // number of valid bits
1686         unsigned char  marker;      // marker seen while filling entropy buffer
1687         int            nomore;      // flag if we saw a marker so must stop
1688
1689         int            progressive;
1690         int            spec_start;
1691         int            spec_end;
1692         int            succ_high;
1693         int            succ_low;
1694         int            eob_run;
1695         int            jfif;
1696         int            app14_color_transform; // Adobe APP14 tag
1697         int            rgb;
1698
1699         int scan_n, order[4];
1700         int restart_interval, todo;
1701
1702         // kernels
1703         void(*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1704         void(*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1705         stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1706 } stbi__jpeg;
1707
1708 static int stbi__build_huffman(stbi__huffman *h, int *count)
1709 {
1710         int i, j, k = 0, code;
1711         // build size list for each symbol (from JPEG spec)
1712         for (i = 0; i < 16; ++i)
1713                 for (j = 0; j < count[i]; ++j)
1714                         h->size[k++] = (stbi_uc)(i + 1);
1715         h->size[k] = 0;
1716
1717         // compute actual symbols (from jpeg spec)
1718         code = 0;
1719         k = 0;
1720         for (j = 1; j <= 16; ++j) {
1721                 // compute delta to add to code to compute symbol id
1722                 h->delta[j] = k - code;
1723                 if (h->size[k] == j) {
1724                         while (h->size[k] == j)
1725                                 h->code[k++] = (stbi__uint16)(code++);
1726                         if (code - 1 >= (1 << j)) return stbi__err("bad code lengths", "Corrupt JPEG");
1727                 }
1728                 // compute largest code + 1 for this size, preshifted as needed later
1729                 h->maxcode[j] = code << (16 - j);
1730                 code <<= 1;
1731         }
1732         h->maxcode[j] = 0xffffffff;
1733
1734         // build non-spec acceleration table; 255 is flag for not-accelerated
1735         memset(h->fast, 255, 1 << FAST_BITS);
1736         for (i = 0; i < k; ++i) {
1737                 int s = h->size[i];
1738                 if (s <= FAST_BITS) {
1739                         int c = h->code[i] << (FAST_BITS - s);
1740                         int m = 1 << (FAST_BITS - s);
1741                         for (j = 0; j < m; ++j) {
1742                                 h->fast[c + j] = (stbi_uc)i;
1743                         }
1744                 }
1745         }
1746         return 1;
1747 }
1748
1749 // build a table that decodes both magnitude and value of small ACs in
1750 // one go.
1751 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1752 {
1753         int i;
1754         for (i = 0; i < (1 << FAST_BITS); ++i) {
1755                 stbi_uc fast = h->fast[i];
1756                 fast_ac[i] = 0;
1757                 if (fast < 255) {
1758                         int rs = h->values[fast];
1759                         int run = (rs >> 4) & 15;
1760                         int magbits = rs & 15;
1761                         int len = h->size[fast];
1762
1763                         if (magbits && len + magbits <= FAST_BITS) {
1764                                 // magnitude code followed by receive_extend code
1765                                 int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1766                                 int m = 1 << (magbits - 1);
1767                                 if (k < m) k += (~0U << magbits) + 1;
1768                                 // if the result is small enough, we can fit it in fast_ac table
1769                                 if (k >= -128 && k <= 127)
1770                                         fast_ac[i] = (stbi__int16)((k << 8) + (run << 4) + (len + magbits));
1771                         }
1772                 }
1773         }
1774 }
1775
1776 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
1777 {
1778         do {
1779                 int b = j->nomore ? 0 : stbi__get8(j->s);
1780                 if (b == 0xff) {
1781                         int c = stbi__get8(j->s);
1782                         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
1783                         if (c != 0) {
1784                                 j->marker = (unsigned char)c;
1785                                 j->nomore = 1;
1786                                 return;
1787                         }
1788                 }
1789                 j->code_buffer |= b << (24 - j->code_bits);
1790                 j->code_bits += 8;
1791         } while (j->code_bits <= 24);
1792 }
1793
1794 // (1 << n) - 1
1795 static stbi__uint32 stbi__bmask[17] = { 0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535 };
1796
1797 // decode a jpeg huffman value from the bitstream
1798 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1799 {
1800         unsigned int temp;
1801         int c, k;
1802
1803         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1804
1805         // look at the top FAST_BITS and determine what symbol ID it is,
1806         // if the code is <= FAST_BITS
1807         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
1808         k = h->fast[c];
1809         if (k < 255) {
1810                 int s = h->size[k];
1811                 if (s > j->code_bits)
1812                         return -1;
1813                 j->code_buffer <<= s;
1814                 j->code_bits -= s;
1815                 return h->values[k];
1816         }
1817
1818         // naive test is to shift the code_buffer down so k bits are
1819         // valid, then test against maxcode. To speed this up, we've
1820         // preshifted maxcode left so that it has (16-k) 0s at the
1821         // end; in other words, regardless of the number of bits, it
1822         // wants to be compared against something shifted to have 16;
1823         // that way we don't need to shift inside the loop.
1824         temp = j->code_buffer >> 16;
1825         for (k = FAST_BITS + 1; ; ++k)
1826                 if (temp < h->maxcode[k])
1827                         break;
1828         if (k == 17) {
1829                 // error! code not found
1830                 j->code_bits -= 16;
1831                 return -1;
1832         }
1833
1834         if (k > j->code_bits)
1835                 return -1;
1836
1837         // convert the huffman code to the symbol id
1838         c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1839         STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1840
1841         // convert the id to a symbol
1842         j->code_bits -= k;
1843         j->code_buffer <<= k;
1844         return h->values[c];
1845 }
1846
1847 // bias[n] = (-1<<n) + 1
1848 static int const stbi__jbias[16] = { 0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767 };
1849
1850 // combined JPEG 'receive' and JPEG 'extend', since baseline
1851 // always extends everything it receives.
1852 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1853 {
1854         unsigned int k;
1855         int sgn;
1856         if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1857
1858         sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1859         k = stbi_lrot(j->code_buffer, n);
1860         STBI_ASSERT(n >= 0 && n < (int)(sizeof(stbi__bmask) / sizeof(*stbi__bmask)));
1861         j->code_buffer = k & ~stbi__bmask[n];
1862         k &= stbi__bmask[n];
1863         j->code_bits -= n;
1864         return k + (stbi__jbias[n] & ~sgn);
1865 }
1866
1867 // get some unsigned bits
1868 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1869 {
1870         unsigned int k;
1871         if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1872         k = stbi_lrot(j->code_buffer, n);
1873         j->code_buffer = k & ~stbi__bmask[n];
1874         k &= stbi__bmask[n];
1875         j->code_bits -= n;
1876         return k;
1877 }
1878
1879 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1880 {
1881         unsigned int k;
1882         if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1883         k = j->code_buffer;
1884         j->code_buffer <<= 1;
1885         --j->code_bits;
1886         return k & 0x80000000;
1887 }
1888
1889 // given a value that's at position X in the zigzag stream,
1890 // where does it appear in the 8x8 matrix coded as row-major?
1891 static stbi_uc stbi__jpeg_dezigzag[64 + 15] =
1892 {
1893         0,  1,  8, 16,  9,  2,  3, 10,
1894         17, 24, 32, 25, 18, 11,  4,  5,
1895         12, 19, 26, 33, 40, 48, 41, 34,
1896         27, 20, 13,  6,  7, 14, 21, 28,
1897         35, 42, 49, 56, 57, 50, 43, 36,
1898         29, 22, 15, 23, 30, 37, 44, 51,
1899         58, 59, 52, 45, 38, 31, 39, 46,
1900         53, 60, 61, 54, 47, 55, 62, 63,
1901         // let corrupt input sample past end
1902         63, 63, 63, 63, 63, 63, 63, 63,
1903         63, 63, 63, 63, 63, 63, 63
1904 };
1905
1906 // decode one 64-entry block--
1907 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1908 {
1909         int diff, dc, k;
1910         int t;
1911
1912         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1913         t = stbi__jpeg_huff_decode(j, hdc);
1914         if (t < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
1915
1916         // 0 all the ac values now so we can do it 32-bits at a time
1917         memset(data, 0, 64 * sizeof(data[0]));
1918
1919         diff = t ? stbi__extend_receive(j, t) : 0;
1920         dc = j->img_comp[b].dc_pred + diff;
1921         j->img_comp[b].dc_pred = dc;
1922         data[0] = (short)(dc * dequant[0]);
1923
1924         // decode AC components, see JPEG spec
1925         k = 1;
1926         do {
1927                 unsigned int zig;
1928                 int c, r, s;
1929                 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1930                 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
1931                 r = fac[c];
1932                 if (r) { // fast-AC path
1933                         k += (r >> 4) & 15; // run
1934                         s = r & 15; // combined length
1935                         j->code_buffer <<= s;
1936                         j->code_bits -= s;
1937                         // decode into unzigzag'd location
1938                         zig = stbi__jpeg_dezigzag[k++];
1939                         data[zig] = (short)((r >> 8) * dequant[zig]);
1940                 }
1941                 else {
1942                         int rs = stbi__jpeg_huff_decode(j, hac);
1943                         if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
1944                         s = rs & 15;
1945                         r = rs >> 4;
1946                         if (s == 0) {
1947                                 if (rs != 0xf0) break; // end block
1948                                 k += 16;
1949                         }
1950                         else {
1951                                 k += r;
1952                                 // decode into unzigzag'd location
1953                                 zig = stbi__jpeg_dezigzag[k++];
1954                                 data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);
1955                         }
1956                 }
1957         } while (k < 64);
1958         return 1;
1959 }
1960
1961 static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
1962 {
1963         int diff, dc;
1964         int t;
1965         if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1966
1967         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1968
1969         if (j->succ_high == 0) {
1970                 // first scan for DC coefficient, must be first
1971                 memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now
1972                 t = stbi__jpeg_huff_decode(j, hdc);
1973                 diff = t ? stbi__extend_receive(j, t) : 0;
1974
1975                 dc = j->img_comp[b].dc_pred + diff;
1976                 j->img_comp[b].dc_pred = dc;
1977                 data[0] = (short)(dc << j->succ_low);
1978         }
1979         else {
1980                 // refinement scan for DC coefficient
1981                 if (stbi__jpeg_get_bit(j))
1982                         data[0] += (short)(1 << j->succ_low);
1983         }
1984         return 1;
1985 }
1986
1987 // @OPTIMIZE: store non-zigzagged during the decode passes,
1988 // and only de-zigzag when dequantizing
1989 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1990 {
1991         int k;
1992         if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1993
1994         if (j->succ_high == 0) {
1995                 int shift = j->succ_low;
1996
1997                 if (j->eob_run) {
1998                         --j->eob_run;
1999                         return 1;
2000                 }
2001
2002                 k = j->spec_start;
2003                 do {
2004                         unsigned int zig;
2005                         int c, r, s;
2006                         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2007                         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
2008                         r = fac[c];
2009                         if (r) { // fast-AC path
2010                                 k += (r >> 4) & 15; // run
2011                                 s = r & 15; // combined length
2012                                 j->code_buffer <<= s;
2013                                 j->code_bits -= s;
2014                                 zig = stbi__jpeg_dezigzag[k++];
2015                                 data[zig] = (short)((r >> 8) << shift);
2016                         }
2017                         else {
2018                                 int rs = stbi__jpeg_huff_decode(j, hac);
2019                                 if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
2020                                 s = rs & 15;
2021                                 r = rs >> 4;
2022                                 if (s == 0) {
2023                                         if (r < 15) {
2024                                                 j->eob_run = (1 << r);
2025                                                 if (r)
2026                                                         j->eob_run += stbi__jpeg_get_bits(j, r);
2027                                                 --j->eob_run;
2028                                                 break;
2029                                         }
2030                                         k += 16;
2031                                 }
2032                                 else {
2033                                         k += r;
2034                                         zig = stbi__jpeg_dezigzag[k++];
2035                                         data[zig] = (short)(stbi__extend_receive(j, s) << shift);
2036                                 }
2037                         }
2038                 } while (k <= j->spec_end);
2039         }
2040         else {
2041                 // refinement scan for these AC coefficients
2042
2043                 short bit = (short)(1 << j->succ_low);
2044
2045                 if (j->eob_run) {
2046                         --j->eob_run;
2047                         for (k = j->spec_start; k <= j->spec_end; ++k) {
2048                                 short *p = &data[stbi__jpeg_dezigzag[k]];
2049                                 if (*p != 0)
2050                                         if (stbi__jpeg_get_bit(j))
2051                                                 if ((*p & bit) == 0) {
2052                                                         if (*p > 0)
2053                                                                 *p += bit;
2054                                                         else
2055                                                                 *p -= bit;
2056                                                 }
2057                         }
2058                 }
2059                 else {
2060                         k = j->spec_start;
2061                         do {
2062                                 int r, s;
2063                                 int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2064                                 if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
2065                                 s = rs & 15;
2066                                 r = rs >> 4;
2067                                 if (s == 0) {
2068                                         if (r < 15) {
2069                                                 j->eob_run = (1 << r) - 1;
2070                                                 if (r)
2071                                                         j->eob_run += stbi__jpeg_get_bits(j, r);
2072                                                 r = 64; // force end of block
2073                                         }
2074                                         else {
2075                                                 // r=15 s=0 should write 16 0s, so we just do
2076                                                 // a run of 15 0s and then write s (which is 0),
2077                                                 // so we don't have to do anything special here
2078                                         }
2079                                 }
2080                                 else {
2081                                         if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2082                                         // sign bit
2083                                         if (stbi__jpeg_get_bit(j))
2084                                                 s = bit;
2085                                         else
2086                                                 s = -bit;
2087                                 }
2088
2089                                 // advance by r
2090                                 while (k <= j->spec_end) {
2091                                         short *p = &data[stbi__jpeg_dezigzag[k++]];
2092                                         if (*p != 0) {
2093                                                 if (stbi__jpeg_get_bit(j))
2094                                                         if ((*p & bit) == 0) {
2095                                                                 if (*p > 0)
2096                                                                         *p += bit;
2097                                                                 else
2098                                                                         *p -= bit;
2099                                                         }
2100                                         }
2101                                         else {
2102                                                 if (r == 0) {
2103                                                         *p = (short)s;
2104                                                         break;
2105                                                 }
2106                                                 --r;
2107                                         }
2108                                 }
2109                         } while (k <= j->spec_end);
2110                 }
2111         }
2112         return 1;
2113 }
2114
2115 // take a -128..127 value and stbi__clamp it and convert to 0..255
2116 stbi_inline static stbi_uc stbi__clamp(int x)
2117 {
2118         // trick to use a single test to catch both cases
2119         if ((unsigned int)x > 255) {
2120                 if (x < 0) return 0;
2121                 if (x > 255) return 255;
2122         }
2123         return (stbi_uc)x;
2124 }
2125
// fixed-point helpers with 12 fractional bits:
//   stbi__f2f(x) converts a floating-point constant, rounding to nearest
//   stbi__fsh(x) scales an integer by 1 << 12; written as a multiplication
//                because x may be negative
#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define stbi__fsh(x)  ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
// One 8-point 1D IDCT on s0..s7. The macro declares its own temporaries in the
// enclosing scope, so it can be used once per block scope. On exit x0..x3 hold
// the even part and t0..t3 the odd part, both scaled up by 1 << 12; the caller
// forms the outputs as x0+t3, x1+t2, x2+t1, x3+t0, x3-t0, x2-t1, x1-t2, x0-t3.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
2166
2167 static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
2168 {
2169         int i, val[64], *v = val;
2170         stbi_uc *o;
2171         short *d = data;
2172
2173         // columns
2174         for (i = 0; i < 8; ++i, ++d, ++v) {
2175                 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
2176                 if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0
2177                         && d[40] == 0 && d[48] == 0 && d[56] == 0) {
2178                         //    no shortcut                 0     seconds
2179                         //    (1|2|3|4|5|6|7)==0          0     seconds
2180                         //    all separate               -0.047 seconds
2181                         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
2182                         int dcterm = d[0] << 2;
2183                         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
2184                 }
2185                 else {
2186                         STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])
2187                                 // constants scaled things up by 1<<12; let's bring them back
2188                                 // down, but keep 2 extra bits of precision
2189                                 x0 += 512; x1 += 512; x2 += 512; x3 += 512;
2190                         v[0] = (x0 + t3) >> 10;
2191                         v[56] = (x0 - t3) >> 10;
2192                         v[8] = (x1 + t2) >> 10;
2193                         v[48] = (x1 - t2) >> 10;
2194                         v[16] = (x2 + t1) >> 10;
2195                         v[40] = (x2 - t1) >> 10;
2196                         v[24] = (x3 + t0) >> 10;
2197                         v[32] = (x3 - t0) >> 10;
2198                 }
2199         }
2200
2201         for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) {
2202                 // no fast case since the first 1D IDCT spread components out
2203                 STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
2204                         // constants scaled things up by 1<<12, plus we had 1<<2 from first
2205                         // loop, plus horizontal and vertical each scale by sqrt(8) so together
2206                         // we've got an extra 1<<3, so 1<<17 total we need to remove.
2207                         // so we want to round that, which means adding 0.5 * 1<<17,
2208                         // aka 65536. Also, we'll end up with -128 to 127 that we want
2209                         // to encode as 0..255 by adding 128, so we'll add that before the shift
2210                         x0 += 65536 + (128 << 17);
2211                 x1 += 65536 + (128 << 17);
2212                 x2 += 65536 + (128 << 17);
2213                 x3 += 65536 + (128 << 17);
2214                 // tried computing the shifts into temps, or'ing the temps to see
2215                 // if any were out of range, but that was slower
2216                 o[0] = stbi__clamp((x0 + t3) >> 17);
2217                 o[7] = stbi__clamp((x0 - t3) >> 17);
2218                 o[1] = stbi__clamp((x1 + t2) >> 17);
2219                 o[6] = stbi__clamp((x1 - t2) >> 17);
2220                 o[2] = stbi__clamp((x2 + t1) >> 17);
2221                 o[5] = stbi__clamp((x2 - t1) >> 17);
2222                 o[3] = stbi__clamp((x3 + t0) >> 17);
2223                 o[4] = stbi__clamp((x3 - t0) >> 17);
2224         }
2225 }
2226
2227 #ifdef STBI_SSE2
2228 // SSE2 integer IDCT for one 8x8 block. not the fastest possible implementation but it
2229 // produces bit-identical results to the generic C version so it's
2230 // fully "transparent".
     //
     // data:       the 64 input values, loaded as eight rows of eight 16-bit lanes with
     //             _mm_load_si128 (an aligned load, so data must be 16-byte aligned).
     // out:        receives eight rows of eight bytes each.
     // out_stride: distance in bytes between the starts of consecutive output rows.
2231 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2232 {
2233         // This is constructed to match our regular (generic) integer IDCT exactly.
2234         __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2235         __m128i tmp;
2236
2237         // dot product constant: even elems=x, odd elems=y
2238 #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2239
2240         // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
2241         // out(1) = c1[even]*x + c1[odd]*y
             // x and y are interleaved lane by lane so that one _mm_madd_epi16 per half
             // yields the 32-bit sums. NOTE: the macro declares new variables whose names
             // are pasted from its arguments (c0##lo, c0##hi, out0##_l, out0##_h, ...), so
             // every use inside one scope needs distinct out0/out1/c0 argument names.
2242 #define dct_rot(out0,out1, x,y,c0,c1) \
2243       __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2244       __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2245       __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2246       __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2247       __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2248       __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2249
2250         // out = in << 12  (in 16-bit, out 32-bit)
             // unpacking against zero puts `in` in the upper 16 bits of each 32-bit lane
             // (i.e. in << 16); the arithmetic shift right by 4 then leaves a
             // sign-extended in << 12.
2251 #define dct_widen(out, in) \
2252       __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2253       __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2254
2255         // wide add: operates on the _l/_h pair of 32-bit vectors named by a and b
2256 #define dct_wadd(out, a, b) \
2257       __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2258       __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2259
2260         // wide sub: same convention as dct_wadd
2261 #define dct_wsub(out, a, b) \
2262       __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2263       __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2264
2265         // butterfly a/b, add bias, then shift by "s" and pack
             // out0 = (a + bias + b) >> s, out1 = (a + bias - b) >> s; _mm_packs_epi32
             // narrows the 32-bit results back to 16 bits with signed saturation.
2266 #define dct_bfly32o(out0, out1, a,b,bias,s) \
2267       { \
2268          __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2269          __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2270          dct_wadd(sum, abiased, b); \
2271          dct_wsub(dif, abiased, b); \
2272          out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2273          out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2274       }
2275
2276         // 8-bit interleave step (for transposes); uses the function-level `tmp`
2277 #define dct_interleave8(a, b) \
2278       tmp = a; \
2279       a = _mm_unpacklo_epi8(a, b); \
2280       b = _mm_unpackhi_epi8(tmp, b)
2281
2282         // 16-bit interleave step (for transposes); uses the function-level `tmp`
2283 #define dct_interleave16(a, b) \
2284       tmp = a; \
2285       a = _mm_unpacklo_epi16(a, b); \
2286       b = _mm_unpackhi_epi16(tmp, b)
2287
             // one 1-D IDCT pass applied to row0..row7 in place. `bias` is added before
             // the final arithmetic right shift by `shift`. The macro refers to row0..row7
             // and the rot*_* constants below by name, so it can only be expanded after
             // those are declared.
2288 #define dct_pass(bias,shift) \
2289       { \
2290          /* even part */ \
2291          dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2292          __m128i sum04 = _mm_add_epi16(row0, row4); \
2293          __m128i dif04 = _mm_sub_epi16(row0, row4); \
2294          dct_widen(t0e, sum04); \
2295          dct_widen(t1e, dif04); \
2296          dct_wadd(x0, t0e, t3e); \
2297          dct_wsub(x3, t0e, t3e); \
2298          dct_wadd(x1, t1e, t2e); \
2299          dct_wsub(x2, t1e, t2e); \
2300          /* odd part */ \
2301          dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2302          dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2303          __m128i sum17 = _mm_add_epi16(row1, row7); \
2304          __m128i sum35 = _mm_add_epi16(row3, row5); \
2305          dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2306          dct_wadd(x4, y0o, y4o); \
2307          dct_wadd(x5, y1o, y5o); \
2308          dct_wadd(x6, y2o, y5o); \
2309          dct_wadd(x7, y3o, y4o); \
2310          dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2311          dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2312          dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2313          dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2314       }
2315
             // multiplier pairs for dct_rot: each vector alternates the two fixed-point
             // factors (built with stbi__f2f) that one _mm_madd_epi16 applies to an
             // interleaved x/y pair.
2316         __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2317         __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f), stbi__f2f(0.5411961f));
2318         __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2319         __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2320         __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f), stbi__f2f(-1.961570560f));
2321         __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));
2322         __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f), stbi__f2f(-0.390180644f));
2323         __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));
2324
2325         // rounding biases in column/row passes, see stbi__idct_block for explanation.
             // bias_0: 512 is half of 1<<10, i.e. round-to-nearest for the >>10 column pass.
             // bias_1: 65536 is half of 1<<17 (rounding for the >>17 row pass) and
             //         128<<17 turns into +128 on every sample once shifted.
2326         __m128i bias_0 = _mm_set1_epi32(512);
2327         __m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17));
2328
2329         // load (aligned 128-bit loads, eight shorts per row)
2330         row0 = _mm_load_si128((const __m128i *) (data + 0 * 8));
2331         row1 = _mm_load_si128((const __m128i *) (data + 1 * 8));
2332         row2 = _mm_load_si128((const __m128i *) (data + 2 * 8));
2333         row3 = _mm_load_si128((const __m128i *) (data + 3 * 8));
2334         row4 = _mm_load_si128((const __m128i *) (data + 4 * 8));
2335         row5 = _mm_load_si128((const __m128i *) (data + 5 * 8));
2336         row6 = _mm_load_si128((const __m128i *) (data + 6 * 8));
2337         row7 = _mm_load_si128((const __m128i *) (data + 7 * 8));
2338
2339         // column pass
2340         dct_pass(bias_0, 10);
2341
2342         {
2343                 // 16bit 8x8 transpose pass 1
2344                 dct_interleave16(row0, row4);
2345                 dct_interleave16(row1, row5);
2346                 dct_interleave16(row2, row6);
2347                 dct_interleave16(row3, row7);
2348
2349                 // transpose pass 2
2350                 dct_interleave16(row0, row2);
2351                 dct_interleave16(row1, row3);
2352                 dct_interleave16(row4, row6);
2353                 dct_interleave16(row5, row7);
2354
2355                 // transpose pass 3
2356                 dct_interleave16(row0, row1);
2357                 dct_interleave16(row2, row3);
2358                 dct_interleave16(row4, row5);
2359                 dct_interleave16(row6, row7);
2360         }
2361
2362         // row pass
2363         dct_pass(bias_1, 17);
2364
2365         {
2366                 // pack: _mm_packus_epi16 narrows to bytes with unsigned saturation,
                     // which is what clamps the samples to 0..255
2367                 __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2368                 __m128i p1 = _mm_packus_epi16(row2, row3);
2369                 __m128i p2 = _mm_packus_epi16(row4, row5);
2370                 __m128i p3 = _mm_packus_epi16(row6, row7);
2371
2372                 // 8bit 8x8 transpose pass 1
2373                 dct_interleave8(p0, p2); // a0e0a1e1...
2374                 dct_interleave8(p1, p3); // c0g0c1g1...
2375
2376                 // transpose pass 2
2377                 dct_interleave8(p0, p1); // a0c0e0g0...
2378                 dct_interleave8(p2, p3); // b0d0f0h0...
2379
2380                 // transpose pass 3
2381                 dct_interleave8(p0, p2); // a0b0c0d0...
2382                 dct_interleave8(p1, p3); // a4b4c4d4...
2383
2384                 // store: each register now holds two output rows. _mm_storel_epi64
                     // writes the low 8 bytes; _mm_shuffle_epi32(.., 0x4e) swaps the two
                     // 64-bit halves so the second row can be written the same way.
2385                 _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2386                 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2387                 _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2388                 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2389                 _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2390                 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2391                 _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2392                 _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2393         }
2394
             // the helper macros are local to this function
2395 #undef dct_const
2396 #undef dct_rot
2397 #undef dct_widen
2398 #undef dct_wadd
2399 #undef dct_wsub
2400 #undef dct_bfly32o
2401 #undef dct_interleave8
2402 #undef dct_interleave16
2403 #undef dct_pass
2404 }
2405
2406 #endif // STBI_SSE2
2407
2408 #ifdef STBI_NEON
2409
2410 // NEON integer IDCT for one 8x8 block. should produce bit-identical
2411 // results to the generic C version.
     //
     // data:       the 64 input values, loaded as eight rows of eight 16-bit lanes.
     // out:        receives eight rows of eight bytes each.
     // out_stride: distance in bytes between the starts of consecutive output rows.
2412 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2413 {
2414         int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2415
             // fixed-point multipliers (built with stbi__f2f), each broadcast to all
             // four 16-bit lanes so they can feed vmull_s16 / vmlal_s16
2416         int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2417         int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2418         int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));
2419         int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));
2420         int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2421         int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2422         int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2423         int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2424         int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));
2425         int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));
2426         int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));
2427         int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));
2428
             // out = inq * coeff, widened from 16 to 32 bits; declares out##_l / out##_h
             // for the low and high four lanes of inq
2429 #define dct_long_mul(out, inq, coeff) \
2430    int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2431    int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2432
             // out = acc + inq * coeff (widening multiply-accumulate), same _l/_h convention
2433 #define dct_long_mac(out, acc, inq, coeff) \
2434    int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2435    int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2436
             // out = inq << 12, widened from 16 to 32 bits
2437 #define dct_widen(out, inq) \
2438    int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2439    int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2440
2441         // wide add: operates on the _l/_h pair of 32-bit vectors named by a and b
2442 #define dct_wadd(out, a, b) \
2443    int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2444    int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2445
2446         // wide sub: same convention as dct_wadd
2447 #define dct_wsub(out, a, b) \
2448    int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2449    int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2450
2451         // butterfly a/b, then shift using "shiftop" by "s" and pack
             // shiftop is a narrowing shift intrinsic (32-bit lanes in, 16-bit lanes out);
             // the two narrowed halves are recombined into one int16x8_t.
2452 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2453    { \
2454       dct_wadd(sum, a, b); \
2455       dct_wsub(dif, a, b); \
2456       out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2457       out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2458    }
2459
             // one 1-D IDCT pass applied to row0..row7 in place, finishing with the given
             // narrowing shift. The macro refers to row0..row7 and the rot*_* constants
             // above by name.
2460 #define dct_pass(shiftop, shift) \
2461    { \
2462       /* even part */ \
2463       int16x8_t sum26 = vaddq_s16(row2, row6); \
2464       dct_long_mul(p1e, sum26, rot0_0); \
2465       dct_long_mac(t2e, p1e, row6, rot0_1); \
2466       dct_long_mac(t3e, p1e, row2, rot0_2); \
2467       int16x8_t sum04 = vaddq_s16(row0, row4); \
2468       int16x8_t dif04 = vsubq_s16(row0, row4); \
2469       dct_widen(t0e, sum04); \
2470       dct_widen(t1e, dif04); \
2471       dct_wadd(x0, t0e, t3e); \
2472       dct_wsub(x3, t0e, t3e); \
2473       dct_wadd(x1, t1e, t2e); \
2474       dct_wsub(x2, t1e, t2e); \
2475       /* odd part */ \
2476       int16x8_t sum15 = vaddq_s16(row1, row5); \
2477       int16x8_t sum17 = vaddq_s16(row1, row7); \
2478       int16x8_t sum35 = vaddq_s16(row3, row5); \
2479       int16x8_t sum37 = vaddq_s16(row3, row7); \
2480       int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2481       dct_long_mul(p5o, sumodd, rot1_0); \
2482       dct_long_mac(p1o, p5o, sum17, rot1_1); \
2483       dct_long_mac(p2o, p5o, sum35, rot1_2); \
2484       dct_long_mul(p3o, sum37, rot2_0); \
2485       dct_long_mul(p4o, sum15, rot2_1); \
2486       dct_wadd(sump13o, p1o, p3o); \
2487       dct_wadd(sump24o, p2o, p4o); \
2488       dct_wadd(sump23o, p2o, p3o); \
2489       dct_wadd(sump14o, p1o, p4o); \
2490       dct_long_mac(x4, sump13o, row7, rot3_0); \
2491       dct_long_mac(x5, sump24o, row5, rot3_1); \
2492       dct_long_mac(x6, sump23o, row3, rot3_2); \
2493       dct_long_mac(x7, sump14o, row1, rot3_3); \
2494       dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2495       dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2496       dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2497       dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2498    }
2499
2500         // load
2501         row0 = vld1q_s16(data + 0 * 8);
2502         row1 = vld1q_s16(data + 1 * 8);
2503         row2 = vld1q_s16(data + 2 * 8);
2504         row3 = vld1q_s16(data + 3 * 8);
2505         row4 = vld1q_s16(data + 4 * 8);
2506         row5 = vld1q_s16(data + 5 * 8);
2507         row6 = vld1q_s16(data + 6 * 8);
2508         row7 = vld1q_s16(data + 7 * 8);
2509
2510         // add DC bias: 1024 is added to lane 0 of row0 only (the DC term).
             // Through the <<12 widen and the >>10 / >>17 shifts of the two passes this
             // works out to +128 on every output sample.
2511         row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2512
2513         // column pass (rounding narrowing shift by 10)
2514         dct_pass(vrshrn_n_s32, 10);
2515
2516         // 16bit 8x8 transpose
2517         {
2518                 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2519                 // whether compilers actually get this is another story, sadly.
2520 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2521 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2522 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2523
2524                 // pass 1
2525                 dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2526                 dct_trn16(row2, row3);
2527                 dct_trn16(row4, row5);
2528                 dct_trn16(row6, row7);
2529
2530                 // pass 2
2531                 dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2532                 dct_trn32(row1, row3);
2533                 dct_trn32(row4, row6);
2534                 dct_trn32(row5, row7);
2535
2536                 // pass 3
2537                 dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2538                 dct_trn64(row1, row5);
2539                 dct_trn64(row2, row6);
2540                 dct_trn64(row3, row7);
2541
2542 #undef dct_trn16
2543 #undef dct_trn32
2544 #undef dct_trn64
2545         }
2546
2547         // row pass
2548         // vrshrn_n_s32 only supports shifts up to 16, we need
2549         // 17. so do a non-rounding shift of 16 first then follow
2550         // up with a rounding shift by 1.
2551         dct_pass(vshrn_n_s32, 16);
2552
2553         {
2554                 // pack and round: the remaining rounding shift by 1, narrowed to
                     // unsigned bytes with saturation (this is the clamp to 0..255)
2555                 uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2556                 uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2557                 uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2558                 uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2559                 uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2560                 uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2561                 uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2562                 uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2563
2564                 // again, these can translate into one instruction, but often don't.
2565 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2566 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2567 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2568
2569                 // sadly can't use interleaved stores here since we only write
2570                 // 8 bytes to each scan line!
2571
2572                 // 8x8 8-bit transpose pass 1
2573                 dct_trn8_8(p0, p1);
2574                 dct_trn8_8(p2, p3);
2575                 dct_trn8_8(p4, p5);
2576                 dct_trn8_8(p6, p7);
2577
2578                 // pass 2
2579                 dct_trn8_16(p0, p2);
2580                 dct_trn8_16(p1, p3);
2581                 dct_trn8_16(p4, p6);
2582                 dct_trn8_16(p5, p7);
2583
2584                 // pass 3
2585                 dct_trn8_32(p0, p4);
2586                 dct_trn8_32(p1, p5);
2587                 dct_trn8_32(p2, p6);
2588                 dct_trn8_32(p3, p7);
2589
2590                 // store: one 8-byte row per vector, out_stride bytes apart
2591                 vst1_u8(out, p0); out += out_stride;
2592                 vst1_u8(out, p1); out += out_stride;
2593                 vst1_u8(out, p2); out += out_stride;
2594                 vst1_u8(out, p3); out += out_stride;
2595                 vst1_u8(out, p4); out += out_stride;
2596                 vst1_u8(out, p5); out += out_stride;
2597                 vst1_u8(out, p6); out += out_stride;
2598                 vst1_u8(out, p7);
2599
2600 #undef dct_trn8_8
2601 #undef dct_trn8_16
2602 #undef dct_trn8_32
2603         }
2604
             // the helper macros are local to this function
2605 #undef dct_long_mul
2606 #undef dct_long_mac
2607 #undef dct_widen
2608 #undef dct_wadd
2609 #undef dct_wsub
2610 #undef dct_bfly32o
2611 #undef dct_pass
2612 }
2613
2614 #endif // STBI_NEON
2615
2616 #define STBI__MARKER_none  0xff
2617 // if there's a pending marker from the entropy stream, return that
2618 // otherwise, fetch from the stream and get a marker. if there's no
2619 // marker, return 0xff, which is never a valid marker value
2620 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2621 {
2622         stbi_uc x;
2623         if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2624         x = stbi__get8(j->s);
2625         if (x != 0xff) return STBI__MARKER_none;
2626         while (x == 0xff)
2627                 x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2628         return x;
2629 }
2630
2631 // in each scan, we'll have scan_n components, and the order
2632 // of the components is specified by order[]
2633 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
2634
2635 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2636 // the dc prediction
2637 static void stbi__jpeg_reset(stbi__jpeg *j)
2638 {
2639         j->code_bits = 0;
2640         j->code_buffer = 0;
2641         j->nomore = 0;
2642         j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2643         j->marker = STBI__MARKER_none;
2644         j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2645         j->eob_run = 0;
2646         // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2647         // since we don't even allow 1<<30 pixels
2648 }
2649
2650 static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2651 {
2652         stbi__jpeg_reset(z);
2653         if (!z->progressive) {
2654                 if (z->scan_n == 1) {
2655                         int i, j;
2656                         STBI_SIMD_ALIGN(short, data[64]);
2657                         int n = z->order[0];
2658                         // non-interleaved data, we just need to process one block at a time,
2659                         // in trivial scanline order
2660                         // number of blocks to do just depends on how many actual "pixels" this
2661                         // component has, independent of interleaved MCU blocking and such
2662                         int w = (z->img_comp[n].x + 7) >> 3;
2663                         int h = (z->img_comp[n].y + 7) >> 3;
2664                         for (j = 0; j < h; ++j) {
2665                                 for (i = 0; i < w; ++i) {
2666                                         int ha = z->img_comp[n].ha;
2667                                         if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2668                                         z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2*j * 8 + i * 8, z->img_comp[n].w2, data);
2669                                         // every data block is an MCU, so countdown the restart interval
2670                                         if (--z->todo <= 0) {
2671                                                 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2672                                                 // if it's NOT a restart, then just bail, so we get corrupt data
2673                                                 // rather than no data
2674                                                 if (!STBI__RESTART(z->marker)) return 1;
2675                                                 stbi__jpeg_reset(z);
2676                                         }
2677                                 }
2678                         }
2679                         return 1;
2680                 }
2681                 else { // interleaved
2682                         int i, j, k, x, y;
2683                         STBI_SIMD_ALIGN(short, data[64]);
2684                         for (j = 0; j < z->img_mcu_y; ++j) {
2685                                 for (i = 0; i < z->img_mcu_x; ++i) {
2686                                         // scan an interleaved mcu... process scan_n components in order
2687                                         for (k = 0; k < z->scan_n; ++k) {
2688                                                 int n = z->order[k];
2689                                                 // scan out an mcu's worth of this component; that's just determined
2690                                                 // by the basic H and V specified for the component
2691                                                 for (y = 0; y < z->img_comp[n].v; ++y) {
2692                                                         for (x = 0; x < z->img_comp[n].h; ++x) {
2693                                                                 int x2 = (i*z->img_comp[n].h + x) * 8;
2694                                                                 int y2 = (j*z->img_comp[n].v + y) * 8;
2695                                                                 int ha = z->img_comp[n].ha;
2696                                                                 if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2697                                                                 z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2*y2 + x2, z->img_comp[n].w2, data);
2698                                                         }
2699                                                 }
2700                                         }
2701                                         // after all interleaved components, that's an interleaved MCU,
2702                                         // so now count down the restart interval
2703                                         if (--z->todo <= 0) {
2704                                                 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2705                                                 if (!STBI__RESTART(z->marker)) return 1;
2706                                                 stbi__jpeg_reset(z);
2707                                         }
2708                                 }
2709                         }
2710                         return 1;
2711                 }
2712         }
2713         else {
2714                 if (z->scan_n == 1) {
2715                         int i, j;
2716                         int n = z->order[0];
2717                         // non-interleaved data, we just need to process one block at a time,
2718                         // in trivial scanline order
2719                         // number of blocks to do just depends on how many actual "pixels" this
2720                         // component has, independent of interleaved MCU blocking and such
2721                         int w = (z->img_comp[n].x + 7) >> 3;
2722                         int h = (z->img_comp[n].y + 7) >> 3;
2723                         for (j = 0; j < h; ++j) {
2724                                 for (i = 0; i < w; ++i) {
2725                                         short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2726                                         if (z->spec_start == 0) {
2727                                                 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2728                                                         return 0;
2729                                         }
2730                                         else {
2731                                                 int ha = z->img_comp[n].ha;
2732                                                 if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
2733                                                         return 0;
2734                                         }
2735                                         // every data block is an MCU, so countdown the restart interval
2736                                         if (--z->todo <= 0) {
2737                                                 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2738                                                 if (!STBI__RESTART(z->marker)) return 1;
2739                                                 stbi__jpeg_reset(z);
2740                                         }
2741                                 }
2742                         }
2743                         return 1;
2744                 }
2745                 else { // interleaved
2746                         int i, j, k, x, y;
2747                         for (j = 0; j < z->img_mcu_y; ++j) {
2748                                 for (i = 0; i < z->img_mcu_x; ++i) {
2749                                         // scan an interleaved mcu... process scan_n components in order
2750                                         for (k = 0; k < z->scan_n; ++k) {
2751                                                 int n = z->order[k];
2752                                                 // scan out an mcu's worth of this component; that's just determined
2753                                                 // by the basic H and V specified for the component
2754                                                 for (y = 0; y < z->img_comp[n].v; ++y) {
2755                                                         for (x = 0; x < z->img_comp[n].h; ++x) {
2756                                                                 int x2 = (i*z->img_comp[n].h + x);
2757                                                                 int y2 = (j*z->img_comp[n].v + y);
2758                                                                 short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
2759                                                                 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2760                                                                         return 0;
2761                                                         }
2762                                                 }
2763                                         }
2764                                         // after all interleaved components, that's an interleaved MCU,
2765                                         // so now count down the restart interval
2766                                         if (--z->todo <= 0) {
2767                                                 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2768                                                 if (!STBI__RESTART(z->marker)) return 1;
2769                                                 stbi__jpeg_reset(z);
2770                                         }
2771                                 }
2772                         }
2773                         return 1;
2774                 }
2775         }
2776 }
2777
2778 static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
2779 {
2780         int i;
2781         for (i = 0; i < 64; ++i)
2782                 data[i] *= dequant[i];
2783 }
2784
2785 static void stbi__jpeg_finish(stbi__jpeg *z)
2786 {
2787         if (z->progressive) {
2788                 // dequantize and idct the data
2789                 int i, j, n;
2790                 for (n = 0; n < z->s->img_n; ++n) {
2791                         int w = (z->img_comp[n].x + 7) >> 3;
2792                         int h = (z->img_comp[n].y + 7) >> 3;
2793                         for (j = 0; j < h; ++j) {
2794                                 for (i = 0; i < w; ++i) {
2795                                         short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2796                                         stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
2797                                         z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2*j * 8 + i * 8, z->img_comp[n].w2, data);
2798                                 }
2799                         }
2800                 }
2801         }
2802 }
2803
2804 static int stbi__process_marker(stbi__jpeg *z, int m)
2805 {
2806         int L;
2807         switch (m) {
2808         case STBI__MARKER_none: // no marker found
2809                 return stbi__err("expected marker", "Corrupt JPEG");
2810
2811         case 0xDD: // DRI - specify restart interval
2812                 if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len", "Corrupt JPEG");
2813                 z->restart_interval = stbi__get16be(z->s);
2814                 return 1;
2815
2816         case 0xDB: // DQT - define quantization table
2817                 L = stbi__get16be(z->s) - 2;
2818                 while (L > 0) {
2819                         int q = stbi__get8(z->s);
2820                         int p = q >> 4, sixteen = (p != 0);
2821                         int t = q & 15, i;
2822                         if (p != 0 && p != 1) return stbi__err("bad DQT type", "Corrupt JPEG");
2823                         if (t > 3) return stbi__err("bad DQT table", "Corrupt JPEG");
2824
2825                         for (i = 0; i < 64; ++i)
2826                                 z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
2827                         L -= (sixteen ? 129 : 65);
2828                 }
2829                 return L == 0;
2830
2831         case 0xC4: // DHT - define huffman table
2832                 L = stbi__get16be(z->s) - 2;
2833                 while (L > 0) {
2834                         stbi_uc *v;
2835                         int sizes[16], i, n = 0;
2836                         int q = stbi__get8(z->s);
2837                         int tc = q >> 4;
2838                         int th = q & 15;
2839                         if (tc > 1 || th > 3) return stbi__err("bad DHT header", "Corrupt JPEG");
2840                         for (i = 0; i < 16; ++i) {
2841                                 sizes[i] = stbi__get8(z->s);
2842                                 n += sizes[i];
2843                         }
2844                         L -= 17;
2845                         if (tc == 0) {
2846                                 if (!stbi__build_huffman(z->huff_dc + th, sizes)) return 0;
2847                                 v = z->huff_dc[th].values;
2848                         }
2849                         else {
2850                                 if (!stbi__build_huffman(z->huff_ac + th, sizes)) return 0;
2851                                 v = z->huff_ac[th].values;
2852                         }
2853                         for (i = 0; i < n; ++i)
2854                                 v[i] = stbi__get8(z->s);
2855                         if (tc != 0)
2856                                 stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
2857                         L -= n;
2858                 }
2859                 return L == 0;
2860         }
2861
2862         // check for comment block or APP blocks
2863         if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
2864                 L = stbi__get16be(z->s);
2865                 if (L < 2) {
2866                         if (m == 0xFE)
2867                                 return stbi__err("bad COM len", "Corrupt JPEG");
2868                         else
2869                                 return stbi__err("bad APP len", "Corrupt JPEG");
2870                 }
2871                 L -= 2;
2872
2873                 if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
2874                         static const unsigned char tag[5] = { 'J','F','I','F','\0' };
2875                         int ok = 1;
2876                         int i;
2877                         for (i = 0; i < 5; ++i)
2878                                 if (stbi__get8(z->s) != tag[i])
2879                                         ok = 0;
2880                         L -= 5;
2881                         if (ok)
2882                                 z->jfif = 1;
2883                 }
2884                 else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
2885                         static const unsigned char tag[6] = { 'A','d','o','b','e','\0' };
2886                         int ok = 1;
2887                         int i;
2888                         for (i = 0; i < 6; ++i)
2889                                 if (stbi__get8(z->s) != tag[i])
2890                                         ok = 0;
2891                         L -= 6;
2892                         if (ok) {
2893                                 stbi__get8(z->s); // version
2894                                 stbi__get16be(z->s); // flags0
2895                                 stbi__get16be(z->s); // flags1
2896                                 z->app14_color_transform = stbi__get8(z->s); // color transform
2897                                 L -= 6;
2898                         }
2899                 }
2900
2901                 stbi__skip(z->s, L);
2902                 return 1;
2903         }
2904
2905         return stbi__err("unknown marker", "Corrupt JPEG");
2906 }
2907
2908 // after we see SOS
2909 static int stbi__process_scan_header(stbi__jpeg *z)
2910 {
2911         int i;
2912         int Ls = stbi__get16be(z->s);
2913         z->scan_n = stbi__get8(z->s);
2914         if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n) return stbi__err("bad SOS component count", "Corrupt JPEG");
2915         if (Ls != 6 + 2 * z->scan_n) return stbi__err("bad SOS len", "Corrupt JPEG");
2916         for (i = 0; i < z->scan_n; ++i) {
2917                 int id = stbi__get8(z->s), which;
2918                 int q = stbi__get8(z->s);
2919                 for (which = 0; which < z->s->img_n; ++which)
2920                         if (z->img_comp[which].id == id)
2921                                 break;
2922                 if (which == z->s->img_n) return 0; // no match
2923                 z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff", "Corrupt JPEG");
2924                 z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff", "Corrupt JPEG");
2925                 z->order[i] = which;
2926         }
2927
2928         {
2929                 int aa;
2930                 z->spec_start = stbi__get8(z->s);
2931                 z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
2932                 aa = stbi__get8(z->s);
2933                 z->succ_high = (aa >> 4);
2934                 z->succ_low = (aa & 15);
2935                 if (z->progressive) {
2936                         if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2937                                 return stbi__err("bad SOS", "Corrupt JPEG");
2938                 }
2939                 else {
2940                         if (z->spec_start != 0) return stbi__err("bad SOS", "Corrupt JPEG");
2941                         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS", "Corrupt JPEG");
2942                         z->spec_end = 63;
2943                 }
2944         }
2945
2946         return 1;
2947 }
2948
2949 static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
2950 {
2951         int i;
2952         for (i = 0; i < ncomp; ++i) {
2953                 if (z->img_comp[i].raw_data) {
2954                         STBI_FREE(z->img_comp[i].raw_data);
2955                         z->img_comp[i].raw_data = NULL;
2956                         z->img_comp[i].data = NULL;
2957                 }
2958                 if (z->img_comp[i].raw_coeff) {
2959                         STBI_FREE(z->img_comp[i].raw_coeff);
2960                         z->img_comp[i].raw_coeff = 0;
2961                         z->img_comp[i].coeff = 0;
2962                 }
2963                 if (z->img_comp[i].linebuf) {
2964                         STBI_FREE(z->img_comp[i].linebuf);
2965                         z->img_comp[i].linebuf = NULL;
2966                 }
2967         }
2968         return why;
2969 }
2970
2971 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
2972 {
2973         stbi__context *s = z->s;
2974         int Lf, p, i, q, h_max = 1, v_max = 1, c;
2975         Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len", "Corrupt JPEG"); // JPEG
2976         p = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit", "JPEG format not supported: 8-bit only"); // JPEG baseline
2977         s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
2978         s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires
2979         c = stbi__get8(s);
2980         if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count", "Corrupt JPEG");
2981         s->img_n = c;
2982         for (i = 0; i < c; ++i) {
2983                 z->img_comp[i].data = NULL;
2984                 z->img_comp[i].linebuf = NULL;
2985         }
2986
2987         if (Lf != 8 + 3 * s->img_n) return stbi__err("bad SOF len", "Corrupt JPEG");
2988
2989         z->rgb = 0;
2990         for (i = 0; i < s->img_n; ++i) {
2991                 static unsigned char rgb[3] = { 'R', 'G', 'B' };
2992                 z->img_comp[i].id = stbi__get8(s);
2993                 if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
2994                         ++z->rgb;
2995                 q = stbi__get8(s);
2996                 z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H", "Corrupt JPEG");
2997                 z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V", "Corrupt JPEG");
2998                 z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ", "Corrupt JPEG");
2999         }
3000
3001         if (scan != STBI__SCAN_load) return 1;
3002
3003         if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
3004
3005         for (i = 0; i < s->img_n; ++i) {
3006                 if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
3007                 if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
3008         }
3009
3010         // compute interleaved mcu info
3011         z->img_h_max = h_max;
3012         z->img_v_max = v_max;
3013         z->img_mcu_w = h_max * 8;
3014         z->img_mcu_h = v_max * 8;
3015         // these sizes can't be more than 17 bits
3016         z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
3017         z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
3018
3019         for (i = 0; i < s->img_n; ++i) {
3020                 // number of effective pixels (e.g. for non-interleaved MCU)
3021                 z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
3022                 z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
3023                 // to simplify generation, we'll allocate enough memory to decode
3024                 // the bogus oversized data from using interleaved MCUs and their
3025                 // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
3026                 // discard the extra data until colorspace conversion
3027                 //
3028                 // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
3029                 // so these muls can't overflow with 32-bit ints (which we require)
3030                 z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
3031                 z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
3032                 z->img_comp[i].coeff = 0;
3033                 z->img_comp[i].raw_coeff = 0;
3034                 z->img_comp[i].linebuf = NULL;
3035                 z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
3036                 if (z->img_comp[i].raw_data == NULL)
3037                         return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
3038                 // align blocks for idct using mmx/sse
3039                 z->img_comp[i].data = (stbi_uc*)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
3040                 if (z->progressive) {
3041                         // w2, h2 are multiples of 8 (see above)
3042                         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
3043                         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
3044                         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
3045                         if (z->img_comp[i].raw_coeff == NULL)
3046                                 return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
3047                         z->img_comp[i].coeff = (short*)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15);
3048                 }
3049         }
3050
3051         return 1;
3052 }
3053
// Marker classification helpers, listed by marker value. They are written as
// comparisons rather than constants because one helper may accept several
// marker codes (stbi__SOF matches three).
#define stbi__SOF(x)         ((x) == 0xC0 || (x) == 0xC1 || (x) == 0xC2)
#define stbi__SOI(x)         ((x) == 0xD8)
#define stbi__EOI(x)         ((x) == 0xD9)
#define stbi__SOS(x)         ((x) == 0xDA)
#define stbi__DNL(x)         ((x) == 0xDC)

// the one start-of-frame code that selects progressive decoding
#define stbi__SOF_progressive(x)   ((x) == 0xC2)
3062
3063 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
3064 {
3065         int m;
3066         z->jfif = 0;
3067         z->app14_color_transform = -1; // valid values are 0,1,2
3068         z->marker = STBI__MARKER_none; // initialize cached marker to empty
3069         m = stbi__get_marker(z);
3070         if (!stbi__SOI(m)) return stbi__err("no SOI", "Corrupt JPEG");
3071         if (scan == STBI__SCAN_type) return 1;
3072         m = stbi__get_marker(z);
3073         while (!stbi__SOF(m)) {
3074                 if (!stbi__process_marker(z, m)) return 0;
3075                 m = stbi__get_marker(z);
3076                 while (m == STBI__MARKER_none) {
3077                         // some files have extra padding after their blocks, so ok, we'll scan
3078                         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
3079                         m = stbi__get_marker(z);
3080                 }
3081         }
3082         z->progressive = stbi__SOF_progressive(m);
3083         if (!stbi__process_frame_header(z, scan)) return 0;
3084         return 1;
3085 }
3086
3087 // decode image to YCbCr format
3088 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3089 {
3090         int m;
3091         for (m = 0; m < 4; m++) {
3092                 j->img_comp[m].raw_data = NULL;
3093                 j->img_comp[m].raw_coeff = NULL;
3094         }
3095         j->restart_interval = 0;
3096         if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3097         m = stbi__get_marker(j);
3098         while (!stbi__EOI(m)) {
3099                 if (stbi__SOS(m)) {
3100                         if (!stbi__process_scan_header(j)) return 0;
3101                         if (!stbi__parse_entropy_coded_data(j)) return 0;
3102                         if (j->marker == STBI__MARKER_none) {
3103                                 // handle 0s at the end of image data from IP Kamera 9060
3104                                 while (!stbi__at_eof(j->s)) {
3105                                         int x = stbi__get8(j->s);
3106                                         if (x == 255) {
3107                                                 j->marker = stbi__get8(j->s);
3108                                                 break;
3109                                         }
3110                                 }
3111                                 // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3112                         }
3113                 }
3114                 else if (stbi__DNL(m)) {
3115                         int Ld = stbi__get16be(j->s);
3116                         stbi__uint32 NL = stbi__get16be(j->s);
3117                         if (Ld != 4) stbi__err("bad DNL len", "Corrupt JPEG");
3118                         if (NL != j->s->img_y) stbi__err("bad DNL height", "Corrupt JPEG");
3119                 }
3120                 else {
3121                         if (!stbi__process_marker(j, m)) return 0;
3122                 }
3123                 m = stbi__get_marker(j);
3124         }
3125         if (j->progressive)
3126                 stbi__jpeg_finish(j);
3127         return 1;
3128 }
3129
3130 // static jfif-centered resampling (across block boundaries)
3131
3132 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
3133         int w, int hs);
3134
3135 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3136
3137 static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3138 {
3139         STBI_NOTUSED(out);
3140         STBI_NOTUSED(in_far);
3141         STBI_NOTUSED(w);
3142         STBI_NOTUSED(hs);
3143         return in_near;
3144 }
3145
3146 static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3147 {
3148         // need to generate two samples vertically for every one in input
3149         int i;
3150         STBI_NOTUSED(hs);
3151         for (i = 0; i < w; ++i)
3152                 out[i] = stbi__div4(3 * in_near[i] + in_far[i] + 2);
3153         return out;
3154 }
3155
3156 static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3157 {
3158         // need to generate two samples horizontally for every one in input
3159         int i;
3160         stbi_uc *input = in_near;
3161
3162         if (w == 1) {
3163                 // if only one sample, can't do any interpolation
3164                 out[0] = out[1] = input[0];
3165                 return out;
3166         }
3167
3168         out[0] = input[0];
3169         out[1] = stbi__div4(input[0] * 3 + input[1] + 2);
3170         for (i = 1; i < w - 1; ++i) {
3171                 int n = 3 * input[i] + 2;
3172                 out[i * 2 + 0] = stbi__div4(n + input[i - 1]);
3173                 out[i * 2 + 1] = stbi__div4(n + input[i + 1]);
3174         }
3175         out[i * 2 + 0] = stbi__div4(input[w - 2] * 3 + input[w - 1] + 2);
3176         out[i * 2 + 1] = input[w - 1];
3177
3178         STBI_NOTUSED(in_far);
3179         STBI_NOTUSED(hs);
3180
3181         return out;
3182 }
3183
3184 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3185
3186 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3187 {
3188         // need to generate 2x2 samples for every one in input
3189         int i, t0, t1;
3190         if (w == 1) {
3191                 out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
3192                 return out;
3193         }
3194
3195         t1 = 3 * in_near[0] + in_far[0];
3196         out[0] = stbi__div4(t1 + 2);
3197         for (i = 1; i < w; ++i) {
3198                 t0 = t1;
3199                 t1 = 3 * in_near[i] + in_far[i];
3200                 out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
3201                 out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
3202         }
3203         out[w * 2 - 1] = stbi__div4(t1 + 2);
3204
3205         STBI_NOTUSED(hs);
3206
3207         return out;
3208 }
3209
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD version of stbi__resample_row_hv_2 (2x2 upsampling of one row pair).
// Groups of 8 input columns are handled with SSE2 or NEON; the remaining
// columns, including the right edge, go through the scalar code after the
// loop.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
        // left_v / here_v: vertically blended (3*near + far) values of the
        // previous and current column
        int i = 0, left_v, here_v;

        here_v = 3 * in_near[0] + in_far[0];

        if (w == 1) {
                out[0] = out[1] = stbi__div4(here_v + 2);
                return out;
        }

        // The vector loop stops before the last column: the loop bound
        // ((w - 1) & ~7) keeps the in_near[i + 8] / in_far[i + 8] reads inside
        // the row and leaves the right-edge filter case to the scalar code.
        for (; i < ((w - 1) & ~7); i += 8) {
#if defined(STBI_SSE2)
                // vertical pass, using 3*x + y = 4*x + (y - x)
                __m128i zero = _mm_setzero_si128();
                __m128i far8 = _mm_loadl_epi64((__m128i *) (in_far + i));
                __m128i near8 = _mm_loadl_epi64((__m128i *) (in_near + i));
                __m128i far16 = _mm_unpacklo_epi8(far8, zero);
                __m128i near16 = _mm_unpacklo_epi8(near8, zero);
                __m128i delta = _mm_sub_epi16(far16, near16);
                __m128i near_x4 = _mm_slli_epi16(near16, 2);
                __m128i row = _mm_add_epi16(near_x4, delta); // vertically blended columns

                // neighbours for the horizontal pass: the row moved one lane in
                // each direction, with the value carried from the previous group
                // (here_v) put in lane 0 and the first column of the next group
                // put in lane 7
                __m128i row_up = _mm_slli_si128(row, 2);
                __m128i row_down = _mm_srli_si128(row, 2);
                __m128i row_prev = _mm_insert_epi16(row_up, here_v, 0);
                __m128i row_next = _mm_insert_epi16(row_down, 3 * in_near[i + 8] + in_far[i + 8], 7);

                // horizontal pass, polyphase form:
                //   even outputs = 3*cur + prev = 4*cur + (prev - cur)
                //   odd  outputs = 3*cur + next = 4*cur + (next - cur)
                // the 4*cur + rounding term is shared
                __m128i round8 = _mm_set1_epi16(8);
                __m128i row_x4 = _mm_slli_epi16(row, 2);
                __m128i d_prev = _mm_sub_epi16(row_prev, row);
                __m128i d_next = _mm_sub_epi16(row_next, row);
                __m128i base = _mm_add_epi16(row_x4, round8);
                __m128i even_px = _mm_add_epi16(d_prev, base);
                __m128i odd_px = _mm_add_epi16(d_next, base);

                // interleave even/odd outputs and remove the factor of 16
                __m128i mix_lo = _mm_unpacklo_epi16(even_px, odd_px);
                __m128i mix_hi = _mm_unpackhi_epi16(even_px, odd_px);
                __m128i px_lo = _mm_srli_epi16(mix_lo, 4);
                __m128i px_hi = _mm_srli_epi16(mix_hi, 4);

                // narrow to bytes and store 16 outputs
                __m128i packed = _mm_packus_epi16(px_lo, px_hi);
                _mm_storeu_si128((__m128i *) (out + i * 2), packed);
#elif defined(STBI_NEON)
                // vertical pass, using 3*x + y = 4*x + (y - x)
                uint8x8_t far8 = vld1_u8(in_far + i);
                uint8x8_t near8 = vld1_u8(in_near + i);
                int16x8_t delta = vreinterpretq_s16_u16(vsubl_u8(far8, near8));
                int16x8_t near_x4 = vreinterpretq_s16_u16(vshll_n_u8(near8, 2));
                int16x8_t row = vaddq_s16(near_x4, delta); // vertically blended columns

                // neighbours for the horizontal pass: the row rotated one lane in
                // each direction, with the value carried from the previous group
                // (here_v) put in lane 0 and the first column of the next group
                // put in lane 7
                int16x8_t row_up = vextq_s16(row, row, 7);
                int16x8_t row_down = vextq_s16(row, row, 1);
                int16x8_t row_prev = vsetq_lane_s16(here_v, row_up, 0);
                int16x8_t row_next = vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], row_down, 7);

                // horizontal pass, polyphase form:
                //   even outputs = 3*cur + prev = 4*cur + (prev - cur)
                //   odd  outputs = 3*cur + next = 4*cur + (next - cur)
                int16x8_t row_x4 = vshlq_n_s16(row, 2);
                int16x8_t d_prev = vsubq_s16(row_prev, row);
                int16x8_t d_next = vsubq_s16(row_next, row);
                int16x8_t even_px = vaddq_s16(row_x4, d_prev);
                int16x8_t odd_px = vaddq_s16(row_x4, d_next);

                // rounding shift by 4 with narrowing, then an interleaving store
                uint8x8x2_t pair;
                pair.val[0] = vqrshrun_n_s16(even_px, 4);
                pair.val[1] = vqrshrun_n_s16(odd_px, 4);
                vst2_u8(out + i * 2, pair);
#endif

                // carry the last column of this group into the next iteration
                here_v = 3 * in_near[i + 7] + in_far[i + 7];
        }

        // first scalar column: its left-hand output was either produced by the
        // vector loop or (when i == 0) is the left edge, so only out[i * 2] is
        // written here
        left_v = here_v;
        here_v = 3 * in_near[i] + in_far[i];
        out[i * 2] = stbi__div16(3 * here_v + left_v + 8);

        ++i;
        while (i < w) {
                left_v = here_v;
                here_v = 3 * in_near[i] + in_far[i];
                out[i * 2 - 1] = stbi__div16(3 * left_v + here_v + 8);
                out[i * 2] = stbi__div16(3 * here_v + left_v + 8);
                ++i;
        }
        // right edge: vertical blend only
        out[w * 2 - 1] = stbi__div4(here_v + 2);

        STBI_NOTUSED(hs);

        return out;
}
#endif
3326
3327 static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3328 {
3329         // resample with nearest-neighbor
3330         int i, j;
3331         STBI_NOTUSED(in_far);
3332         for (i = 0; i < w; ++i)
3333                 for (j = 0; j < hs; ++j)
3334                         out[i*hs + j] = in_near[i];
3335         return out;
3336 }
3337
3338 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3339 // to make sure the code produces the same results in both SIMD and scalar
3340 #define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
3341 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3342 {
3343         int i;
3344         for (i = 0; i < count; ++i) {
3345                 int y_fixed = (y[i] << 20) + (1 << 19); // rounding
3346                 int r, g, b;
3347                 int cr = pcr[i] - 128;
3348                 int cb = pcb[i] - 128;
3349                 r = y_fixed + cr* stbi__float2fixed(1.40200f);
3350                 g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3351                 b = y_fixed + cb* stbi__float2fixed(1.77200f);
3352                 r >>= 20;
3353                 g >>= 20;
3354                 b >>= 20;
3355                 if ((unsigned)r > 255) { if (r < 0) r = 0; else r = 255; }
3356                 if ((unsigned)g > 255) { if (g < 0) g = 0; else g = 255; }
3357                 if ((unsigned)b > 255) { if (b < 0) b = 0; else b = 255; }
3358                 out[0] = (stbi_uc)r;
3359                 out[1] = (stbi_uc)g;
3360                 out[2] = (stbi_uc)b;
3361                 out[3] = 255;
3362                 out += step;
3363         }
3364 }
3365
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD version of stbi__YCbCr_to_RGB_row. When step == 4, pixels are
// converted eight at a time with SSE2 or NEON; whatever is left (and every
// pixel when step != 4) goes through the scalar loop at the end, which uses
// the same arithmetic as the scalar row converter.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
        int i = 0;

#ifdef STBI_SSE2
        // step == 3 is pretty ugly on the final interleave, and i'm not convinced
        // it's useful in practice (you wouldn't use it for textures, for example).
        // so just accelerate step == 4 case.
        if (step == 4) {
                // this is a fairly straightforward implementation and not super-optimized.
                __m128i flip = _mm_set1_epi8(-0x80);
                __m128i k_cr_r = _mm_set1_epi16((short)(1.40200f*4096.0f + 0.5f));
                __m128i k_cr_g = _mm_set1_epi16(-(short)(0.71414f*4096.0f + 0.5f));
                __m128i k_cb_g = _mm_set1_epi16(-(short)(0.34414f*4096.0f + 0.5f));
                __m128i k_cb_b = _mm_set1_epi16((short)(1.77200f*4096.0f + 0.5f));
                __m128i half = _mm_set1_epi8((char)(unsigned char)128);
                __m128i alpha = _mm_set1_epi16(255); // alpha channel

                for (; i + 7 < count; i += 8) {
                        // load 8 samples of each plane; flipping the top bit
                        // subtracts 128 from the chroma bytes
                        __m128i y8 = _mm_loadl_epi64((__m128i *) (y + i));
                        __m128i cr8 = _mm_loadl_epi64((__m128i *) (pcr + i));
                        __m128i cb8 = _mm_loadl_epi64((__m128i *) (pcb + i));
                        __m128i cr_s = _mm_xor_si128(cr8, flip); // -128
                        __m128i cb_s = _mm_xor_si128(cb8, flip); // -128

                        // widen to 16 bits with the sample in the high byte
                        // (luma gets 128 in the low byte, chroma gets 0)
                        __m128i y16 = _mm_unpacklo_epi8(half, y8);
                        __m128i cr16 = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_s);
                        __m128i cb16 = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_s);

                        // color transform
                        __m128i y_fx = _mm_srli_epi16(y16, 4);
                        __m128i cr_r = _mm_mulhi_epi16(k_cr_r, cr16);
                        __m128i cb_g = _mm_mulhi_epi16(k_cb_g, cb16);
                        __m128i cb_b = _mm_mulhi_epi16(cb16, k_cb_b);
                        __m128i cr_g = _mm_mulhi_epi16(cr16, k_cr_g);
                        __m128i r_fx = _mm_add_epi16(cr_r, y_fx);
                        __m128i g_part = _mm_add_epi16(cb_g, y_fx);
                        __m128i b_fx = _mm_add_epi16(y_fx, cb_b);
                        __m128i g_fx = _mm_add_epi16(g_part, cr_g);

                        // descale
                        __m128i r16 = _mm_srai_epi16(r_fx, 4);
                        __m128i b16 = _mm_srai_epi16(b_fx, 4);
                        __m128i g16 = _mm_srai_epi16(g_fx, 4);

                        // saturate to bytes: (r | b) and (g | alpha)
                        __m128i rb = _mm_packus_epi16(r16, b16);
                        __m128i ga = _mm_packus_epi16(g16, alpha);

                        // transpose to interleave channels
                        __m128i rg = _mm_unpacklo_epi8(rb, ga);
                        __m128i ba = _mm_unpackhi_epi8(rb, ga);
                        __m128i px_lo = _mm_unpacklo_epi16(rg, ba);
                        __m128i px_hi = _mm_unpackhi_epi16(rg, ba);

                        // store 8 pixels (32 bytes)
                        _mm_storeu_si128((__m128i *) (out + 0), px_lo);
                        _mm_storeu_si128((__m128i *) (out + 16), px_hi);
                        out += 32;
                }
        }
#endif

#ifdef STBI_NEON
        // in this version, step=3 support would be easy to add. but is there demand?
        if (step == 4) {
                // this is a fairly straightforward implementation and not super-optimized.
                uint8x8_t flip = vdup_n_u8(0x80);
                int16x8_t k_cr_r = vdupq_n_s16((short)(1.40200f*4096.0f + 0.5f));
                int16x8_t k_cr_g = vdupq_n_s16(-(short)(0.71414f*4096.0f + 0.5f));
                int16x8_t k_cb_g = vdupq_n_s16(-(short)(0.34414f*4096.0f + 0.5f));
                int16x8_t k_cb_b = vdupq_n_s16((short)(1.77200f*4096.0f + 0.5f));

                for (; i + 7 < count; i += 8) {
                        // load 8 samples of each plane and remove the chroma bias
                        uint8x8_t y8 = vld1_u8(y + i);
                        uint8x8_t cr8 = vld1_u8(pcr + i);
                        uint8x8_t cb8 = vld1_u8(pcb + i);
                        int8x8_t cr_s = vreinterpret_s8_u8(vsub_u8(cr8, flip));
                        int8x8_t cb_s = vreinterpret_s8_u8(vsub_u8(cb8, flip));

                        // expand to s16
                        int16x8_t y_fx = vreinterpretq_s16_u16(vshll_n_u8(y8, 4));
                        int16x8_t cr16 = vshll_n_s8(cr_s, 7);
                        int16x8_t cb16 = vshll_n_s8(cb_s, 7);

                        // color transform
                        int16x8_t cr_r = vqdmulhq_s16(cr16, k_cr_r);
                        int16x8_t cb_g = vqdmulhq_s16(cb16, k_cb_g);
                        int16x8_t cr_g = vqdmulhq_s16(cr16, k_cr_g);
                        int16x8_t cb_b = vqdmulhq_s16(cb16, k_cb_b);
                        int16x8_t r_fx = vaddq_s16(y_fx, cr_r);
                        int16x8_t g_fx = vaddq_s16(vaddq_s16(y_fx, cb_g), cr_g);
                        int16x8_t b_fx = vaddq_s16(y_fx, cb_b);

                        // undo scaling, round, convert to byte
                        uint8x8x4_t px;
                        px.val[0] = vqrshrun_n_s16(r_fx, 4);
                        px.val[1] = vqrshrun_n_s16(g_fx, 4);
                        px.val[2] = vqrshrun_n_s16(b_fx, 4);
                        px.val[3] = vdup_n_u8(255);

                        // store, interleaving r/g/b/a
                        vst4_u8(out, px);
                        out += 8 * 4;
                }
        }
#endif

        // scalar remainder (same fixed-point arithmetic as the scalar row converter)
        for (; i < count; ++i) {
                int luma = (y[i] << 20) + (1 << 19); // rounding
                int cr = pcr[i] - 128;
                int cb = pcb[i] - 128;
                int r = luma + cr * stbi__float2fixed(1.40200f);
                int g = luma + cr * -stbi__float2fixed(0.71414f) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
                int b = luma + cb * stbi__float2fixed(1.77200f);

                r >>= 20;
                g >>= 20;
                b >>= 20;

                // clamp to 0..255
                if (r < 0) r = 0; else if (r > 255) r = 255;
                if (g < 0) g = 0; else if (g > 255) g = 255;
                if (b < 0) b = 0; else if (b > 255) b = 255;

                out[0] = (stbi_uc)r;
                out[1] = (stbi_uc)g;
                out[2] = (stbi_uc)b;
                out[3] = 255;
                out += step;
        }
}
#endif
3500
3501 // set up the kernels
3502 static void stbi__setup_jpeg(stbi__jpeg *j)
3503 {
3504         j->idct_block_kernel = stbi__idct_block;
3505         j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3506         j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3507
3508 #ifdef STBI_SSE2
3509         if (stbi__sse2_available()) {
3510                 j->idct_block_kernel = stbi__idct_simd;
3511                 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3512                 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3513         }
3514 #endif
3515
3516 #ifdef STBI_NEON
3517         j->idct_block_kernel = stbi__idct_simd;
3518         j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3519         j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3520 #endif
3521 }
3522
3523 // clean up the temporary component buffers
3524 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3525 {
3526         stbi__free_jpeg_components(j, j->s->img_n, 0);
3527 }
3528
3529 typedef struct
3530 {
3531         resample_row_func resample;
3532         stbi_uc *line0, *line1;
3533         int hs, vs;   // expansion factor in each axis
3534         int w_lores; // horizontal pixels pre-expansion
3535         int ystep;   // how far through vertical expansion we are
3536         int ypos;    // which pre-expansion row we're on
3537 } stbi__resample;
3538
3539 // fast 0..255 * 0..255 => 0..255 rounded multiplication
3540 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3541 {
3542         unsigned int t = x*y + 128;
3543         return (stbi_uc)((t + (t >> 8)) >> 8);
3544 }
3545
3546 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3547 {
3548         int n, decode_n, is_rgb;
3549         z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3550
3551                                          // validate req_comp
3552         if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3553
3554         // load a jpeg image from whichever source, but leave in YCbCr format
3555         if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3556
3557         // determine actual number of components to generate
3558         n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3559
3560         is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
3561
3562         if (z->s->img_n == 3 && n < 3 && !is_rgb)
3563                 decode_n = 1;
3564         else
3565                 decode_n = z->s->img_n;
3566
3567         // resample and color-convert
3568         {
3569                 int k;
3570                 unsigned int i, j;
3571                 stbi_uc *output;
3572                 stbi_uc *coutput[4];
3573
3574                 stbi__resample res_comp[4];
3575
3576                 for (k = 0; k < decode_n; ++k) {
3577                         stbi__resample *r = &res_comp[k];
3578
3579                         // allocate line buffer big enough for upsampling off the edges
3580                         // with upsample factor of 4
3581                         z->img_comp[k].linebuf = (stbi_uc *)stbi__malloc(z->s->img_x + 3);
3582                         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3583
3584                         r->hs = z->img_h_max / z->img_comp[k].h;
3585                         r->vs = z->img_v_max / z->img_comp[k].v;
3586                         r->ystep = r->vs >> 1;
3587                         r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;
3588                         r->ypos = 0;
3589                         r->line0 = r->line1 = z->img_comp[k].data;
3590
3591                         if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3592                         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3593                         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3594                         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3595                         else                               r->resample = stbi__resample_row_generic;
3596                 }
3597
3598                 // can't error after this so, this is safe
3599                 output = (stbi_uc *)stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
3600                 if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3601
3602                 // now go ahead and resample
3603                 for (j = 0; j < z->s->img_y; ++j) {
3604                         stbi_uc *out = output + n * z->s->img_x * j;
3605                         for (k = 0; k < decode_n; ++k) {
3606                                 stbi__resample *r = &res_comp[k];
3607                                 int y_bot = r->ystep >= (r->vs >> 1);
3608                                 coutput[k] = r->resample(z->img_comp[k].linebuf,
3609                                         y_bot ? r->line1 : r->line0,
3610                                         y_bot ? r->line0 : r->line1,
3611                                         r->w_lores, r->hs);
3612                                 if (++r->ystep >= r->vs) {
3613                                         r->ystep = 0;
3614                                         r->line0 = r->line1;
3615                                         if (++r->ypos < z->img_comp[k].y)
3616                                                 r->line1 += z->img_comp[k].w2;
3617                                 }
3618                         }
3619                         if (n >= 3) {
3620                                 stbi_uc *y = coutput[0];
3621                                 if (z->s->img_n == 3) {
3622                                         if (is_rgb) {
3623                                                 for (i = 0; i < z->s->img_x; ++i) {
3624                                                         out[0] = y[i];
3625                                                         out[1] = coutput[1][i];
3626                                                         out[2] = coutput[2][i];
3627                                                         out[3] = 255;
3628                                                         out += n;
3629                                                 }
3630                                         }
3631                                         else {
3632                                                 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3633                                         }
3634                                 }
3635                                 else if (z->s->img_n == 4) {
3636                                         if (z->app14_color_transform == 0) { // CMYK
3637                                                 for (i = 0; i < z->s->img_x; ++i) {
3638                                                         stbi_uc m = coutput[3][i];
3639                                                         out[0] = stbi__blinn_8x8(coutput[0][i], m);
3640                                                         out[1] = stbi__blinn_8x8(coutput[1][i], m);
3641                                                         out[2] = stbi__blinn_8x8(coutput[2][i], m);
3642                                                         out[3] = 255;
3643                                                         out += n;
3644                                                 }
3645                                         }
3646                                         else if (z->app14_color_transform == 2) { // YCCK
3647                                                 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3648                                                 for (i = 0; i < z->s->img_x; ++i) {
3649                                                         stbi_uc m = coutput[3][i];
3650                                                         out[0] = stbi__blinn_8x8(255 - out[0], m);
3651                                                         out[1] = stbi__blinn_8x8(255 - out[1], m);
3652                                                         out[2] = stbi__blinn_8x8(255 - out[2], m);
3653                                                         out += n;
3654                                                 }
3655                                         }
3656                                         else { // YCbCr + alpha?  Ignore the fourth channel for now
3657                                                 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3658                                         }
3659                                 }
3660                                 else
3661                                         for (i = 0; i < z->s->img_x; ++i) {
3662                                                 out[0] = out[1] = out[2] = y[i];
3663                                                 out[3] = 255; // not used if n==3
3664                                                 out += n;
3665                                         }
3666                         }
3667                         else {
3668                                 if (is_rgb) {
3669                                         if (n == 1)
3670                                                 for (i = 0; i < z->s->img_x; ++i)
3671                                                         *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3672                                         else {
3673                                                 for (i = 0; i < z->s->img_x; ++i, out += 2) {
3674                                                         out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3675                                                         out[1] = 255;
3676                                                 }
3677                                         }
3678                                 }
3679                                 else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
3680                                         for (i = 0; i < z->s->img_x; ++i) {
3681                                                 stbi_uc m = coutput[3][i];
3682                                                 stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
3683                                                 stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
3684                                                 stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
3685                                                 out[0] = stbi__compute_y(r, g, b);
3686                                                 out[1] = 255;
3687                                                 out += n;
3688                                         }
3689                                 }
3690                                 else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
3691                                         for (i = 0; i < z->s->img_x; ++i) {
3692                                                 out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
3693                                                 out[1] = 255;
3694                                                 out += n;
3695                                         }
3696                                 }
3697                                 else {
3698                                         stbi_uc *y = coutput[0];
3699                                         if (n == 1)
3700                                                 for (i = 0; i < z->s->img_x; ++i) out[i] = y[i];
3701                                         else
3702                                                 for (i = 0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
3703                                 }
3704                         }
3705                 }
3706                 stbi__cleanup_jpeg(z);
3707                 *out_x = z->s->img_x;
3708                 *out_y = z->s->img_y;
3709                 if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
3710                 return output;
3711         }
3712 }
3713
3714 static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
3715 {
3716         unsigned char* result;
3717         stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
3718         STBI_NOTUSED(ri);
3719         j->s = s;
3720         stbi__setup_jpeg(j);
3721         result = load_jpeg_image(j, x, y, comp, req_comp);
3722         STBI_FREE(j);
3723         return result;
3724 }
3725
3726 static int stbi__jpeg_test(stbi__context *s)
3727 {
3728         int r;
3729         stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
3730         j->s = s;
3731         stbi__setup_jpeg(j);
3732         r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
3733         stbi__rewind(s);
3734         STBI_FREE(j);
3735         return r;
3736 }
3737
3738 static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
3739 {
3740         if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
3741                 stbi__rewind(j->s);
3742                 return 0;
3743         }
3744         if (x) *x = j->s->img_x;
3745         if (y) *y = j->s->img_y;
3746         if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
3747         return 1;
3748 }
3749
3750 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
3751 {
3752         int result;
3753         stbi__jpeg* j = (stbi__jpeg*)(stbi__malloc(sizeof(stbi__jpeg)));
3754         j->s = s;
3755         result = stbi__jpeg_info_raw(j, x, y, comp);
3756         STBI_FREE(j);
3757         return result;
3758 }
3759 #endif
3760
3761 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
3762 //    simple implementation
3763 //      - all input must be provided in an upfront buffer
3764 //      - all output is written to a single output buffer (can malloc/realloc)
3765 //    performance
3766 //      - fast huffman
3767
3768 #ifndef STBI_NO_ZLIB
3769
3770 // fast-way is faster to check than jpeg huffman, but slow way is slower
3771 #define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
3772 #define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3773
3774 // zlib-style huffman encoding
3775 // (jpegs packs from left, zlib from right, so can't share code)
3776 typedef struct
3777 {
3778         stbi__uint16 fast[1 << STBI__ZFAST_BITS];
3779         stbi__uint16 firstcode[16];
3780         int maxcode[17];
3781         stbi__uint16 firstsymbol[16];
3782         stbi_uc  size[288];
3783         stbi__uint16 value[288];
3784 } stbi__zhuffman;
3785
3786 stbi_inline static int stbi__bitreverse16(int n)
3787 {
3788         n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
3789         n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
3790         n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
3791         n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
3792         return n;
3793 }
3794
3795 stbi_inline static int stbi__bit_reverse(int v, int bits)
3796 {
3797         STBI_ASSERT(bits <= 16);
3798         // to bit reverse n bits, reverse 16 and shift
3799         // e.g. 11 bits, bit reverse and shift away 5
3800         return stbi__bitreverse16(v) >> (16 - bits);
3801 }
3802
3803 static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
3804 {
3805         int i, k = 0;
3806         int code, next_code[16], sizes[17];
3807
3808         // DEFLATE spec for generating codes
3809         memset(sizes, 0, sizeof(sizes));
3810         memset(z->fast, 0, sizeof(z->fast));
3811         for (i = 0; i < num; ++i)
3812                 ++sizes[sizelist[i]];
3813         sizes[0] = 0;
3814         for (i = 1; i < 16; ++i)
3815                 if (sizes[i] >(1 << i))
3816                         return stbi__err("bad sizes", "Corrupt PNG");
3817         code = 0;
3818         for (i = 1; i < 16; ++i) {
3819                 next_code[i] = code;
3820                 z->firstcode[i] = (stbi__uint16)code;
3821                 z->firstsymbol[i] = (stbi__uint16)k;
3822                 code = (code + sizes[i]);
3823                 if (sizes[i])
3824                         if (code - 1 >= (1 << i)) return stbi__err("bad codelengths", "Corrupt PNG");
3825                 z->maxcode[i] = code << (16 - i); // preshift for inner loop
3826                 code <<= 1;
3827                 k += sizes[i];
3828         }
3829         z->maxcode[16] = 0x10000; // sentinel
3830         for (i = 0; i < num; ++i) {
3831                 int s = sizelist[i];
3832                 if (s) {
3833                         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
3834                         stbi__uint16 fastv = (stbi__uint16)((s << 9) | i);
3835                         z->size[c] = (stbi_uc)s;
3836                         z->value[c] = (stbi__uint16)i;
3837                         if (s <= STBI__ZFAST_BITS) {
3838                                 int j = stbi__bit_reverse(next_code[s], s);
3839                                 while (j < (1 << STBI__ZFAST_BITS)) {
3840                                         z->fast[j] = fastv;
3841                                         j += (1 << s);
3842                                 }
3843                         }
3844                         ++next_code[s];
3845                 }
3846         }
3847         return 1;
3848 }
3849
3850 // zlib-from-memory implementation for PNG reading
3851 //    because PNG allows splitting the zlib stream arbitrarily,
3852 //    and it's annoying structurally to have PNG call ZLIB call PNG,
3853 //    we require PNG read all the IDATs and combine them into a single
3854 //    memory buffer
3855
3856 typedef struct
3857 {
3858         stbi_uc *zbuffer, *zbuffer_end;
3859         int num_bits;
3860         stbi__uint32 code_buffer;
3861
3862         char *zout;
3863         char *zout_start;
3864         char *zout_end;
3865         int   z_expandable;
3866
3867         stbi__zhuffman z_length, z_distance;
3868 } stbi__zbuf;
3869
3870 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3871 {
3872         if (z->zbuffer >= z->zbuffer_end) return 0;
3873         return *z->zbuffer++;
3874 }
3875
3876 static void stbi__fill_bits(stbi__zbuf *z)
3877 {
3878         do {
3879                 STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
3880                 z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits;
3881                 z->num_bits += 8;
3882         } while (z->num_bits <= 24);
3883 }
3884
3885 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3886 {
3887         unsigned int k;
3888         if (z->num_bits < n) stbi__fill_bits(z);
3889         k = z->code_buffer & ((1 << n) - 1);
3890         z->code_buffer >>= n;
3891         z->num_bits -= n;
3892         return k;
3893 }
3894
3895 static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
3896 {
3897         int b, s, k;
3898         // not resolved by fast table, so compute it the slow way
3899         // use jpeg approach, which requires MSbits at top
3900         k = stbi__bit_reverse(a->code_buffer, 16);
3901         for (s = STBI__ZFAST_BITS + 1; ; ++s)
3902                 if (k < z->maxcode[s])
3903                         break;
3904         if (s == 16) return -1; // invalid code!
3905                                                         // code size is s, so:
3906         b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
3907         STBI_ASSERT(z->size[b] == s);
3908         a->code_buffer >>= s;
3909         a->num_bits -= s;
3910         return z->value[b];
3911 }
3912
3913 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3914 {
3915         int b, s;
3916         if (a->num_bits < 16) stbi__fill_bits(a);
3917         b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3918         if (b) {
3919                 s = b >> 9;
3920                 a->code_buffer >>= s;
3921                 a->num_bits -= s;
3922                 return b & 511;
3923         }
3924         return stbi__zhuffman_decode_slowpath(a, z);
3925 }
3926
3927 static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
3928 {
3929         char *q;
3930         int cur, limit, old_limit;
3931         z->zout = zout;
3932         if (!z->z_expandable) return stbi__err("output buffer limit", "Corrupt PNG");
3933         cur = (int)(z->zout - z->zout_start);
3934         limit = old_limit = (int)(z->zout_end - z->zout_start);
3935         while (cur + n > limit)
3936                 limit *= 2;
3937         q = (char *)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
3938         STBI_NOTUSED(old_limit);
3939         if (q == NULL) return stbi__err("outofmem", "Out of memory");
3940         z->zout_start = q;
3941         z->zout = q + cur;
3942         z->zout_end = q + limit;
3943         return 1;
3944 }
3945
// Lookup tables for DEFLATE length and distance symbols. These are read-only,
// so they are declared const.
//
// Indexed by (length symbol - 257): base match length and number of extra
// bits to add to it. The trailing zero entries pad the tables.
static const int stbi__zlength_base[31] = {
        3,4,5,6,7,8,9,10,11,13,
        15,17,19,23,27,31,35,43,51,59,
        67,83,99,115,131,163,195,227,258,0,0 };

static const int stbi__zlength_extra[31] =
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// Indexed by distance symbol: base distance and number of extra bits to add
// to it. The last two entries of each table are zero padding.
static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0 };

static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0 };
3959
3960 static int stbi__parse_huffman_block(stbi__zbuf *a)
3961 {
3962         char *zout = a->zout;
3963         for (;;) {
3964                 int z = stbi__zhuffman_decode(a, &a->z_length);
3965                 if (z < 256) {
3966                         if (z < 0) return stbi__err("bad huffman code", "Corrupt PNG"); // error in huffman codes
3967                         if (zout >= a->zout_end) {
3968                                 if (!stbi__zexpand(a, zout, 1)) return 0;
3969                                 zout = a->zout;
3970                         }
3971                         *zout++ = (char)z;
3972                 }
3973                 else {
3974                         stbi_uc *p;
3975                         int len, dist;
3976                         if (z == 256) {
3977                                 a->zout = zout;
3978                                 return 1;
3979                         }
3980                         z -= 257;
3981                         len = stbi__zlength_base[z];
3982                         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
3983                         z = stbi__zhuffman_decode(a, &a->z_distance);
3984                         if (z < 0) return stbi__err("bad huffman code", "Corrupt PNG");
3985                         dist = stbi__zdist_base[z];
3986                         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
3987                         if (zout - a->zout_start < dist) return stbi__err("bad dist", "Corrupt PNG");
3988                         if (zout + len > a->zout_end) {
3989                                 if (!stbi__zexpand(a, zout, len)) return 0;
3990                                 zout = a->zout;
3991                         }
3992                         p = (stbi_uc *)(zout - dist);
3993                         if (dist == 1) { // run of one byte; common in images.
3994                                 stbi_uc v = *p;
3995                                 if (len) { do *zout++ = v; while (--len); }
3996                         }
3997                         else {
3998                                 if (len) { do *zout++ = *p++; while (--len); }
3999                         }
4000                 }
4001         }
4002 }
4003
4004 static int stbi__compute_huffman_codes(stbi__zbuf *a)
4005 {
4006         static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
4007         stbi__zhuffman z_codelength;
4008         stbi_uc lencodes[286 + 32 + 137];//padding for maximum single op
4009         stbi_uc codelength_sizes[19];
4010         int i, n;
4011
4012         int hlit = stbi__zreceive(a, 5) + 257;
4013         int hdist = stbi__zreceive(a, 5) + 1;
4014         int hclen = stbi__zreceive(a, 4) + 4;
4015         int ntot = hlit + hdist;
4016
4017         memset(codelength_sizes, 0, sizeof(codelength_sizes));
4018         for (i = 0; i < hclen; ++i) {
4019                 int s = stbi__zreceive(a, 3);
4020                 codelength_sizes[length_dezigzag[i]] = (stbi_uc)s;
4021         }
4022         if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
4023
4024         n = 0;
4025         while (n < ntot) {
4026                 int c = stbi__zhuffman_decode(a, &z_codelength);
4027                 if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
4028                 if (c < 16)
4029                         lencodes[n++] = (stbi_uc)c;
4030                 else {
4031                         stbi_uc fill = 0;
4032                         if (c == 16) {
4033                                 c = stbi__zreceive(a, 2) + 3;
4034                                 if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
4035                                 fill = lencodes[n - 1];
4036                         }
4037                         else if (c == 17)
4038                                 c = stbi__zreceive(a, 3) + 3;
4039                         else {
4040                                 STBI_ASSERT(c == 18);
4041                                 c = stbi__zreceive(a, 7) + 11;
4042                         }
4043                         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
4044                         memset(lencodes + n, fill, c);
4045                         n += c;
4046                 }
4047         }
4048         if (n != ntot) return stbi__err("bad codelengths", "Corrupt PNG");
4049         if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
4050         if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist)) return 0;
4051         return 1;
4052 }
4053
4054 static int stbi__parse_uncompressed_block(stbi__zbuf *a)
4055 {
4056         stbi_uc header[4];
4057         int len, nlen, k;
4058         if (a->num_bits & 7)
4059                 stbi__zreceive(a, a->num_bits & 7); // discard
4060                                                                                         // drain the bit-packed data into header
4061         k = 0;
4062         while (a->num_bits > 0) {
4063                 header[k++] = (stbi_uc)(a->code_buffer & 255); // suppress MSVC run-time check
4064                 a->code_buffer >>= 8;
4065                 a->num_bits -= 8;
4066         }
4067         STBI_ASSERT(a->num_bits == 0);
4068         // now fill header the normal way
4069         while (k < 4)
4070                 header[k++] = stbi__zget8(a);
4071         len = header[1] * 256 + header[0];
4072         nlen = header[3] * 256 + header[2];
4073         if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt", "Corrupt PNG");
4074         if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer", "Corrupt PNG");
4075         if (a->zout + len > a->zout_end)
4076                 if (!stbi__zexpand(a, a->zout, len)) return 0;
4077         memcpy(a->zout, a->zbuffer, len);
4078         a->zbuffer += len;
4079         a->zout += len;
4080         return 1;
4081 }
4082
4083 static int stbi__parse_zlib_header(stbi__zbuf *a)
4084 {
4085         int cmf = stbi__zget8(a);
4086         int cm = cmf & 15;
4087         /* int cinfo = cmf >> 4; */
4088         int flg = stbi__zget8(a);
4089         if ((cmf * 256 + flg) % 31 != 0) return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec
4090         if (flg & 32) return stbi__err("no preset dict", "Corrupt PNG"); // preset dictionary not allowed in png
4091         if (cm != 8) return stbi__err("bad compression", "Corrupt PNG"); // DEFLATE required for png
4092                                                                                                                                          // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
4093         return 1;
4094 }
4095
4096 static const stbi_uc stbi__zdefault_length[288] =
4097 {
4098         8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4099         8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4100         8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4101         8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4102         8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4103         9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4104         9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4105         9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4106         7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
4107 };
4108 static const stbi_uc stbi__zdefault_distance[32] =
4109 {
4110         5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
4111 };
4112 /*
4113 Init algorithm:
4114 {
4115 int i;   // use <= to match clearly with spec
4116 for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
4117 for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
4118 for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
4119 for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
4120
4121 for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
4122 }
4123 */
4124
4125 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
4126 {
4127         int final, type;
4128         if (parse_header)
4129                 if (!stbi__parse_zlib_header(a)) return 0;
4130         a->num_bits = 0;
4131         a->code_buffer = 0;
4132         do {
4133                 final = stbi__zreceive(a, 1);
4134                 type = stbi__zreceive(a, 2);
4135                 if (type == 0) {
4136                         if (!stbi__parse_uncompressed_block(a)) return 0;
4137                 }
4138                 else if (type == 3) {
4139                         return 0;
4140                 }
4141                 else {
4142                         if (type == 1) {
4143                                 // use fixed code lengths
4144                                 if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length, 288)) return 0;
4145                                 if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
4146                         }
4147                         else {
4148                                 if (!stbi__compute_huffman_codes(a)) return 0;
4149                         }
4150                         if (!stbi__parse_huffman_block(a)) return 0;
4151                 }
4152         } while (!final);
4153         return 1;
4154 }
4155
4156 static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
4157 {
4158         a->zout_start = obuf;
4159         a->zout = obuf;
4160         a->zout_end = obuf + olen;
4161         a->z_expandable = exp;
4162
4163         return stbi__parse_zlib(a, parse_header);
4164 }
4165
4166 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4167 {
4168         stbi__zbuf a;
4169         char *p = (char *)stbi__malloc(initial_size);
4170         if (p == NULL) return NULL;
4171         a.zbuffer = (stbi_uc *)buffer;
4172         a.zbuffer_end = (stbi_uc *)buffer + len;
4173         if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4174                 if (outlen) *outlen = (int)(a.zout - a.zout_start);
4175                 return a.zout_start;
4176         }
4177         else {
4178                 STBI_FREE(a.zout_start);
4179                 return NULL;
4180         }
4181 }
4182
4183 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4184 {
4185         return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4186 }
4187
4188 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4189 {
4190         stbi__zbuf a;
4191         char *p = (char *)stbi__malloc(initial_size);
4192         if (p == NULL) return NULL;
4193         a.zbuffer = (stbi_uc *)buffer;
4194         a.zbuffer_end = (stbi_uc *)buffer + len;
4195         if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4196                 if (outlen) *outlen = (int)(a.zout - a.zout_start);
4197                 return a.zout_start;
4198         }
4199         else {
4200                 STBI_FREE(a.zout_start);
4201                 return NULL;
4202         }
4203 }
4204
4205 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4206 {
4207         stbi__zbuf a;
4208         a.zbuffer = (stbi_uc *)ibuffer;
4209         a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
4210         if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4211                 return (int)(a.zout - a.zout_start);
4212         else
4213                 return -1;
4214 }
4215
4216 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4217 {
4218         stbi__zbuf a;
4219         char *p = (char *)stbi__malloc(16384);
4220         if (p == NULL) return NULL;
4221         a.zbuffer = (stbi_uc *)buffer;
4222         a.zbuffer_end = (stbi_uc *)buffer + len;
4223         if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4224                 if (outlen) *outlen = (int)(a.zout - a.zout_start);
4225                 return a.zout_start;
4226         }
4227         else {
4228                 STBI_FREE(a.zout_start);
4229                 return NULL;
4230         }
4231 }
4232
4233 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4234 {
4235         stbi__zbuf a;
4236         a.zbuffer = (stbi_uc *)ibuffer;
4237         a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
4238         if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4239                 return (int)(a.zout - a.zout_start);
4240         else
4241                 return -1;
4242 }
4243 #endif
4244
4245 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4246 //    simple implementation
4247 //      - only 8-bit samples
4248 //      - no CRC checking
4249 //      - allocates lots of intermediate memory
4250 //        - avoids problem of streaming data between subsystems
4251 //        - avoids explicit window management
4252 //    performance
4253 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4254
4255 #ifndef STBI_NO_PNG
4256 typedef struct
4257 {
4258         stbi__uint32 length;
4259         stbi__uint32 type;
4260 } stbi__pngchunk;
4261
4262 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4263 {
4264         stbi__pngchunk c;
4265         c.length = stbi__get32be(s);
4266         c.type = stbi__get32be(s);
4267         return c;
4268 }
4269
4270 static int stbi__check_png_header(stbi__context *s)
4271 {
4272         static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
4273         int i;
4274         for (i = 0; i < 8; ++i)
4275                 if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig", "Not a PNG");
4276         return 1;
4277 }
4278
4279 typedef struct
4280 {
4281         stbi__context *s;
4282         stbi_uc *idata, *expanded, *out;
4283         int depth;
4284 } stbi__png;
4285
4286
// Scanline filter identifiers. Values 0..4 are the filter-type bytes that
// prefix each scanline in the raw data; the last two never appear in a file.
enum {
        STBI__F_none        = 0,
        STBI__F_sub         = 1,
        STBI__F_up          = 2,
        STBI__F_avg         = 3,
        STBI__F_paeth       = 4,
        // synthetic filters used for first scanline to avoid needing a dummy row of 0s
        STBI__F_avg_first   = 5,
        STBI__F_paeth_first = 6
};
4297
4298 static stbi_uc first_row_filter[5] =
4299 {
4300         STBI__F_none,
4301         STBI__F_sub,
4302         STBI__F_none,
4303         STBI__F_avg_first,
4304         STBI__F_paeth_first
4305 };
4306
// Paeth predictor: form the estimate a + b - c and return whichever of
// a, b, c lies closest to it. Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
        int estimate = a + b - c;
        int dist_a = abs(estimate - a);
        int dist_b = abs(estimate - b);
        int dist_c = abs(estimate - c);

        if (dist_a > dist_b || dist_a > dist_c) {
                // 'a' is not the nearest; choose between the other two
                return (dist_b <= dist_c) ? b : c;
        }
        return a;
}
4317
4318 static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4319
4320 // create the png data from post-deflated data
4321 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
4322 {
4323         int bytes = (depth == 16 ? 2 : 1);
4324         stbi__context *s = a->s;
4325         stbi__uint32 i, j, stride = x*out_n*bytes;
4326         stbi__uint32 img_len, img_width_bytes;
4327         int k;
4328         int img_n = s->img_n; // copy it into a local for later
4329
4330         int output_bytes = out_n*bytes;
4331         int filter_bytes = img_n*bytes;
4332         int width = x;
4333
4334         STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
4335         a->out = (stbi_uc *)stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
4336         if (!a->out) return stbi__err("outofmem", "Out of memory");
4337
4338         img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4339         img_len = (img_width_bytes + 1) * y;
4340         // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
4341         // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
4342         // so just check for raw_len < img_len always.
4343         if (raw_len < img_len) return stbi__err("not enough pixels", "Corrupt PNG");
4344
4345         for (j = 0; j < y; ++j) {
4346                 stbi_uc *cur = a->out + stride*j;
4347                 stbi_uc *prior;
4348                 int filter = *raw++;
4349
4350                 if (filter > 4)
4351                         return stbi__err("invalid filter", "Corrupt PNG");
4352
4353                 if (depth < 8) {
4354                         STBI_ASSERT(img_width_bytes <= x);
4355                         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
4356                         filter_bytes = 1;
4357                         width = img_width_bytes;
4358                 }
4359                 prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
4360
4361                                                           // if first row, use special filter that doesn't sample previous row
4362                 if (j == 0) filter = first_row_filter[filter];
4363
4364                 // handle first byte explicitly
4365                 for (k = 0; k < filter_bytes; ++k) {
4366                         switch (filter) {
4367                         case STBI__F_none: cur[k] = raw[k]; break;
4368                         case STBI__F_sub: cur[k] = raw[k]; break;
4369                         case STBI__F_up: cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4370                         case STBI__F_avg: cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1)); break;
4371                         case STBI__F_paeth: cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0)); break;
4372                         case STBI__F_avg_first: cur[k] = raw[k]; break;
4373                         case STBI__F_paeth_first: cur[k] = raw[k]; break;
4374                         }
4375                 }
4376
4377                 if (depth == 8) {
4378                         if (img_n != out_n)
4379                                 cur[img_n] = 255; // first pixel
4380                         raw += img_n;
4381                         cur += out_n;
4382                         prior += out_n;
4383                 }
4384                 else if (depth == 16) {
4385                         if (img_n != out_n) {
4386                                 cur[filter_bytes] = 255; // first pixel top byte
4387                                 cur[filter_bytes + 1] = 255; // first pixel bottom byte
4388                         }
4389                         raw += filter_bytes;
4390                         cur += output_bytes;
4391                         prior += output_bytes;
4392                 }
4393                 else {
4394                         raw += 1;
4395                         cur += 1;
4396                         prior += 1;
4397                 }
4398
4399                 // this is a little gross, so that we don't switch per-pixel or per-component
4400                 if (depth < 8 || img_n == out_n) {
4401                         int nk = (width - 1)*filter_bytes;
4402 #define STBI__CASE(f) \
4403              case f:     \
4404                 for (k=0; k < nk; ++k)
4405                         switch (filter) {
4406                                 // "none" filter turns into a memcpy here; make that explicit.
4407                         case STBI__F_none:         memcpy(cur, raw, nk); break;
4408                                 STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]); } break;
4409                                 STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4410                                 STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1)); } break;
4411                                 STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], prior[k], prior[k - filter_bytes])); } break;
4412                                 STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); } break;
4413                                 STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], 0, 0)); } break;
4414                         }
4415 #undef STBI__CASE
4416                         raw += nk;
4417                 }
4418                 else {
4419                         STBI_ASSERT(img_n + 1 == out_n);
4420 #define STBI__CASE(f) \
4421              case f:     \
4422                 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
4423                    for (k=0; k < filter_bytes; ++k)
4424                         switch (filter) {
4425                                 STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break;
4426                                 STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - output_bytes]); } break;
4427                                 STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4428                                 STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1)); } break;
4429                                 STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], prior[k], prior[k - output_bytes])); } break;
4430                                 STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1)); } break;
4431                                 STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], 0, 0)); } break;
4432                         }
4433 #undef STBI__CASE
4434
4435                         // the loop above sets the high byte of the pixels' alpha, but for
4436                         // 16 bit png files we also need the low byte set. we'll do that here.
4437                         if (depth == 16) {
4438                                 cur = a->out + stride*j; // start at the beginning of the row again
4439                                 for (i = 0; i < x; ++i, cur += output_bytes) {
4440                                         cur[filter_bytes + 1] = 255;
4441                                 }
4442                         }
4443                 }
4444         }
4445
4446         // we make a separate pass to expand bits to pixels; for performance,
4447         // this could run two scanlines behind the above code, so it won't
4448         // intefere with filtering but will still be in the cache.
4449         if (depth < 8) {
4450                 for (j = 0; j < y; ++j) {
4451                         stbi_uc *cur = a->out + stride*j;
4452                         stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes;
4453                         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4454                         // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4455                         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4456
4457                                                                                                                                                            // note that the final byte might overshoot and write more data than desired.
4458                                                                                                                                                            // we can allocate enough data that this never writes out of memory, but it
4459                                                                                                                                                            // could also overwrite the next scanline. can it overwrite non-empty data
4460                                                                                                                                                            // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4461                                                                                                                                                            // so we need to explicitly clamp the final ones
4462
4463                         if (depth == 4) {
4464                                 for (k = x*img_n; k >= 2; k -= 2, ++in) {
4465                                         *cur++ = scale * ((*in >> 4));
4466                                         *cur++ = scale * ((*in) & 0x0f);
4467                                 }
4468                                 if (k > 0) *cur++ = scale * ((*in >> 4));
4469                         }
4470                         else if (depth == 2) {
4471                                 for (k = x*img_n; k >= 4; k -= 4, ++in) {
4472                                         *cur++ = scale * ((*in >> 6));
4473                                         *cur++ = scale * ((*in >> 4) & 0x03);
4474                                         *cur++ = scale * ((*in >> 2) & 0x03);
4475                                         *cur++ = scale * ((*in) & 0x03);
4476                                 }
4477                                 if (k > 0) *cur++ = scale * ((*in >> 6));
4478                                 if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4479                                 if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4480                         }
4481                         else if (depth == 1) {
4482                                 for (k = x*img_n; k >= 8; k -= 8, ++in) {
4483                                         *cur++ = scale * ((*in >> 7));
4484                                         *cur++ = scale * ((*in >> 6) & 0x01);
4485                                         *cur++ = scale * ((*in >> 5) & 0x01);
4486                                         *cur++ = scale * ((*in >> 4) & 0x01);
4487                                         *cur++ = scale * ((*in >> 3) & 0x01);
4488                                         *cur++ = scale * ((*in >> 2) & 0x01);
4489                                         *cur++ = scale * ((*in >> 1) & 0x01);
4490                                         *cur++ = scale * ((*in) & 0x01);
4491                                 }
4492                                 if (k > 0) *cur++ = scale * ((*in >> 7));
4493                                 if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4494                                 if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4495                                 if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4496                                 if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4497                                 if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4498                                 if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4499                         }
4500                         if (img_n != out_n) {
4501                                 int q;
4502                                 // insert alpha = 255
4503                                 cur = a->out + stride*j;
4504                                 if (img_n == 1) {
4505                                         for (q = x - 1; q >= 0; --q) {
4506                                                 cur[q * 2 + 1] = 255;
4507                                                 cur[q * 2 + 0] = cur[q];
4508                                         }
4509                                 }
4510                                 else {
4511                                         STBI_ASSERT(img_n == 3);
4512                                         for (q = x - 1; q >= 0; --q) {
4513                                                 cur[q * 4 + 3] = 255;
4514                                                 cur[q * 4 + 2] = cur[q * 3 + 2];
4515                                                 cur[q * 4 + 1] = cur[q * 3 + 1];
4516                                                 cur[q * 4 + 0] = cur[q * 3 + 0];
4517                                         }
4518                                 }
4519                         }
4520                 }
4521         }
4522         else if (depth == 16) {
4523                 // force the image data from big-endian to platform-native.
4524                 // this is done in a separate pass due to the decoding relying
4525                 // on the data being untouched, but could probably be done
4526                 // per-line during decode if care is taken.
4527                 stbi_uc *cur = a->out;
4528                 stbi__uint16 *cur16 = (stbi__uint16*)cur;
4529
4530                 for (i = 0; i < x*y*out_n; ++i, cur16++, cur += 2) {
4531                         *cur16 = (cur[0] << 8) | cur[1];
4532                 }
4533         }
4534
4535         return 1;
4536 }
4537
4538 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4539 {
4540         int bytes = (depth == 16 ? 2 : 1);
4541         int out_bytes = out_n * bytes;
4542         stbi_uc *final;
4543         int p;
4544         if (!interlaced)
4545                 return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4546
4547         // de-interlacing
4548         final = (stbi_uc *)stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
4549         for (p = 0; p < 7; ++p) {
4550                 int xorig[] = { 0,4,0,2,0,1,0 };
4551                 int yorig[] = { 0,0,4,0,2,0,1 };
4552                 int xspc[] = { 8,8,4,4,2,2,1 };
4553                 int yspc[] = { 8,8,8,4,4,2,2 };
4554                 int i, j, x, y;
4555                 // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4556                 x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
4557                 y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
4558                 if (x && y) {
4559                         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4560                         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4561                                 STBI_FREE(final);
4562                                 return 0;
4563                         }
4564                         for (j = 0; j < y; ++j) {
4565                                 for (i = 0; i < x; ++i) {
4566                                         int out_y = j*yspc[p] + yorig[p];
4567                                         int out_x = i*xspc[p] + xorig[p];
4568                                         memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
4569                                                 a->out + (j*x + i)*out_bytes, out_bytes);
4570                                 }
4571                         }
4572                         STBI_FREE(a->out);
4573                         image_data += img_len;
4574                         image_data_len -= img_len;
4575                 }
4576         }
4577         a->out = final;
4578
4579         return 1;
4580 }
4581
4582 static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4583 {
4584         stbi__context *s = z->s;
4585         stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4586         stbi_uc *p = z->out;
4587
4588         // compute color-based transparency, assuming we've
4589         // already got 255 as the alpha value in the output
4590         STBI_ASSERT(out_n == 2 || out_n == 4);
4591
4592         if (out_n == 2) {
4593                 for (i = 0; i < pixel_count; ++i) {
4594                         p[1] = (p[0] == tc[0] ? 0 : 255);
4595                         p += 2;
4596                 }
4597         }
4598         else {
4599                 for (i = 0; i < pixel_count; ++i) {
4600                         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4601                                 p[3] = 0;
4602                         p += 4;
4603                 }
4604         }
4605         return 1;
4606 }
4607
4608 static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
4609 {
4610         stbi__context *s = z->s;
4611         stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4612         stbi__uint16 *p = (stbi__uint16*)z->out;
4613
4614         // compute color-based transparency, assuming we've
4615         // already got 65535 as the alpha value in the output
4616         STBI_ASSERT(out_n == 2 || out_n == 4);
4617
4618         if (out_n == 2) {
4619                 for (i = 0; i < pixel_count; ++i) {
4620                         p[1] = (p[0] == tc[0] ? 0 : 65535);
4621                         p += 2;
4622                 }
4623         }
4624         else {
4625                 for (i = 0; i < pixel_count; ++i) {
4626                         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4627                                 p[3] = 0;
4628                         p += 4;
4629                 }
4630         }
4631         return 1;
4632 }
4633
4634 static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4635 {
4636         stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4637         stbi_uc *p, *temp_out, *orig = a->out;
4638
4639         p = (stbi_uc *)stbi__malloc_mad2(pixel_count, pal_img_n, 0);
4640         if (p == NULL) return stbi__err("outofmem", "Out of memory");
4641
4642         // between here and free(out) below, exitting would leak
4643         temp_out = p;
4644
4645         if (pal_img_n == 3) {
4646                 for (i = 0; i < pixel_count; ++i) {
4647                         int n = orig[i] * 4;
4648                         p[0] = palette[n];
4649                         p[1] = palette[n + 1];
4650                         p[2] = palette[n + 2];
4651                         p += 3;
4652                 }
4653         }
4654         else {
4655                 for (i = 0; i < pixel_count; ++i) {
4656                         int n = orig[i] * 4;
4657                         p[0] = palette[n];
4658                         p[1] = palette[n + 1];
4659                         p[2] = palette[n + 2];
4660                         p[3] = palette[n + 3];
4661                         p += 4;
4662                 }
4663         }
4664         STBI_FREE(a->out);
4665         a->out = temp_out;
4666
4667         STBI_NOTUSED(len);
4668
4669         return 1;
4670 }
4671
// File-wide decode options, both off by default.
static int stbi__unpremultiply_on_load = 0;   // set by stbi_set_unpremultiply_on_load; read by stbi__de_iphone
static int stbi__de_iphone_flag = 0;          // set by stbi_convert_iphone_png_to_rgb
4674
4675 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4676 {
4677         stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4678 }
4679
4680 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4681 {
4682         stbi__de_iphone_flag = flag_true_if_should_convert;
4683 }
4684
4685 static void stbi__de_iphone(stbi__png *z)
4686 {
4687         stbi__context *s = z->s;
4688         stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4689         stbi_uc *p = z->out;
4690
4691         if (s->img_out_n == 3) {  // convert bgr to rgb
4692                 for (i = 0; i < pixel_count; ++i) {
4693                         stbi_uc t = p[0];
4694                         p[0] = p[2];
4695                         p[2] = t;
4696                         p += 3;
4697                 }
4698         }
4699         else {
4700                 STBI_ASSERT(s->img_out_n == 4);
4701                 if (stbi__unpremultiply_on_load) {
4702                         // convert bgr to rgb and unpremultiply
4703                         for (i = 0; i < pixel_count; ++i) {
4704                                 stbi_uc a = p[3];
4705                                 stbi_uc t = p[0];
4706                                 if (a) {
4707                                         stbi_uc half = a / 2;
4708                                         p[0] = (p[2] * 255 + half) / a;
4709                                         p[1] = (p[1] * 255 + half) / a;
4710                                         p[2] = (t * 255 + half) / a;
4711                                 }
4712                                 else {
4713                                         p[0] = p[2];
4714                                         p[2] = t;
4715                                 }
4716                                 p += 4;
4717                         }
4718                 }
4719                 else {
4720                         // convert bgr to rgb
4721                         for (i = 0; i < pixel_count; ++i) {
4722                                 stbi_uc t = p[0];
4723                                 p[0] = p[2];
4724                                 p[2] = t;
4725                                 p += 4;
4726                         }
4727                 }
4728         }
4729 }
4730
// Pack four chunk-name characters into one integer tag, first character in
// the most significant byte, so it can be matched against the chunk type
// word that stbi__get_chunk_header reads with stbi__get32be.
#define STBI__PNG_TYPE(a,b,c,d) \
        ( ((a) << 24) + ((b) << 16) + ((c) << 8) + (d) )
4732
4733 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
4734 {
4735         stbi_uc palette[1024], pal_img_n = 0;
4736         stbi_uc has_trans = 0, tc[3];
4737         stbi__uint16 tc16[3];
4738         stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
4739         int first = 1, k, interlace = 0, color = 0, is_iphone = 0;
4740         stbi__context *s = z->s;
4741
4742         z->expanded = NULL;
4743         z->idata = NULL;
4744         z->out = NULL;
4745
4746         if (!stbi__check_png_header(s)) return 0;
4747
4748         if (scan == STBI__SCAN_type) return 1;
4749
4750         for (;;) {
4751                 stbi__pngchunk c = stbi__get_chunk_header(s);
4752                 switch (c.type) {
4753                 case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
4754                         is_iphone = 1;
4755                         stbi__skip(s, c.length);
4756                         break;
4757                 case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
4758                         int comp, filter;
4759                         if (!first) return stbi__err("multiple IHDR", "Corrupt PNG");
4760                         first = 0;
4761                         if (c.length != 13) return stbi__err("bad IHDR len", "Corrupt PNG");
4762                         s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large", "Very large image (corrupt?)");
4763                         s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large", "Very large image (corrupt?)");
4764                         z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only", "PNG not supported: 1/2/4/8/16-bit only");
4765                         color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype", "Corrupt PNG");
4766                         if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype", "Corrupt PNG");
4767                         if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype", "Corrupt PNG");
4768                         comp = stbi__get8(s);  if (comp) return stbi__err("bad comp method", "Corrupt PNG");
4769                         filter = stbi__get8(s);  if (filter) return stbi__err("bad filter method", "Corrupt PNG");
4770                         interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method", "Corrupt PNG");
4771                         if (!s->img_x || !s->img_y) return stbi__err("0-pixel image", "Corrupt PNG");
4772                         if (!pal_img_n) {
4773                                 s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
4774                                 if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
4775                                 if (scan == STBI__SCAN_header) return 1;
4776                         }
4777                         else {
4778                                 // if paletted, then pal_n is our final components, and
4779                                 // img_n is # components to decompress/filter.
4780                                 s->img_n = 1;
4781                                 if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large", "Corrupt PNG");
4782                                 // if SCAN_header, have to scan to see if we have a tRNS
4783                         }
4784                         break;
4785                 }
4786
4787                 case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
4788                         if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4789                         if (c.length > 256 * 3) return stbi__err("invalid PLTE", "Corrupt PNG");
4790                         pal_len = c.length / 3;
4791                         if (pal_len * 3 != c.length) return stbi__err("invalid PLTE", "Corrupt PNG");
4792                         for (i = 0; i < pal_len; ++i) {
4793                                 palette[i * 4 + 0] = stbi__get8(s);
4794                                 palette[i * 4 + 1] = stbi__get8(s);
4795                                 palette[i * 4 + 2] = stbi__get8(s);
4796                                 palette[i * 4 + 3] = 255;
4797                         }
4798                         break;
4799                 }
4800
4801                 case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
4802                         if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4803                         if (z->idata) return stbi__err("tRNS after IDAT", "Corrupt PNG");
4804                         if (pal_img_n) {
4805                                 if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
4806                                 if (pal_len == 0) return stbi__err("tRNS before PLTE", "Corrupt PNG");
4807                                 if (c.length > pal_len) return stbi__err("bad tRNS len", "Corrupt PNG");
4808                                 pal_img_n = 4;
4809                                 for (i = 0; i < c.length; ++i)
4810                                         palette[i * 4 + 3] = stbi__get8(s);
4811                         }
4812                         else {
4813                                 if (!(s->img_n & 1)) return stbi__err("tRNS with alpha", "Corrupt PNG");
4814                                 if (c.length != (stbi__uint32)s->img_n * 2) return stbi__err("bad tRNS len", "Corrupt PNG");
4815                                 has_trans = 1;
4816                                 if (z->depth == 16) {
4817                                         for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
4818                                 }
4819                                 else {
4820                                         for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
4821                                 }
4822                         }
4823                         break;
4824                 }
4825
4826                 case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
4827                         if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4828                         if (pal_img_n && !pal_len) return stbi__err("no PLTE", "Corrupt PNG");
4829                         if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
4830                         if ((int)(ioff + c.length) < (int)ioff) return 0;
4831                         if (ioff + c.length > idata_limit) {
4832                                 stbi__uint32 idata_limit_old = idata_limit;
4833                                 stbi_uc *p;
4834                                 if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
4835                                 while (ioff + c.length > idata_limit)
4836                                         idata_limit *= 2;
4837                                 STBI_NOTUSED(idata_limit_old);
4838                                 p = (stbi_uc *)STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
4839                                 z->idata = p;
4840                         }
4841                         if (!stbi__getn(s, z->idata + ioff, c.length)) return stbi__err("outofdata", "Corrupt PNG");
4842                         ioff += c.length;
4843                         break;
4844                 }
4845
4846                 case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
4847                         stbi__uint32 raw_len, bpl;
4848                         if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4849                         if (scan != STBI__SCAN_load) return 1;
4850                         if (z->idata == NULL) return stbi__err("no IDAT", "Corrupt PNG");
4851                         // initial guess for decoded data size to avoid unnecessary reallocs
4852                         bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
4853                         raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
4854                         z->expanded = (stbi_uc *)stbi_zlib_decode_malloc_guesssize_headerflag((char *)z->idata, ioff, raw_len, (int *)&raw_len, !is_iphone);
4855                         if (z->expanded == NULL) return 0; // zlib should set error
4856                         STBI_FREE(z->idata); z->idata = NULL;
4857                         if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
4858                                 s->img_out_n = s->img_n + 1;
4859                         else
4860                                 s->img_out_n = s->img_n;
4861                         if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
4862                         if (has_trans) {
4863                                 if (z->depth == 16) {
4864                                         if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
4865                                 }
4866                                 else {
4867                                         if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
4868                                 }
4869                         }
4870                         if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
4871                                 stbi__de_iphone(z);
4872                         if (pal_img_n) {
4873                                 // pal_img_n == 3 or 4
4874                                 s->img_n = pal_img_n; // record the actual colors we had
4875                                 s->img_out_n = pal_img_n;
4876                                 if (req_comp >= 3) s->img_out_n = req_comp;
4877                                 if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
4878                                         return 0;
4879                         }
4880                         else if (has_trans) {
4881                                 // non-paletted image with tRNS -> source image has (constant) alpha
4882                                 ++s->img_n;
4883                         }
4884                         STBI_FREE(z->expanded); z->expanded = NULL;
4885                         return 1;
4886                 }
4887
4888                 default:
4889                         // if critical, fail
4890                         if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4891                         if ((c.type & (1 << 29)) == 0) {
4892 #ifndef STBI_NO_FAILURE_STRINGS
4893                                 // not threadsafe
4894                                 static char invalid_chunk[] = "XXXX PNG chunk not known";
4895                                 invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
4896                                 invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
4897                                 invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
4898                                 invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
4899 #endif
4900                                 return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
4901                         }
4902                         stbi__skip(s, c.length);
4903                         break;
4904                 }
4905                 // end of PNG chunk, read and skip CRC
4906                 stbi__get32be(s);
4907         }
4908 }
4909
4910 static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
4911 {
4912         void *result = NULL;
4913         if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
4914         if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
4915                 if (p->depth < 8)
4916                         ri->bits_per_channel = 8;
4917                 else
4918                         ri->bits_per_channel = p->depth;
4919                 result = p->out;
4920                 p->out = NULL;
4921                 if (req_comp && req_comp != p->s->img_out_n) {
4922                         if (ri->bits_per_channel == 8)
4923                                 result = stbi__convert_format((unsigned char *)result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4924                         else
4925                                 result = stbi__convert_format16((stbi__uint16 *)result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4926                         p->s->img_out_n = req_comp;
4927                         if (result == NULL) return result;
4928                 }
4929                 *x = p->s->img_x;
4930                 *y = p->s->img_y;
4931                 if (n) *n = p->s->img_n;
4932         }
4933         STBI_FREE(p->out);      p->out = NULL;
4934         STBI_FREE(p->expanded); p->expanded = NULL;
4935         STBI_FREE(p->idata);    p->idata = NULL;
4936
4937         return result;
4938 }
4939
4940 static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
4941 {
4942         stbi__png p;
4943         p.s = s;
4944         return stbi__do_png(&p, x, y, comp, req_comp, ri);
4945 }
4946
4947 static int stbi__png_test(stbi__context *s)
4948 {
4949         int r;
4950         r = stbi__check_png_header(s);
4951         stbi__rewind(s);
4952         return r;
4953 }
4954
4955 static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
4956 {
4957         if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
4958                 stbi__rewind(p->s);
4959                 return 0;
4960         }
4961         if (x) *x = p->s->img_x;
4962         if (y) *y = p->s->img_y;
4963         if (comp) *comp = p->s->img_n;
4964         return 1;
4965 }
4966
4967 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
4968 {
4969         stbi__png p;
4970         p.s = s;
4971         return stbi__png_info_raw(&p, x, y, comp);
4972 }
4973 #endif
4974
4975 // Microsoft/Windows BMP image
4976
4977 #ifndef STBI_NO_BMP
4978 static int stbi__bmp_test_raw(stbi__context *s)
4979 {
4980         int r;
4981         int sz;
4982         if (stbi__get8(s) != 'B') return 0;
4983         if (stbi__get8(s) != 'M') return 0;
4984         stbi__get32le(s); // discard filesize
4985         stbi__get16le(s); // discard reserved
4986         stbi__get16le(s); // discard reserved
4987         stbi__get32le(s); // discard data offset
4988         sz = stbi__get32le(s);
4989         r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
4990         return r;
4991 }
4992
4993 static int stbi__bmp_test(stbi__context *s)
4994 {
4995         int r = stbi__bmp_test_raw(s);
4996         stbi__rewind(s);
4997         return r;
4998 }
4999
5000
// Index (0..31) of the highest set bit in z, or -1 when z is 0.
// Binary search: at each step, if anything is set above the low `step` bits,
// drop those low bits and add `step` to the answer.
static int stbi__high_bit(unsigned int z)
{
        static const int steps[5] = { 16, 8, 4, 2, 1 };
        int bit = 0;
        int k;

        if (z == 0)
                return -1;

        for (k = 0; k < 5; ++k) {
                if ((z >> steps[k]) != 0) {
                        bit += steps[k];
                        z >>= steps[k];
                }
        }
        return bit;
}
5013
// Number of set bits among the low 32 bits of a.
// Walks the value one nibble at a time using a 16-entry population table.
static int stbi__bitcount(unsigned int a)
{
        static const unsigned char nibble_bits[16] = {
                0, 1, 1, 2, 1, 2, 2, 3,
                1, 2, 2, 3, 2, 3, 3, 4
        };
        int total = 0;
        int k;

        for (k = 0; k < 8; ++k) {
                total += nibble_bits[a & 15u];
                a >>= 4;
        }
        return total;
}
5023
// Aligns a masked channel value so that its top bit lands on bit 7, then
// widens it to 8 bits by replicating the field into the lower bits.
//   v     - masked pixel value (bit pattern; may have bit 31 set)
//   shift - right-shift amount that puts the field's top bit at bit 7;
//           negative means shift left by -shift
//   bits  - width of the field in bits
// Returns the widened value; callers keep only the low 8 bits.
// Returns 0 when bits <= 0 (an empty field carries no data).
//
// All shifting is done on an unsigned copy of v: shifting a negative int
// right may sign-extend (which corrupts the replicated result for fields
// narrower than 8 bits), and shifting a negative int left is undefined.
static int stbi__shiftsigned(int v, int shift, int bits)
{
        unsigned int value = (unsigned int)v;
        unsigned int result;
        int z;

        // a zero-width field would never advance the replication loop below
        if (bits <= 0) return 0;

        if (shift < 0) value <<= -shift;
        else value >>= shift;
        result = value;

        // replicate the field downward until all 8 bits are covered
        for (z = bits; z < 8; z += bits)
                result += value >> z;

        return (int)result;
}
5040
// Header values collected by stbi__bmp_parse_header for the BMP decoder.
typedef struct
{
        int bpp;            // bits per pixel
        int offset;         // data offset field read from the file header
        int hsz;            // info-header size (12, 40, 56, 108 or 124)
        unsigned int mr;    // red channel bit mask
        unsigned int mg;    // green channel bit mask
        unsigned int mb;    // blue channel bit mask
        unsigned int ma;    // alpha channel bit mask (0 if none)
        unsigned int all_a; // alpha accumulator seed; if 0 at the end of decoding, the loaded alpha channel was all 0
} stbi__bmp_data;
5046
5047 static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
5048 {
5049         int hsz;
5050         if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
5051         stbi__get32le(s); // discard filesize
5052         stbi__get16le(s); // discard reserved
5053         stbi__get16le(s); // discard reserved
5054         info->offset = stbi__get32le(s);
5055         info->hsz = hsz = stbi__get32le(s);
5056         info->mr = info->mg = info->mb = info->ma = 0;
5057
5058         if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
5059         if (hsz == 12) {
5060                 s->img_x = stbi__get16le(s);
5061                 s->img_y = stbi__get16le(s);
5062         }
5063         else {
5064                 s->img_x = stbi__get32le(s);
5065                 s->img_y = stbi__get32le(s);
5066         }
5067         if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
5068         info->bpp = stbi__get16le(s);
5069         if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
5070         if (hsz != 12) {
5071                 int compress = stbi__get32le(s);
5072                 if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
5073                 stbi__get32le(s); // discard sizeof
5074                 stbi__get32le(s); // discard hres
5075                 stbi__get32le(s); // discard vres
5076                 stbi__get32le(s); // discard colorsused
5077                 stbi__get32le(s); // discard max important
5078                 if (hsz == 40 || hsz == 56) {
5079                         if (hsz == 56) {
5080                                 stbi__get32le(s);
5081                                 stbi__get32le(s);
5082                                 stbi__get32le(s);
5083                                 stbi__get32le(s);
5084                         }
5085                         if (info->bpp == 16 || info->bpp == 32) {
5086                                 if (compress == 0) {
5087                                         if (info->bpp == 32) {
5088                                                 info->mr = 0xffu << 16;
5089                                                 info->mg = 0xffu << 8;
5090                                                 info->mb = 0xffu << 0;
5091                                                 info->ma = 0xffu << 24;
5092                                                 info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
5093                                         }
5094                                         else {
5095                                                 info->mr = 31u << 10;
5096                                                 info->mg = 31u << 5;
5097                                                 info->mb = 31u << 0;
5098                                         }
5099                                 }
5100                                 else if (compress == 3) {
5101                                         info->mr = stbi__get32le(s);
5102                                         info->mg = stbi__get32le(s);
5103                                         info->mb = stbi__get32le(s);
5104                                         // not documented, but generated by photoshop and handled by mspaint
5105                                         if (info->mr == info->mg && info->mg == info->mb) {
5106                                                 // ?!?!?
5107                                                 return stbi__errpuc("bad BMP", "bad BMP");
5108                                         }
5109                                 }
5110                                 else
5111                                         return stbi__errpuc("bad BMP", "bad BMP");
5112                         }
5113                 }
5114                 else {
5115                         int i;
5116                         if (hsz != 108 && hsz != 124)
5117                                 return stbi__errpuc("bad BMP", "bad BMP");
5118                         info->mr = stbi__get32le(s);
5119                         info->mg = stbi__get32le(s);
5120                         info->mb = stbi__get32le(s);
5121                         info->ma = stbi__get32le(s);
5122                         stbi__get32le(s); // discard color space
5123                         for (i = 0; i < 12; ++i)
5124                                 stbi__get32le(s); // discard color space parameters
5125                         if (hsz == 124) {
5126                                 stbi__get32le(s); // discard rendering intent
5127                                 stbi__get32le(s); // discard offset of profile data
5128                                 stbi__get32le(s); // discard size of profile data
5129                                 stbi__get32le(s); // discard reserved
5130                         }
5131                 }
5132         }
5133         return (void *)1;
5134 }
5135
5136
5137 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5138 {
5139         stbi_uc *out;
5140         unsigned int mr = 0, mg = 0, mb = 0, ma = 0, all_a;
5141         stbi_uc pal[256][4];
5142         int psize = 0, i, j, width;
5143         int flip_vertically, pad, target;
5144         stbi__bmp_data info;
5145         STBI_NOTUSED(ri);
5146
5147         info.all_a = 255;
5148         if (stbi__bmp_parse_header(s, &info) == NULL)
5149                 return NULL; // error code already set
5150
5151         flip_vertically = ((int)s->img_y) > 0;
5152         s->img_y = abs((int)s->img_y);
5153
5154         mr = info.mr;
5155         mg = info.mg;
5156         mb = info.mb;
5157         ma = info.ma;
5158         all_a = info.all_a;
5159
5160         if (info.hsz == 12) {
5161                 if (info.bpp < 24)
5162                         psize = (info.offset - 14 - 24) / 3;
5163         }
5164         else {
5165                 if (info.bpp < 16)
5166                         psize = (info.offset - 14 - info.hsz) >> 2;
5167         }
5168
5169         s->img_n = ma ? 4 : 3;
5170         if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5171                 target = req_comp;
5172         else
5173                 target = s->img_n; // if they want monochrome, we'll post-convert
5174
5175                                                    // sanity-check size
5176         if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5177                 return stbi__errpuc("too large", "Corrupt BMP");
5178
5179         out = (stbi_uc *)stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5180         if (!out) return stbi__errpuc("outofmem", "Out of memory");
5181         if (info.bpp < 16) {
5182                 int z = 0;
5183                 if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
5184                 for (i = 0; i < psize; ++i) {
5185                         pal[i][2] = stbi__get8(s);
5186                         pal[i][1] = stbi__get8(s);
5187                         pal[i][0] = stbi__get8(s);
5188                         if (info.hsz != 12) stbi__get8(s);
5189                         pal[i][3] = 255;
5190                 }
5191                 stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
5192                 if (info.bpp == 4) width = (s->img_x + 1) >> 1;
5193                 else if (info.bpp == 8) width = s->img_x;
5194                 else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
5195                 pad = (-width) & 3;
5196                 for (j = 0; j < (int)s->img_y; ++j) {
5197                         for (i = 0; i < (int)s->img_x; i += 2) {
5198                                 int v = stbi__get8(s), v2 = 0;
5199                                 if (info.bpp == 4) {
5200                                         v2 = v & 15;
5201                                         v >>= 4;
5202                                 }
5203                                 out[z++] = pal[v][0];
5204                                 out[z++] = pal[v][1];
5205                                 out[z++] = pal[v][2];
5206                                 if (target == 4) out[z++] = 255;
5207                                 if (i + 1 == (int)s->img_x) break;
5208                                 v = (info.bpp == 8) ? stbi__get8(s) : v2;
5209                                 out[z++] = pal[v][0];
5210                                 out[z++] = pal[v][1];
5211                                 out[z++] = pal[v][2];
5212                                 if (target == 4) out[z++] = 255;
5213                         }
5214                         stbi__skip(s, pad);
5215                 }
5216         }
5217         else {
5218                 int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0, acount = 0;
5219                 int z = 0;
5220                 int easy = 0;
5221                 stbi__skip(s, info.offset - 14 - info.hsz);
5222                 if (info.bpp == 24) width = 3 * s->img_x;
5223                 else if (info.bpp == 16) width = 2 * s->img_x;
5224                 else /* bpp = 32 and pad = 0 */ width = 0;
5225                 pad = (-width) & 3;
5226                 if (info.bpp == 24) {
5227                         easy = 1;
5228                 }
5229                 else if (info.bpp == 32) {
5230                         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
5231                                 easy = 2;
5232                 }
5233                 if (!easy) {
5234                         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5235                         // right shift amt to put high bit in position #7
5236                         rshift = stbi__high_bit(mr) - 7; rcount = stbi__bitcount(mr);
5237                         gshift = stbi__high_bit(mg) - 7; gcount = stbi__bitcount(mg);
5238                         bshift = stbi__high_bit(mb) - 7; bcount = stbi__bitcount(mb);
5239                         ashift = stbi__high_bit(ma) - 7; acount = stbi__bitcount(ma);
5240                 }
5241                 for (j = 0; j < (int)s->img_y; ++j) {
5242                         if (easy) {
5243                                 for (i = 0; i < (int)s->img_x; ++i) {
5244                                         unsigned char a;
5245                                         out[z + 2] = stbi__get8(s);
5246                                         out[z + 1] = stbi__get8(s);
5247                                         out[z + 0] = stbi__get8(s);
5248                                         z += 3;
5249                                         a = (easy == 2 ? stbi__get8(s) : 255);
5250                                         all_a |= a;
5251                                         if (target == 4) out[z++] = a;
5252                                 }
5253                         }
5254                         else {
5255                                 int bpp = info.bpp;
5256                                 for (i = 0; i < (int)s->img_x; ++i) {
5257                                         stbi__uint32 v = (bpp == 16 ? (stbi__uint32)stbi__get16le(s) : stbi__get32le(s));
5258                                         int a;
5259                                         out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
5260                                         out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
5261                                         out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
5262                                         a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
5263                                         all_a |= a;
5264                                         if (target == 4) out[z++] = STBI__BYTECAST(a);
5265                                 }
5266                         }
5267                         stbi__skip(s, pad);
5268                 }
5269         }
5270
5271         // if alpha channel is all 0s, replace with all 255s
5272         if (target == 4 && all_a == 0)
5273                 for (i = 4 * s->img_x*s->img_y - 1; i >= 0; i -= 4)
5274                         out[i] = 255;
5275
5276         if (flip_vertically) {
5277                 stbi_uc t;
5278                 for (j = 0; j < (int)s->img_y >> 1; ++j) {
5279                         stbi_uc *p1 = out + j     *s->img_x*target;
5280                         stbi_uc *p2 = out + (s->img_y - 1 - j)*s->img_x*target;
5281                         for (i = 0; i < (int)s->img_x*target; ++i) {
5282                                 t = p1[i], p1[i] = p2[i], p2[i] = t;
5283                         }
5284                 }
5285         }
5286
5287         if (req_comp && req_comp != target) {
5288                 out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
5289                 if (out == NULL) return out; // stbi__convert_format frees input on failure
5290         }
5291
5292         *x = s->img_x;
5293         *y = s->img_y;
5294         if (comp) *comp = s->img_n;
5295         return out;
5296 }
5297 #endif
5298
5299 // Targa Truevision - TGA
5300 // by Jonathan Dummer
5301 #ifndef STBI_NO_TGA
5302 // returns STBI_rgb or whatever, 0 on error
5303 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5304 {
5305         // only RGB or RGBA (incl. 16bit) or grey allowed
5306         if (is_rgb16) *is_rgb16 = 0;
5307         switch (bits_per_pixel) {
5308         case 8:  return STBI_grey;
5309         case 16: if (is_grey) return STBI_grey_alpha;
5310                 // else: fall-through
5311         case 15: if (is_rgb16) *is_rgb16 = 1;
5312                 return STBI_rgb;
5313         case 24: // fall-through
5314         case 32: return bits_per_pixel / 8;
5315         default: return 0;
5316         }
5317 }
5318
5319 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
5320 {
5321         int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
5322         int sz, tga_colormap_type;
5323         stbi__get8(s);                   // discard Offset
5324         tga_colormap_type = stbi__get8(s); // colormap type
5325         if (tga_colormap_type > 1) {
5326                 stbi__rewind(s);
5327                 return 0;      // only RGB or indexed allowed
5328         }
5329         tga_image_type = stbi__get8(s); // image type
5330         if (tga_colormap_type == 1) { // colormapped (paletted) image
5331                 if (tga_image_type != 1 && tga_image_type != 9) {
5332                         stbi__rewind(s);
5333                         return 0;
5334                 }
5335                 stbi__skip(s, 4);       // skip index of first colormap entry and number of entries
5336                 sz = stbi__get8(s);    //   check bits per palette color entry
5337                 if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) {
5338                         stbi__rewind(s);
5339                         return 0;
5340                 }
5341                 stbi__skip(s, 4);       // skip image x and y origin
5342                 tga_colormap_bpp = sz;
5343         }
5344         else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
5345                 if ((tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11)) {
5346                         stbi__rewind(s);
5347                         return 0; // only RGB or grey allowed, +/- RLE
5348                 }
5349                 stbi__skip(s, 9); // skip colormap specification and image x/y origin
5350                 tga_colormap_bpp = 0;
5351         }
5352         tga_w = stbi__get16le(s);
5353         if (tga_w < 1) {
5354                 stbi__rewind(s);
5355                 return 0;   // test width
5356         }
5357         tga_h = stbi__get16le(s);
5358         if (tga_h < 1) {
5359                 stbi__rewind(s);
5360                 return 0;   // test height
5361         }
5362         tga_bits_per_pixel = stbi__get8(s); // bits per pixel
5363         stbi__get8(s); // ignore alpha bits
5364         if (tga_colormap_bpp != 0) {
5365                 if ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
5366                         // when using a colormap, tga_bits_per_pixel is the size of the indexes
5367                         // I don't think anything but 8 or 16bit indexes makes sense
5368                         stbi__rewind(s);
5369                         return 0;
5370                 }
5371                 tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
5372         }
5373         else {
5374                 tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
5375         }
5376         if (!tga_comp) {
5377                 stbi__rewind(s);
5378                 return 0;
5379         }
5380         if (x) *x = tga_w;
5381         if (y) *y = tga_h;
5382         if (comp) *comp = tga_comp;
5383         return 1;                   // seems to have passed everything
5384 }
5385
5386 static int stbi__tga_test(stbi__context *s)
5387 {
5388         int res = 0;
5389         int sz, tga_color_type;
5390         stbi__get8(s);      //   discard Offset
5391         tga_color_type = stbi__get8(s);   //   color type
5392         if (tga_color_type > 1) goto errorEnd;   //   only RGB or indexed allowed
5393         sz = stbi__get8(s);   //   image type
5394         if (tga_color_type == 1) { // colormapped (paletted) image
5395                 if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
5396                 stbi__skip(s, 4);       // skip index of first colormap entry and number of entries
5397                 sz = stbi__get8(s);    //   check bits per palette color entry
5398                 if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) goto errorEnd;
5399                 stbi__skip(s, 4);       // skip image x and y origin
5400         }
5401         else { // "normal" image w/o colormap
5402                 if ((sz != 2) && (sz != 3) && (sz != 10) && (sz != 11)) goto errorEnd; // only RGB or grey allowed, +/- RLE
5403                 stbi__skip(s, 9); // skip colormap specification and image x/y origin
5404         }
5405         if (stbi__get16le(s) < 1) goto errorEnd;      //   test width
5406         if (stbi__get16le(s) < 1) goto errorEnd;      //   test height
5407         sz = stbi__get8(s);   //   bits per pixel
5408         if ((tga_color_type == 1) && (sz != 8) && (sz != 16)) goto errorEnd; // for colormapped images, bpp is size of an index
5409         if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) goto errorEnd;
5410
5411         res = 1; // if we got this far, everything's good and we can return 1 instead of 0
5412
5413 errorEnd:
5414         stbi__rewind(s);
5415         return res;
5416 }
5417
5418 // read 16bit value and convert to 24bit RGB
5419 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5420 {
5421         stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5422         stbi__uint16 fiveBitMask = 31;
5423         // we have 3 channels with 5bits each
5424         int r = (px >> 10) & fiveBitMask;
5425         int g = (px >> 5) & fiveBitMask;
5426         int b = px & fiveBitMask;
5427         // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5428         out[0] = (stbi_uc)((r * 255) / 31);
5429         out[1] = (stbi_uc)((g * 255) / 31);
5430         out[2] = (stbi_uc)((b * 255) / 31);
5431
5432         // some people claim that the most significant bit might be used for alpha
5433         // (possibly if an alpha-bit is set in the "image descriptor byte")
5434         // but that only made 16bit test images completely translucent..
5435         // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5436 }
5437
5438 static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5439 {
5440         //   read in the TGA header stuff
5441         int tga_offset = stbi__get8(s);
5442         int tga_indexed = stbi__get8(s);
5443         int tga_image_type = stbi__get8(s);
5444         int tga_is_RLE = 0;
5445         int tga_palette_start = stbi__get16le(s);
5446         int tga_palette_len = stbi__get16le(s);
5447         int tga_palette_bits = stbi__get8(s);
5448         int tga_x_origin = stbi__get16le(s);
5449         int tga_y_origin = stbi__get16le(s);
5450         int tga_width = stbi__get16le(s);
5451         int tga_height = stbi__get16le(s);
5452         int tga_bits_per_pixel = stbi__get8(s);
5453         int tga_comp, tga_rgb16 = 0;
5454         int tga_inverted = stbi__get8(s);
5455         // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
5456         //   image data
5457         unsigned char *tga_data;
5458         unsigned char *tga_palette = NULL;
5459         int i, j;
5460         unsigned char raw_data[4] = { 0 };
5461         int RLE_count = 0;
5462         int RLE_repeating = 0;
5463         int read_next_pixel = 1;
5464         STBI_NOTUSED(ri);
5465
5466         //   do a tiny bit of precessing
5467         if (tga_image_type >= 8)
5468         {
5469                 tga_image_type -= 8;
5470                 tga_is_RLE = 1;
5471         }
5472         tga_inverted = 1 - ((tga_inverted >> 5) & 1);
5473
5474         //   If I'm paletted, then I'll use the number of bits from the palette
5475         if (tga_indexed) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
5476         else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
5477
5478         if (!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
5479                 return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
5480
5481         //   tga info
5482         *x = tga_width;
5483         *y = tga_height;
5484         if (comp) *comp = tga_comp;
5485
5486         if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
5487                 return stbi__errpuc("too large", "Corrupt TGA");
5488
5489         tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
5490         if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
5491
5492         // skip to the data's starting position (offset usually = 0)
5493         stbi__skip(s, tga_offset);
5494
5495         if (!tga_indexed && !tga_is_RLE && !tga_rgb16) {
5496                 for (i = 0; i < tga_height; ++i) {
5497                         int row = tga_inverted ? tga_height - i - 1 : i;
5498                         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
5499                         stbi__getn(s, tga_row, tga_width * tga_comp);
5500                 }
5501         }
5502         else {
5503                 //   do I need to load a palette?
5504                 if (tga_indexed)
5505                 {
5506                         //   any data to skip? (offset usually = 0)
5507                         stbi__skip(s, tga_palette_start);
5508                         //   load the palette
5509                         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
5510                         if (!tga_palette) {
5511                                 STBI_FREE(tga_data);
5512                                 return stbi__errpuc("outofmem", "Out of memory");
5513                         }
5514                         if (tga_rgb16) {
5515                                 stbi_uc *pal_entry = tga_palette;
5516                                 STBI_ASSERT(tga_comp == STBI_rgb);
5517                                 for (i = 0; i < tga_palette_len; ++i) {
5518                                         stbi__tga_read_rgb16(s, pal_entry);
5519                                         pal_entry += tga_comp;
5520                                 }
5521                         }
5522                         else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
5523                                 STBI_FREE(tga_data);
5524                                 STBI_FREE(tga_palette);
5525                                 return stbi__errpuc("bad palette", "Corrupt TGA");
5526                         }
5527                 }
5528                 //   load the data
5529                 for (i = 0; i < tga_width * tga_height; ++i)
5530                 {
5531                         //   if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
5532                         if (tga_is_RLE)
5533                         {
5534                                 if (RLE_count == 0)
5535                                 {
5536                                         //   yep, get the next byte as a RLE command
5537                                         int RLE_cmd = stbi__get8(s);
5538                                         RLE_count = 1 + (RLE_cmd & 127);
5539                                         RLE_repeating = RLE_cmd >> 7;
5540                                         read_next_pixel = 1;
5541                                 }
5542                                 else if (!RLE_repeating)
5543                                 {
5544                                         read_next_pixel = 1;
5545                                 }
5546                         }
5547                         else
5548                         {
5549                                 read_next_pixel = 1;
5550                         }
5551                         //   OK, if I need to read a pixel, do it now
5552                         if (read_next_pixel)
5553                         {
5554                                 //   load however much data we did have
5555                                 if (tga_indexed)
5556                                 {
5557                                         // read in index, then perform the lookup
5558                                         int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
5559                                         if (pal_idx >= tga_palette_len) {
5560                                                 // invalid index
5561                                                 pal_idx = 0;
5562                                         }
5563                                         pal_idx *= tga_comp;
5564                                         for (j = 0; j < tga_comp; ++j) {
5565                                                 raw_data[j] = tga_palette[pal_idx + j];
5566                                         }
5567                                 }
5568                                 else if (tga_rgb16) {
5569                                         STBI_ASSERT(tga_comp == STBI_rgb);
5570                                         stbi__tga_read_rgb16(s, raw_data);
5571                                 }
5572                                 else {
5573                                         //   read in the data raw
5574                                         for (j = 0; j < tga_comp; ++j) {
5575                                                 raw_data[j] = stbi__get8(s);
5576                                         }
5577                                 }
5578                                 //   clear the reading flag for the next pixel
5579                                 read_next_pixel = 0;
5580                         } // end of reading a pixel
5581
5582                           // copy data
5583                         for (j = 0; j < tga_comp; ++j)
5584                                 tga_data[i*tga_comp + j] = raw_data[j];
5585
5586                         //   in case we're in RLE mode, keep counting down
5587                         --RLE_count;
5588                 }
5589                 //   do I need to invert the image?
5590                 if (tga_inverted)
5591                 {
5592                         for (j = 0; j * 2 < tga_height; ++j)
5593                         {
5594                                 int index1 = j * tga_width * tga_comp;
5595                                 int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
5596                                 for (i = tga_width * tga_comp; i > 0; --i)
5597                                 {
5598                                         unsigned char temp = tga_data[index1];
5599                                         tga_data[index1] = tga_data[index2];
5600                                         tga_data[index2] = temp;
5601                                         ++index1;
5602                                         ++index2;
5603                                 }
5604                         }
5605                 }
5606                 //   clear my palette, if I had one
5607                 if (tga_palette != NULL)
5608                 {
5609                         STBI_FREE(tga_palette);
5610                 }
5611         }
5612
5613         // swap RGB - if the source data was RGB16, it already is in the right order
5614         if (tga_comp >= 3 && !tga_rgb16)
5615         {
5616                 unsigned char* tga_pixel = tga_data;
5617                 for (i = 0; i < tga_width * tga_height; ++i)
5618                 {
5619                         unsigned char temp = tga_pixel[0];
5620                         tga_pixel[0] = tga_pixel[2];
5621                         tga_pixel[2] = temp;
5622                         tga_pixel += tga_comp;
5623                 }
5624         }
5625
5626         // convert to target component count
5627         if (req_comp && req_comp != tga_comp)
5628                 tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
5629
5630         //   the things I do to get rid of an error message, and yet keep
5631         //   Microsoft's C compilers happy... [8^(
5632         tga_palette_start = tga_palette_len = tga_palette_bits =
5633                 tga_x_origin = tga_y_origin = 0;
5634         //   OK, done
5635         return tga_data;
5636 }
5637 #endif
5638
5639 // *************************************************************************************************
5640 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5641
5642 #ifndef STBI_NO_PSD
5643 static int stbi__psd_test(stbi__context *s)
5644 {
5645         int r = (stbi__get32be(s) == 0x38425053);
5646         stbi__rewind(s);
5647         return r;
5648 }
5649
5650 static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
5651 {
5652         int count, nleft, len;
5653
5654         count = 0;
5655         while ((nleft = pixelCount - count) > 0) {
5656                 len = stbi__get8(s);
5657                 if (len == 128) {
5658                         // No-op.
5659                 }
5660                 else if (len < 128) {
5661                         // Copy next len+1 bytes literally.
5662                         len++;
5663                         if (len > nleft) return 0; // corrupt data
5664                         count += len;
5665                         while (len) {
5666                                 *p = stbi__get8(s);
5667                                 p += 4;
5668                                 len--;
5669                         }
5670                 }
5671                 else if (len > 128) {
5672                         stbi_uc   val;
5673                         // Next -len+1 bytes in the dest are replicated from next source byte.
5674                         // (Interpret len as a negative 8-bit int.)
5675                         len = 257 - len;
5676                         if (len > nleft) return 0; // corrupt data
5677                         val = stbi__get8(s);
5678                         count += len;
5679                         while (len) {
5680                                 *p = val;
5681                                 p += 4;
5682                                 len--;
5683                         }
5684                 }
5685         }
5686
5687         return 1;
5688 }
5689
5690 static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
5691 {
5692         int pixelCount;
5693         int channelCount, compression;
5694         int channel, i;
5695         int bitdepth;
5696         int w, h;
5697         stbi_uc *out;
5698         STBI_NOTUSED(ri);
5699
5700         // Check identifier
5701         if (stbi__get32be(s) != 0x38425053)   // "8BPS"
5702                 return stbi__errpuc("not PSD", "Corrupt PSD image");
5703
5704         // Check file type version.
5705         if (stbi__get16be(s) != 1)
5706                 return stbi__errpuc("wrong version", "Unsupported version of PSD image");
5707
5708         // Skip 6 reserved bytes.
5709         stbi__skip(s, 6);
5710
5711         // Read the number of channels (R, G, B, A, etc).
5712         channelCount = stbi__get16be(s);
5713         if (channelCount < 0 || channelCount > 16)
5714                 return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
5715
5716         // Read the rows and columns of the image.
5717         h = stbi__get32be(s);
5718         w = stbi__get32be(s);
5719
5720         // Make sure the depth is 8 bits.
5721         bitdepth = stbi__get16be(s);
5722         if (bitdepth != 8 && bitdepth != 16)
5723                 return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
5724
5725         // Make sure the color mode is RGB.
5726         // Valid options are:
5727         //   0: Bitmap
5728         //   1: Grayscale
5729         //   2: Indexed color
5730         //   3: RGB color
5731         //   4: CMYK color
5732         //   7: Multichannel
5733         //   8: Duotone
5734         //   9: Lab color
5735         if (stbi__get16be(s) != 3)
5736                 return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
5737
5738         // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
5739         stbi__skip(s, stbi__get32be(s));
5740
5741         // Skip the image resources.  (resolution, pen tool paths, etc)
5742         stbi__skip(s, stbi__get32be(s));
5743
5744         // Skip the reserved data.
5745         stbi__skip(s, stbi__get32be(s));
5746
5747         // Find out if the data is compressed.
5748         // Known values:
5749         //   0: no compression
5750         //   1: RLE compressed
5751         compression = stbi__get16be(s);
5752         if (compression > 1)
5753                 return stbi__errpuc("bad compression", "PSD has an unknown compression format");
5754
5755         // Check size
5756         if (!stbi__mad3sizes_valid(4, w, h, 0))
5757                 return stbi__errpuc("too large", "Corrupt PSD");
5758
5759         // Create the destination image.
5760
5761         if (!compression && bitdepth == 16 && bpc == 16) {
5762                 out = (stbi_uc *)stbi__malloc_mad3(8, w, h, 0);
5763                 ri->bits_per_channel = 16;
5764         }
5765         else
5766                 out = (stbi_uc *)stbi__malloc(4 * w*h);
5767
5768         if (!out) return stbi__errpuc("outofmem", "Out of memory");
5769         pixelCount = w*h;
5770
5771         // Initialize the data to zero.
5772         //memset( out, 0, pixelCount * 4 );
5773
5774         // Finally, the image data.
5775         if (compression) {
5776                 // RLE as used by .PSD and .TIFF
5777                 // Loop until you get the number of unpacked bytes you are expecting:
5778                 //     Read the next source byte into n.
5779                 //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
5780                 //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
5781                 //     Else if n is 128, noop.
5782                 // Endloop
5783
5784                 // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
5785                 // which we're going to just skip.
5786                 stbi__skip(s, h * channelCount * 2);
5787
5788                 // Read the RLE data by channel.
5789                 for (channel = 0; channel < 4; channel++) {
5790                         stbi_uc *p;
5791
5792                         p = out + channel;
5793                         if (channel >= channelCount) {
5794                                 // Fill this channel with default data.
5795                                 for (i = 0; i < pixelCount; i++, p += 4)
5796                                         *p = (channel == 3 ? 255 : 0);
5797                         }
5798                         else {
5799                                 // Read the RLE data.
5800                                 if (!stbi__psd_decode_rle(s, p, pixelCount)) {
5801                                         STBI_FREE(out);
5802                                         return stbi__errpuc("corrupt", "bad RLE data");
5803                                 }
5804                         }
5805                 }
5806
5807         }
5808         else {
5809                 // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
5810                 // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
5811
5812                 // Read the data by channel.
5813                 for (channel = 0; channel < 4; channel++) {
5814                         if (channel >= channelCount) {
5815                                 // Fill this channel with default data.
5816                                 if (bitdepth == 16 && bpc == 16) {
5817                                         stbi__uint16 *q = ((stbi__uint16 *)out) + channel;
5818                                         stbi__uint16 val = channel == 3 ? 65535 : 0;
5819                                         for (i = 0; i < pixelCount; i++, q += 4)
5820                                                 *q = val;
5821                                 }
5822                                 else {
5823                                         stbi_uc *p = out + channel;
5824                                         stbi_uc val = channel == 3 ? 255 : 0;
5825                                         for (i = 0; i < pixelCount; i++, p += 4)
5826                                                 *p = val;
5827                                 }
5828                         }
5829                         else {
5830                                 if (ri->bits_per_channel == 16) {    // output bpc
5831                                         stbi__uint16 *q = ((stbi__uint16 *)out) + channel;
5832                                         for (i = 0; i < pixelCount; i++, q += 4)
5833                                                 *q = (stbi__uint16)stbi__get16be(s);
5834                                 }
5835                                 else {
5836                                         stbi_uc *p = out + channel;
5837                                         if (bitdepth == 16) {  // input bpc
5838                                                 for (i = 0; i < pixelCount; i++, p += 4)
5839                                                         *p = (stbi_uc)(stbi__get16be(s) >> 8);
5840                                         }
5841                                         else {
5842                                                 for (i = 0; i < pixelCount; i++, p += 4)
5843                                                         *p = stbi__get8(s);
5844                                         }
5845                                 }
5846                         }
5847                 }
5848         }
5849
5850         // remove weird white matte from PSD
5851         if (channelCount >= 4) {
5852                 if (ri->bits_per_channel == 16) {
5853                         for (i = 0; i < w*h; ++i) {
5854                                 stbi__uint16 *pixel = (stbi__uint16 *)out + 4 * i;
5855                                 if (pixel[3] != 0 && pixel[3] != 65535) {
5856                                         float a = pixel[3] / 65535.0f;
5857                                         float ra = 1.0f / a;
5858                                         float inv_a = 65535.0f * (1 - ra);
5859                                         pixel[0] = (stbi__uint16)(pixel[0] * ra + inv_a);
5860                                         pixel[1] = (stbi__uint16)(pixel[1] * ra + inv_a);
5861                                         pixel[2] = (stbi__uint16)(pixel[2] * ra + inv_a);
5862                                 }
5863                         }
5864                 }
5865                 else {
5866                         for (i = 0; i < w*h; ++i) {
5867                                 unsigned char *pixel = out + 4 * i;
5868                                 if (pixel[3] != 0 && pixel[3] != 255) {
5869                                         float a = pixel[3] / 255.0f;
5870                                         float ra = 1.0f / a;
5871                                         float inv_a = 255.0f * (1 - ra);
5872                                         pixel[0] = (unsigned char)(pixel[0] * ra + inv_a);
5873                                         pixel[1] = (unsigned char)(pixel[1] * ra + inv_a);
5874                                         pixel[2] = (unsigned char)(pixel[2] * ra + inv_a);
5875                                 }
5876                         }
5877                 }
5878         }
5879
5880         // convert to desired output format
5881         if (req_comp && req_comp != 4) {
5882                 if (ri->bits_per_channel == 16)
5883                         out = (stbi_uc *)stbi__convert_format16((stbi__uint16 *)out, 4, req_comp, w, h);
5884                 else
5885                         out = stbi__convert_format(out, 4, req_comp, w, h);
5886                 if (out == NULL) return out; // stbi__convert_format frees input on failure
5887         }
5888
5889         if (comp) *comp = 4;
5890         *y = h;
5891         *x = w;
5892
5893         return out;
5894 }
5895 #endif
5896
5897 // *************************************************************************************************
5898 // Softimage PIC loader
5899 // by Tom Seddon
5900 //
5901 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5902 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5903
5904 #ifndef STBI_NO_PIC
5905 static int stbi__pic_is4(stbi__context *s, const char *str)
5906 {
5907         int i;
5908         for (i = 0; i<4; ++i)
5909                 if (stbi__get8(s) != (stbi_uc)str[i])
5910                         return 0;
5911
5912         return 1;
5913 }
5914
5915 static int stbi__pic_test_core(stbi__context *s)
5916 {
5917         int i;
5918
5919         if (!stbi__pic_is4(s, "\x53\x80\xF6\x34"))
5920                 return 0;
5921
5922         for (i = 0; i<84; ++i)
5923                 stbi__get8(s);
5924
5925         if (!stbi__pic_is4(s, "PICT"))
5926                 return 0;
5927
5928         return 1;
5929 }
5930
5931 typedef struct
5932 {
5933         stbi_uc size, type, channel;
5934 } stbi__pic_packet;
5935
5936 static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
5937 {
5938         int mask = 0x80, i;
5939
5940         for (i = 0; i<4; ++i, mask >>= 1) {
5941                 if (channel & mask) {
5942                         if (stbi__at_eof(s)) return stbi__errpuc("bad file", "PIC file too short");
5943                         dest[i] = stbi__get8(s);
5944                 }
5945         }
5946
5947         return dest;
5948 }
5949
5950 static void stbi__copyval(int channel, stbi_uc *dest, const stbi_uc *src)
5951 {
5952         int mask = 0x80, i;
5953
5954         for (i = 0; i<4; ++i, mask >>= 1)
5955                 if (channel&mask)
5956                         dest[i] = src[i];
5957 }
5958
5959 static stbi_uc *stbi__pic_load_core(stbi__context *s, int width, int height, int *comp, stbi_uc *result)
5960 {
5961         int act_comp = 0, num_packets = 0, y, chained;
5962         stbi__pic_packet packets[10];
5963
5964         // this will (should...) cater for even some bizarre stuff like having data
5965         // for the same channel in multiple packets.
5966         do {
5967                 stbi__pic_packet *packet;
5968
5969                 if (num_packets == sizeof(packets) / sizeof(packets[0]))
5970                         return stbi__errpuc("bad format", "too many packets");
5971
5972                 packet = &packets[num_packets++];
5973
5974                 chained = stbi__get8(s);
5975                 packet->size = stbi__get8(s);
5976                 packet->type = stbi__get8(s);
5977                 packet->channel = stbi__get8(s);
5978
5979                 act_comp |= packet->channel;
5980
5981                 if (stbi__at_eof(s))          return stbi__errpuc("bad file", "file too short (reading packets)");
5982                 if (packet->size != 8)  return stbi__errpuc("bad format", "packet isn't 8bpp");
5983         } while (chained);
5984
5985         *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
5986
5987         for (y = 0; y<height; ++y) {
5988                 int packet_idx;
5989
5990                 for (packet_idx = 0; packet_idx < num_packets; ++packet_idx) {
5991                         stbi__pic_packet *packet = &packets[packet_idx];
5992                         stbi_uc *dest = result + y*width * 4;
5993
5994                         switch (packet->type) {
5995                         default:
5996                                 return stbi__errpuc("bad format", "packet has bad compression type");
5997
5998                         case 0: {//uncompressed
5999                                 int x;
6000
6001                                 for (x = 0; x<width; ++x, dest += 4)
6002                                         if (!stbi__readval(s, packet->channel, dest))
6003                                                 return 0;
6004                                 break;
6005                         }
6006
6007                         case 1://Pure RLE
6008                         {
6009                                 int left = width, i;
6010
6011                                 while (left>0) {
6012                                         stbi_uc count, value[4];
6013
6014                                         count = stbi__get8(s);
6015                                         if (stbi__at_eof(s))   return stbi__errpuc("bad file", "file too short (pure read count)");
6016
6017                                         if (count > left)
6018                                                 count = (stbi_uc)left;
6019
6020                                         if (!stbi__readval(s, packet->channel, value))  return 0;
6021
6022                                         for (i = 0; i<count; ++i, dest += 4)
6023                                                 stbi__copyval(packet->channel, dest, value);
6024                                         left -= count;
6025                                 }
6026                         }
6027                         break;
6028
6029                         case 2: {//Mixed RLE
6030                                 int left = width;
6031                                 while (left>0) {
6032                                         int count = stbi__get8(s), i;
6033                                         if (stbi__at_eof(s))  return stbi__errpuc("bad file", "file too short (mixed read count)");
6034
6035                                         if (count >= 128) { // Repeated
6036                                                 stbi_uc value[4];
6037
6038                                                 if (count == 128)
6039                                                         count = stbi__get16be(s);
6040                                                 else
6041                                                         count -= 127;
6042                                                 if (count > left)
6043                                                         return stbi__errpuc("bad file", "scanline overrun");
6044
6045                                                 if (!stbi__readval(s, packet->channel, value))
6046                                                         return 0;
6047
6048                                                 for (i = 0; i<count; ++i, dest += 4)
6049                                                         stbi__copyval(packet->channel, dest, value);
6050                                         }
6051                                         else { // Raw
6052                                                 ++count;
6053                                                 if (count>left) return stbi__errpuc("bad file", "scanline overrun");
6054
6055                                                 for (i = 0; i<count; ++i, dest += 4)
6056                                                         if (!stbi__readval(s, packet->channel, dest))
6057                                                                 return 0;
6058                                         }
6059                                         left -= count;
6060                                 }
6061                                 break;
6062                         }
6063                         }
6064                 }
6065         }
6066
6067         return result;
6068 }
6069
6070 static void *stbi__pic_load(stbi__context *s, int *px, int *py, int *comp, int req_comp, stbi__result_info *ri)
6071 {
6072         stbi_uc *result;
6073         int i, x, y, internal_comp;
6074         STBI_NOTUSED(ri);
6075
6076         if (!comp) comp = &internal_comp;
6077
6078         for (i = 0; i<92; ++i)
6079                 stbi__get8(s);
6080
6081         x = stbi__get16be(s);
6082         y = stbi__get16be(s);
6083         if (stbi__at_eof(s))  return stbi__errpuc("bad file", "file too short (pic header)");
6084         if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
6085
6086         stbi__get32be(s); //skip `ratio'
6087         stbi__get16be(s); //skip `fields'
6088         stbi__get16be(s); //skip `pad'
6089
6090                                           // intermediate buffer is RGBA
6091         result = (stbi_uc *)stbi__malloc_mad3(x, y, 4, 0);
6092         memset(result, 0xff, x*y * 4);
6093
6094         if (!stbi__pic_load_core(s, x, y, comp, result)) {
6095                 STBI_FREE(result);
6096                 result = 0;
6097         }
6098         *px = x;
6099         *py = y;
6100         if (req_comp == 0) req_comp = *comp;
6101         result = stbi__convert_format(result, 4, req_comp, x, y);
6102
6103         return result;
6104 }
6105
6106 static int stbi__pic_test(stbi__context *s)
6107 {
6108         int r = stbi__pic_test_core(s);
6109         stbi__rewind(s);
6110         return r;
6111 }
6112 #endif
6113
6114 // *************************************************************************************************
6115 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6116
6117 #ifndef STBI_NO_GIF
6118 typedef struct
6119 {
6120         stbi__int16 prefix;
6121         stbi_uc first;
6122         stbi_uc suffix;
6123 } stbi__gif_lzw;
6124
6125 typedef struct
6126 {
6127         int w, h;
6128         stbi_uc *out, *old_out;             // output buffer (always 4 components)
6129         int flags, bgindex, ratio, transparent, eflags, delay;
6130         stbi_uc  pal[256][4];
6131         stbi_uc lpal[256][4];
6132         stbi__gif_lzw codes[4096];
6133         stbi_uc *color_table;
6134         int parse, step;
6135         int lflags;
6136         int start_x, start_y;
6137         int max_x, max_y;
6138         int cur_x, cur_y;
6139         int line_size;
6140 } stbi__gif;
6141
6142 static int stbi__gif_test_raw(stbi__context *s)
6143 {
6144         int sz;
6145         if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
6146         sz = stbi__get8(s);
6147         if (sz != '9' && sz != '7') return 0;
6148         if (stbi__get8(s) != 'a') return 0;
6149         return 1;
6150 }
6151
6152 static int stbi__gif_test(stbi__context *s)
6153 {
6154         int r = stbi__gif_test_raw(s);
6155         stbi__rewind(s);
6156         return r;
6157 }
6158
6159 static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
6160 {
6161         int i;
6162         for (i = 0; i < num_entries; ++i) {
6163                 pal[i][2] = stbi__get8(s);
6164                 pal[i][1] = stbi__get8(s);
6165                 pal[i][0] = stbi__get8(s);
6166                 pal[i][3] = transp == i ? 0 : 255;
6167         }
6168 }
6169
6170 static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
6171 {
6172         stbi_uc version;
6173         if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
6174                 return stbi__err("not GIF", "Corrupt GIF");
6175
6176         version = stbi__get8(s);
6177         if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
6178         if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");
6179
6180         stbi__g_failure_reason = "";
6181         g->w = stbi__get16le(s);
6182         g->h = stbi__get16le(s);
6183         g->flags = stbi__get8(s);
6184         g->bgindex = stbi__get8(s);
6185         g->ratio = stbi__get8(s);
6186         g->transparent = -1;
6187
6188         if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
6189
6190         if (is_info) return 1;
6191
6192         if (g->flags & 0x80)
6193                 stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
6194
6195         return 1;
6196 }
6197
6198 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
6199 {
6200         stbi__gif* g = (stbi__gif*)stbi__malloc(sizeof(stbi__gif));
6201         if (!stbi__gif_header(s, g, comp, 1)) {
6202                 STBI_FREE(g);
6203                 stbi__rewind(s);
6204                 return 0;
6205         }
6206         if (x) *x = g->w;
6207         if (y) *y = g->h;
6208         STBI_FREE(g);
6209         return 1;
6210 }
6211
6212 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
6213 {
6214         stbi_uc *p, *c;
6215
6216         // recurse to decode the prefixes, since the linked-list is backwards,
6217         // and working backwards through an interleaved image would be nasty
6218         if (g->codes[code].prefix >= 0)
6219                 stbi__out_gif_code(g, g->codes[code].prefix);
6220
6221         if (g->cur_y >= g->max_y) return;
6222
6223         p = &g->out[g->cur_x + g->cur_y];
6224         c = &g->color_table[g->codes[code].suffix * 4];
6225
6226         if (c[3] >= 128) {
6227                 p[0] = c[2];
6228                 p[1] = c[1];
6229                 p[2] = c[0];
6230                 p[3] = c[3];
6231         }
6232         g->cur_x += 4;
6233
6234         if (g->cur_x >= g->max_x) {
6235                 g->cur_x = g->start_x;
6236                 g->cur_y += g->step;
6237
6238                 while (g->cur_y >= g->max_y && g->parse > 0) {
6239                         g->step = (1 << g->parse) * g->line_size;
6240                         g->cur_y = g->start_y + (g->step >> 1);
6241                         --g->parse;
6242                 }
6243         }
6244 }
6245
6246 static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
6247 {
6248         stbi_uc lzw_cs;
6249         stbi__int32 len, init_code;
6250         stbi__uint32 first;
6251         stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
6252         stbi__gif_lzw *p;
6253
6254         lzw_cs = stbi__get8(s);
6255         if (lzw_cs > 12) return NULL;
6256         clear = 1 << lzw_cs;
6257         first = 1;
6258         codesize = lzw_cs + 1;
6259         codemask = (1 << codesize) - 1;
6260         bits = 0;
6261         valid_bits = 0;
6262         for (init_code = 0; init_code < clear; init_code++) {
6263                 g->codes[init_code].prefix = -1;
6264                 g->codes[init_code].first = (stbi_uc)init_code;
6265                 g->codes[init_code].suffix = (stbi_uc)init_code;
6266         }
6267
6268         // support no starting clear code
6269         avail = clear + 2;
6270         oldcode = -1;
6271
6272         len = 0;
6273         for (;;) {
6274                 if (valid_bits < codesize) {
6275                         if (len == 0) {
6276                                 len = stbi__get8(s); // start new block
6277                                 if (len == 0)
6278                                         return g->out;
6279                         }
6280                         --len;
6281                         bits |= (stbi__int32)stbi__get8(s) << valid_bits;
6282                         valid_bits += 8;
6283                 }
6284                 else {
6285                         stbi__int32 code = bits & codemask;
6286                         bits >>= codesize;
6287                         valid_bits -= codesize;
6288                         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
6289                         if (code == clear) {  // clear code
6290                                 codesize = lzw_cs + 1;
6291                                 codemask = (1 << codesize) - 1;
6292                                 avail = clear + 2;
6293                                 oldcode = -1;
6294                                 first = 0;
6295                         }
6296                         else if (code == clear + 1) { // end of stream code
6297                                 stbi__skip(s, len);
6298                                 while ((len = stbi__get8(s)) > 0)
6299                                         stbi__skip(s, len);
6300                                 return g->out;
6301                         }
6302                         else if (code <= avail) {
6303                                 if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
6304
6305                                 if (oldcode >= 0) {
6306                                         p = &g->codes[avail++];
6307                                         if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
6308                                         p->prefix = (stbi__int16)oldcode;
6309                                         p->first = g->codes[oldcode].first;
6310                                         p->suffix = (code == avail) ? p->first : g->codes[code].first;
6311                                 }
6312                                 else if (code == avail)
6313                                         return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6314
6315                                 stbi__out_gif_code(g, (stbi__uint16)code);
6316
6317                                 if ((avail & codemask) == 0 && avail <= 0x0FFF) {
6318                                         codesize++;
6319                                         codemask = (1 << codesize) - 1;
6320                                 }
6321
6322                                 oldcode = code;
6323                         }
6324                         else {
6325                                 return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6326                         }
6327                 }
6328         }
6329 }
6330
6331 static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
6332 {
6333         int x, y;
6334         stbi_uc *c = g->pal[g->bgindex];
6335         for (y = y0; y < y1; y += 4 * g->w) {
6336                 for (x = x0; x < x1; x += 4) {
6337                         stbi_uc *p = &g->out[y + x];
6338                         p[0] = c[2];
6339                         p[1] = c[1];
6340                         p[2] = c[0];
6341                         p[3] = 0;
6342                 }
6343         }
6344 }
6345
6346 // this function is designed to support animated gifs, although stb_image doesn't support it
6347 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
6348 {
6349         int i;
6350         stbi_uc *prev_out = 0;
6351
6352         if (g->out == 0 && !stbi__gif_header(s, g, comp, 0))
6353                 return 0; // stbi__g_failure_reason set by stbi__gif_header
6354
6355         if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0))
6356                 return stbi__errpuc("too large", "GIF too large");
6357
6358         prev_out = g->out;
6359         g->out = (stbi_uc *)stbi__malloc_mad3(4, g->w, g->h, 0);
6360         if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
6361
6362         switch ((g->eflags & 0x1C) >> 2) {
6363         case 0: // unspecified (also always used on 1st frame)
6364                 stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
6365                 break;
6366         case 1: // do not dispose
6367                 if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
6368                 g->old_out = prev_out;
6369                 break;
6370         case 2: // dispose to background
6371                 if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
6372                 stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
6373                 break;
6374         case 3: // dispose to previous
6375                 if (g->old_out) {
6376                         for (i = g->start_y; i < g->max_y; i += 4 * g->w)
6377                                 memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
6378                 }
6379                 break;
6380         }
6381
6382         for (;;) {
6383                 switch (stbi__get8(s)) {
6384                 case 0x2C: /* Image Descriptor */
6385                 {
6386                         int prev_trans = -1;
6387                         stbi__int32 x, y, w, h;
6388                         stbi_uc *o;
6389
6390                         x = stbi__get16le(s);
6391                         y = stbi__get16le(s);
6392                         w = stbi__get16le(s);
6393                         h = stbi__get16le(s);
6394                         if (((x + w) > (g->w)) || ((y + h) > (g->h)))
6395                                 return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
6396
6397                         g->line_size = g->w * 4;
6398                         g->start_x = x * 4;
6399                         g->start_y = y * g->line_size;
6400                         g->max_x = g->start_x + w * 4;
6401                         g->max_y = g->start_y + h * g->line_size;
6402                         g->cur_x = g->start_x;
6403                         g->cur_y = g->start_y;
6404
6405                         g->lflags = stbi__get8(s);
6406
6407                         if (g->lflags & 0x40) {
6408                                 g->step = 8 * g->line_size; // first interlaced spacing
6409                                 g->parse = 3;
6410                         }
6411                         else {
6412                                 g->step = g->line_size;
6413                                 g->parse = 0;
6414                         }
6415
6416                         if (g->lflags & 0x80) {
6417                                 stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
6418                                 g->color_table = (stbi_uc *)g->lpal;
6419                         }
6420                         else if (g->flags & 0x80) {
6421                                 if (g->transparent >= 0 && (g->eflags & 0x01)) {
6422                                         prev_trans = g->pal[g->transparent][3];
6423                                         g->pal[g->transparent][3] = 0;
6424                                 }
6425                                 g->color_table = (stbi_uc *)g->pal;
6426                         }
6427                         else
6428                                 return stbi__errpuc("missing color table", "Corrupt GIF");
6429
6430                         o = stbi__process_gif_raster(s, g);
6431                         if (o == NULL) return NULL;
6432
6433                         if (prev_trans != -1)
6434                                 g->pal[g->transparent][3] = (stbi_uc)prev_trans;
6435
6436                         return o;
6437                 }
6438
6439                 case 0x21: // Comment Extension.
6440                 {
6441                         int len;
6442                         if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
6443                                 len = stbi__get8(s);
6444                                 if (len == 4) {
6445                                         g->eflags = stbi__get8(s);
6446                                         g->delay = stbi__get16le(s);
6447                                         g->transparent = stbi__get8(s);
6448                                 }
6449                                 else {
6450                                         stbi__skip(s, len);
6451                                         break;
6452                                 }
6453                         }
6454                         while ((len = stbi__get8(s)) != 0)
6455                                 stbi__skip(s, len);
6456                         break;
6457                 }
6458
6459                 case 0x3B: // gif stream termination code
6460                         return (stbi_uc *)s; // using '1' causes warning on some compilers
6461
6462                 default:
6463                         return stbi__errpuc("unknown code", "Corrupt GIF");
6464                 }
6465         }
6466
6467         STBI_NOTUSED(req_comp);
6468 }
6469
6470 static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
6471 {
6472         stbi_uc *u = 0;
6473         stbi__gif* g = (stbi__gif*)stbi__malloc(sizeof(stbi__gif));
6474         memset(g, 0, sizeof(*g));
6475         STBI_NOTUSED(ri);
6476
6477         u = stbi__gif_load_next(s, g, comp, req_comp);
6478         if (u == (stbi_uc *)s) u = 0;  // end of animated gif marker
6479         if (u) {
6480                 *x = g->w;
6481                 *y = g->h;
6482                 if (req_comp && req_comp != 4)
6483                         u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
6484         }
6485         else if (g->out)
6486                 STBI_FREE(g->out);
6487         STBI_FREE(g);
6488         return u;
6489 }
6490
6491 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
6492 {
6493         return stbi__gif_info_raw(s, x, y, comp);
6494 }
6495 #endif
6496
6497 // *************************************************************************************************
6498 // Radiance RGBE HDR loader
6499 // originally by Nicolas Schulz
6500 #ifndef STBI_NO_HDR
6501 static int stbi__hdr_test_core(stbi__context *s, const char *signature)
6502 {
6503         int i;
6504         for (i = 0; signature[i]; ++i)
6505                 if (stbi__get8(s) != signature[i])
6506                         return 0;
6507         stbi__rewind(s);
6508         return 1;
6509 }
6510
6511 static int stbi__hdr_test(stbi__context* s)
6512 {
6513         int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
6514         stbi__rewind(s);
6515         if (!r) {
6516                 r = stbi__hdr_test_core(s, "#?RGBE\n");
6517                 stbi__rewind(s);
6518         }
6519         return r;
6520 }
6521
6522 #define STBI__HDR_BUFLEN  1024
6523 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
6524 {
6525         int len = 0;
6526         char c = '\0';
6527
6528         c = (char)stbi__get8(z);
6529
6530         while (!stbi__at_eof(z) && c != '\n') {
6531                 buffer[len++] = c;
6532                 if (len == STBI__HDR_BUFLEN - 1) {
6533                         // flush to end of line
6534                         while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
6535                                 ;
6536                         break;
6537                 }
6538                 c = (char)stbi__get8(z);
6539         }
6540
6541         buffer[len] = 0;
6542         return buffer;
6543 }
6544
6545 static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
6546 {
6547         if (input[3] != 0) {
6548                 float f1;
6549                 // Exponent
6550                 f1 = (float)ldexp(1.0f, input[3] - (int)(128 + 8));
6551                 if (req_comp <= 2)
6552                         output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
6553                 else {
6554                         output[0] = input[0] * f1;
6555                         output[1] = input[1] * f1;
6556                         output[2] = input[2] * f1;
6557                 }
6558                 if (req_comp == 2) output[1] = 1;
6559                 if (req_comp == 4) output[3] = 1;
6560         }
6561         else {
6562                 switch (req_comp) {
6563                 case 4: output[3] = 1; /* fallthrough */
6564                 case 3: output[0] = output[1] = output[2] = 0;
6565                         break;
6566                 case 2: output[1] = 1; /* fallthrough */
6567                 case 1: output[0] = 0;
6568                         break;
6569                 }
6570         }
6571 }
6572
6573 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
6574 {
6575         char buffer[STBI__HDR_BUFLEN];
6576         char *token;
6577         int valid = 0;
6578         int width, height;
6579         stbi_uc *scanline;
6580         float *hdr_data;
6581         int len;
6582         unsigned char count, value;
6583         int i, j, k, c1, c2, z;
6584         const char *headerToken;
6585         STBI_NOTUSED(ri);
6586
6587         // Check identifier
6588         headerToken = stbi__hdr_gettoken(s, buffer);
6589         if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
6590                 return stbi__errpf("not HDR", "Corrupt HDR image");
6591
6592         // Parse header
6593         for (;;) {
6594                 token = stbi__hdr_gettoken(s, buffer);
6595                 if (token[0] == 0) break;
6596                 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
6597         }
6598
6599         if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");
6600
6601         // Parse width and height
6602         // can't use sscanf() if we're not using stdio!
6603         token = stbi__hdr_gettoken(s, buffer);
6604         if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
6605         token += 3;
6606         height = (int)strtol(token, &token, 10);
6607         while (*token == ' ') ++token;
6608         if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
6609         token += 3;
6610         width = (int)strtol(token, NULL, 10);
6611
6612         *x = width;
6613         *y = height;
6614
6615         if (comp) *comp = 3;
6616         if (req_comp == 0) req_comp = 3;
6617
6618         if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
6619                 return stbi__errpf("too large", "HDR image is too large");
6620
6621         // Read data
6622         hdr_data = (float *)stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
6623         if (!hdr_data)
6624                 return stbi__errpf("outofmem", "Out of memory");
6625
6626         // Load image data
6627         // image data is stored as some number of sca
6628         if (width < 8 || width >= 32768) {
6629                 // Read flat data
6630                 for (j = 0; j < height; ++j) {
6631                         for (i = 0; i < width; ++i) {
6632                                 stbi_uc rgbe[4];
6633                         main_decode_loop:
6634                                 stbi__getn(s, rgbe, 4);
6635                                 stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
6636                         }
6637                 }
6638         }
6639         else {
6640                 // Read RLE-encoded data
6641                 scanline = NULL;
6642
6643                 for (j = 0; j < height; ++j) {
6644                         c1 = stbi__get8(s);
6645                         c2 = stbi__get8(s);
6646                         len = stbi__get8(s);
6647                         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
6648                                 // not run-length encoded, so we have to actually use THIS data as a decoded
6649                                 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
6650                                 stbi_uc rgbe[4];
6651                                 rgbe[0] = (stbi_uc)c1;
6652                                 rgbe[1] = (stbi_uc)c2;
6653                                 rgbe[2] = (stbi_uc)len;
6654                                 rgbe[3] = (stbi_uc)stbi__get8(s);
6655                                 stbi__hdr_convert(hdr_data, rgbe, req_comp);
6656                                 i = 1;
6657                                 j = 0;
6658                                 STBI_FREE(scanline);
6659                                 goto main_decode_loop; // yes, this makes no sense
6660                         }
6661                         len <<= 8;
6662                         len |= stbi__get8(s);
6663                         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
6664                         if (scanline == NULL) {
6665                                 scanline = (stbi_uc *)stbi__malloc_mad2(width, 4, 0);
6666                                 if (!scanline) {
6667                                         STBI_FREE(hdr_data);
6668                                         return stbi__errpf("outofmem", "Out of memory");
6669                                 }
6670                         }
6671
6672                         for (k = 0; k < 4; ++k) {
6673                                 int nleft;
6674                                 i = 0;
6675                                 while ((nleft = width - i) > 0) {
6676                                         count = stbi__get8(s);
6677                                         if (count > 128) {
6678                                                 // Run
6679                                                 value = stbi__get8(s);
6680                                                 count -= 128;
6681                                                 if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
6682                                                 for (z = 0; z < count; ++z)
6683                                                         scanline[i++ * 4 + k] = value;
6684                                         }
6685                                         else {
6686                                                 // Dump
6687                                                 if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
6688                                                 for (z = 0; z < count; ++z)
6689                                                         scanline[i++ * 4 + k] = stbi__get8(s);
6690                                         }
6691                                 }
6692                         }
6693                         for (i = 0; i < width; ++i)
6694                                 stbi__hdr_convert(hdr_data + (j*width + i)*req_comp, scanline + i * 4, req_comp);
6695                 }
6696                 if (scanline)
6697                         STBI_FREE(scanline);
6698         }
6699
6700         return hdr_data;
6701 }
6702
6703 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
6704 {
6705         char buffer[STBI__HDR_BUFLEN];
6706         char *token;
6707         int valid = 0;
6708         int dummy;
6709
6710         if (!x) x = &dummy;
6711         if (!y) y = &dummy;
6712         if (!comp) comp = &dummy;
6713
6714         if (stbi__hdr_test(s) == 0) {
6715                 stbi__rewind(s);
6716                 return 0;
6717         }
6718
6719         for (;;) {
6720                 token = stbi__hdr_gettoken(s, buffer);
6721                 if (token[0] == 0) break;
6722                 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
6723         }
6724
6725         if (!valid) {
6726                 stbi__rewind(s);
6727                 return 0;
6728         }
6729         token = stbi__hdr_gettoken(s, buffer);
6730         if (strncmp(token, "-Y ", 3)) {
6731                 stbi__rewind(s);
6732                 return 0;
6733         }
6734         token += 3;
6735         *y = (int)strtol(token, &token, 10);
6736         while (*token == ' ') ++token;
6737         if (strncmp(token, "+X ", 3)) {
6738                 stbi__rewind(s);
6739                 return 0;
6740         }
6741         token += 3;
6742         *x = (int)strtol(token, NULL, 10);
6743         *comp = 3;
6744         return 1;
6745 }
6746 #endif // STBI_NO_HDR
6747
6748 #ifndef STBI_NO_BMP
6749 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
6750 {
6751         void *p;
6752         stbi__bmp_data info;
6753
6754         info.all_a = 255;
6755         p = stbi__bmp_parse_header(s, &info);
6756         stbi__rewind(s);
6757         if (p == NULL)
6758                 return 0;
6759         if (x) *x = s->img_x;
6760         if (y) *y = s->img_y;
6761         if (comp) *comp = info.ma ? 4 : 3;
6762         return 1;
6763 }
6764 #endif
6765
6766 #ifndef STBI_NO_PSD
6767 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
6768 {
6769         int channelCount, dummy;
6770         if (!x) x = &dummy;
6771         if (!y) y = &dummy;
6772         if (!comp) comp = &dummy;
6773         if (stbi__get32be(s) != 0x38425053) {
6774                 stbi__rewind(s);
6775                 return 0;
6776         }
6777         if (stbi__get16be(s) != 1) {
6778                 stbi__rewind(s);
6779                 return 0;
6780         }
6781         stbi__skip(s, 6);
6782         channelCount = stbi__get16be(s);
6783         if (channelCount < 0 || channelCount > 16) {
6784                 stbi__rewind(s);
6785                 return 0;
6786         }
6787         *y = stbi__get32be(s);
6788         *x = stbi__get32be(s);
6789         if (stbi__get16be(s) != 8) {
6790                 stbi__rewind(s);
6791                 return 0;
6792         }
6793         if (stbi__get16be(s) != 3) {
6794                 stbi__rewind(s);
6795                 return 0;
6796         }
6797         *comp = 4;
6798         return 1;
6799 }
6800 #endif
6801
6802 #ifndef STBI_NO_PIC
6803 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
6804 {
6805         int act_comp = 0, num_packets = 0, chained, dummy;
6806         stbi__pic_packet packets[10];
6807
6808         if (!x) x = &dummy;
6809         if (!y) y = &dummy;
6810         if (!comp) comp = &dummy;
6811
6812         if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
6813                 stbi__rewind(s);
6814                 return 0;
6815         }
6816
6817         stbi__skip(s, 88);
6818
6819         *x = stbi__get16be(s);
6820         *y = stbi__get16be(s);
6821         if (stbi__at_eof(s)) {
6822                 stbi__rewind(s);
6823                 return 0;
6824         }
6825         if ((*x) != 0 && (1 << 28) / (*x) < (*y)) {
6826                 stbi__rewind(s);
6827                 return 0;
6828         }
6829
6830         stbi__skip(s, 8);
6831
6832         do {
6833                 stbi__pic_packet *packet;
6834
6835                 if (num_packets == sizeof(packets) / sizeof(packets[0]))
6836                         return 0;
6837
6838                 packet = &packets[num_packets++];
6839                 chained = stbi__get8(s);
6840                 packet->size = stbi__get8(s);
6841                 packet->type = stbi__get8(s);
6842                 packet->channel = stbi__get8(s);
6843                 act_comp |= packet->channel;
6844
6845                 if (stbi__at_eof(s)) {
6846                         stbi__rewind(s);
6847                         return 0;
6848                 }
6849                 if (packet->size != 8) {
6850                         stbi__rewind(s);
6851                         return 0;
6852                 }
6853         } while (chained);
6854
6855         *comp = (act_comp & 0x10 ? 4 : 3);
6856
6857         return 1;
6858 }
6859 #endif
6860
6861 // *************************************************************************************************
6862 // Portable Gray Map and Portable Pixel Map loader
6863 // by Ken Miller
6864 //
6865 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
6866 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
6867 //
6868 // Known limitations:
//    Header comments ('#' to end of line) are skipped only where stbi__pnm_skip_whitespace is called
6870 //    Does not support ASCII image data (formats P2 and P3)
6871 //    Does not support 16-bit-per-channel
6872
6873 #ifndef STBI_NO_PNM
6874
6875 static int      stbi__pnm_test(stbi__context *s)
6876 {
6877         char p, t;
6878         p = (char)stbi__get8(s);
6879         t = (char)stbi__get8(s);
6880         if (p != 'P' || (t != '5' && t != '6')) {
6881                 stbi__rewind(s);
6882                 return 0;
6883         }
6884         return 1;
6885 }
6886
6887 static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
6888 {
6889         stbi_uc *out;
6890         STBI_NOTUSED(ri);
6891
6892         if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
6893                 return 0;
6894
6895         *x = s->img_x;
6896         *y = s->img_y;
6897         if (comp) *comp = s->img_n;
6898
6899         if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
6900                 return stbi__errpuc("too large", "PNM too large");
6901
6902         out = (stbi_uc *)stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
6903         if (!out) return stbi__errpuc("outofmem", "Out of memory");
6904         stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
6905
6906         if (req_comp && req_comp != s->img_n) {
6907                 out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
6908                 if (out == NULL) return out; // stbi__convert_format frees input on failure
6909         }
6910         return out;
6911 }
6912
// Returns 1 when c is a space, tab, newline, vertical tab, form feed or
// carriage return; returns 0 for every other character.
static int      stbi__pnm_isspace(char c)
{
        switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\v':
        case '\f':
        case '\r':
                return 1;
        default:
                return 0;
        }
}
6917
6918 static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
6919 {
6920         for (;;) {
6921                 while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
6922                         *c = (char)stbi__get8(s);
6923
6924                 if (stbi__at_eof(s) || *c != '#')
6925                         break;
6926
6927                 while (!stbi__at_eof(s) && *c != '\n' && *c != '\r')
6928                         *c = (char)stbi__get8(s);
6929         }
6930 }
6931
// Returns 1 when c is one of the characters '0' through '9', otherwise 0.
static int      stbi__pnm_isdigit(char c)
{
        if (c < '0')
                return 0;
        if (c > '9')
                return 0;
        return 1;
}
6936
6937 static int      stbi__pnm_getinteger(stbi__context *s, char *c)
6938 {
6939         int value = 0;
6940
6941         while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
6942                 value = value * 10 + (*c - '0');
6943                 *c = (char)stbi__get8(s);
6944         }
6945
6946         return value;
6947 }
6948
6949 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
6950 {
6951         int maxv, dummy;
6952         char c, p, t;
6953
6954         if (!x) x = &dummy;
6955         if (!y) y = &dummy;
6956         if (!comp) comp = &dummy;
6957
6958         stbi__rewind(s);
6959
6960         // Get identifier
6961         p = (char)stbi__get8(s);
6962         t = (char)stbi__get8(s);
6963         if (p != 'P' || (t != '5' && t != '6')) {
6964                 stbi__rewind(s);
6965                 return 0;
6966         }
6967
6968         *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm
6969
6970         c = (char)stbi__get8(s);
6971         stbi__pnm_skip_whitespace(s, &c);
6972
6973         *x = stbi__pnm_getinteger(s, &c); // read width
6974         stbi__pnm_skip_whitespace(s, &c);
6975
6976         *y = stbi__pnm_getinteger(s, &c); // read height
6977         stbi__pnm_skip_whitespace(s, &c);
6978
6979         maxv = stbi__pnm_getinteger(s, &c);  // read max value
6980
6981         if (maxv > 255)
6982                 return stbi__err("max value > 255", "PPM image not 8-bit");
6983         else
6984                 return 1;
6985 }
6986 #endif
6987
6988 static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
6989 {
6990 #ifndef STBI_NO_JPEG
6991         if (stbi__jpeg_info(s, x, y, comp)) return 1;
6992 #endif
6993
6994 #ifndef STBI_NO_PNG
6995         if (stbi__png_info(s, x, y, comp))  return 1;
6996 #endif
6997
6998 #ifndef STBI_NO_GIF
6999         if (stbi__gif_info(s, x, y, comp))  return 1;
7000 #endif
7001
7002 #ifndef STBI_NO_BMP
7003         if (stbi__bmp_info(s, x, y, comp))  return 1;
7004 #endif
7005
7006 #ifndef STBI_NO_PSD
7007         if (stbi__psd_info(s, x, y, comp))  return 1;
7008 #endif
7009
7010 #ifndef STBI_NO_PIC
7011         if (stbi__pic_info(s, x, y, comp))  return 1;
7012 #endif
7013
7014 #ifndef STBI_NO_PNM
7015         if (stbi__pnm_info(s, x, y, comp))  return 1;
7016 #endif
7017
7018 #ifndef STBI_NO_HDR
7019         if (stbi__hdr_info(s, x, y, comp))  return 1;
7020 #endif
7021
7022         // test tga last because it's a crappy test!
7023 #ifndef STBI_NO_TGA
7024         if (stbi__tga_info(s, x, y, comp))
7025                 return 1;
7026 #endif
7027         return stbi__err("unknown image type", "Image not of any known type, or corrupt");
7028 }
7029
7030 #ifndef STBI_NO_STDIO
7031 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
7032 {
7033         FILE *f = stbi__fopen(filename, "rb");
7034         int result;
7035         if (!f) return stbi__err("can't fopen", "Unable to open file");
7036         result = stbi_info_from_file(f, x, y, comp);
7037         fclose(f);
7038         return result;
7039 }
7040
7041 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
7042 {
7043         int r;
7044         stbi__context s;
7045         long pos = ftell(f);
7046         stbi__start_file(&s, f);
7047         r = stbi__info_main(&s, x, y, comp);
7048         fseek(f, pos, SEEK_SET);
7049         return r;
7050 }
7051 #endif // !STBI_NO_STDIO
7052
7053 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
7054 {
7055         stbi__context s;
7056         stbi__start_mem(&s, buffer, len);
7057         return stbi__info_main(&s, x, y, comp);
7058 }
7059
7060 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
7061 {
7062         stbi__context s;
7063         stbi__start_callbacks(&s, (stbi_io_callbacks *)c, user);
7064         return stbi__info_main(&s, x, y, comp);
7065 }
7066
7067 #endif // STB_IMAGE_IMPLEMENTATION
7068
7069 /*
7070 revision history:
7071 2.16  (2017-07-23) all functions have 16-bit variants;
7072 STBI_NO_STDIO works again;
7073 compilation fixes;
7074 fix rounding in unpremultiply;
7075 optimize vertical flip;
7076 disable raw_len validation;
7077 documentation fixes
7078 2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
7079 warning fixes; disable run-time SSE detection on gcc;
7080 uniform handling of optional "return" values;
7081 thread-safe initialization of zlib tables
7082 2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
7083 2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
7084 2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
7085 2.11  (2016-04-02) allocate large structures on the stack
7086 remove white matting for transparent PSD
7087 fix reported channel count for PNG & BMP
7088 re-enable SSE2 in non-gcc 64-bit
7089 support RGB-formatted JPEG
7090 read 16-bit PNGs (only as 8-bit)
7091 2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
7092 2.09  (2016-01-16) allow comments in PNM files
7093 16-bit-per-pixel TGA (not bit-per-component)
7094 info() for TGA could break due to .hdr handling
info() for BMP now shares code instead of sloppy parse
7096 can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7097 code cleanup
7098 2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7099 2.07  (2015-09-13) fix compiler warnings
7100 partial animated GIF support
7101 limited 16-bpc PSD support
7102 #ifdef unused functions
7103 bug with < 92 byte PIC,PNM,HDR,TGA
7104 2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7105 2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7106 2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7107 2.03  (2015-04-12) extra corruption checking (mmozeiko)
7108 stbi_set_flip_vertically_on_load (nguillemot)
7109 fix NEON support; fix mingw support
7110 2.02  (2015-01-19) fix incorrect assert, fix warning
7111 2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7112 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7113 2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7114 progressive JPEG (stb)
7115 PGM/PPM support (Ken Miller)
7116 STBI_MALLOC,STBI_REALLOC,STBI_FREE
7117 GIF bugfix -- seemingly never worked
7118 STBI_NO_*, STBI_ONLY_*
7119 1.48  (2014-12-14) fix incorrectly-named assert()
7120 1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7121 optimize PNG (ryg)
7122 fix bug in interlaced PNG with user-specified channel count (stb)
7123 1.46  (2014-08-26)
7124 fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7125 1.45  (2014-08-16)
7126 fix MSVC-ARM internal compiler error by wrapping malloc
7127 1.44  (2014-08-07)
7128 various warning fixes from Ronny Chevalier
7129 1.43  (2014-07-15)
7130 fix MSVC-only compiler problem in code changed in 1.42
7131 1.42  (2014-07-09)
7132 don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7133 fixes to stbi__cleanup_jpeg path
7134 added STBI_ASSERT to avoid requiring assert.h
7135 1.41  (2014-06-25)
7136 fix search&replace from 1.36 that messed up comments/error messages
7137 1.40  (2014-06-22)
7138 fix gcc struct-initialization warning
7139 1.39  (2014-06-15)
7140 fix to TGA optimization when req_comp != number of components in TGA;
7141 fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7142 add support for BMP version 5 (more ignored fields)
7143 1.38  (2014-06-06)
7144 suppress MSVC warnings on integer casts truncating values
7145 fix accidental rename of 'skip' field of I/O
7146 1.37  (2014-06-04)
7147 remove duplicate typedef
7148 1.36  (2014-06-03)
7149 convert to header file single-file library
7150 if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7151 1.35  (2014-05-27)
7152 various warnings
7153 fix broken STBI_SIMD path
7154 fix bug where stbi_load_from_file no longer left file pointer in correct place
7155 fix broken non-easy path for 32-bit BMP (possibly never used)
7156 TGA optimization by Arseny Kapoulkine
7157 1.34  (unknown)
7158 use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7159 1.33  (2011-07-14)
7160 make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7161 1.32  (2011-07-13)
7162 support for "info" function for all supported filetypes (SpartanJ)
7163 1.31  (2011-06-20)
7164 a few more leak fixes, bug in PNG handling (SpartanJ)
7165 1.30  (2011-06-11)
added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7167 removed deprecated format-specific test/load functions
7168 removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7169 error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7170 fix inefficiency in decoding 32-bit BMP (David Woo)
7171 1.29  (2010-08-16)
7172 various warning fixes from Aurelien Pocheville
7173 1.28  (2010-08-01)
7174 fix bug in GIF palette transparency (SpartanJ)
7175 1.27  (2010-08-01)
7176 cast-to-stbi_uc to fix warnings
7177 1.26  (2010-07-24)
7178 fix bug in file buffering for PNG reported by SpartanJ
7179 1.25  (2010-07-17)
7180 refix trans_data warning (Won Chun)
7181 1.24  (2010-07-12)
7182 perf improvements reading from files on platforms with lock-heavy fgetc()
7183 minor perf improvements for jpeg
7184 deprecated type-specific functions so we'll get feedback if they're needed
7185 attempt to fix trans_data warning (Won Chun)
7186 1.23    fixed bug in iPhone support
7187 1.22  (2010-07-10)
7188 removed image *writing* support
7189 stbi_info support from Jetro Lauha
7190 GIF support from Jean-Marc Lienher
7191 iPhone PNG-extensions from James Brown
warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
7193 1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7194 1.20    added support for Softimage PIC, by Tom Seddon
7195 1.19    bug in interlaced PNG corruption check (found by ryg)
7196 1.18  (2008-08-02)
7197 fix a threading bug (local mutable static)
7198 1.17    support interlaced PNG
7199 1.16    major bugfix - stbi__convert_format converted one too many pixels
7200 1.15    initialize some fields for thread safety
7201 1.14    fix threadsafe conversion bug
7202 header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7203 1.13    threadsafe
7204 1.12    const qualifiers in the API
7205 1.11    Support installable IDCT, colorspace conversion routines
7206 1.10    Fixes for 64-bit (don't use "unsigned long")
7207 optimized upsampling by Fabian "ryg" Giesen
7208 1.09    Fix format-conversion for PSD code (bad global variables!)
7209 1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7210 1.07    attempt to fix C++ warning/errors again
7211 1.06    attempt to fix C++ warning/errors again
7212 1.05    fix TGA loading to return correct *comp and use good luminance calc
7213 1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7214 1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7215 1.02    support for (subset of) HDR files, float interface for preferred access to them
7216 1.01    fix bug: possible bug in handling right-side up bmps... not sure
7217 fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7218 1.00    interface to zlib that skips zlib header
7219 0.99    correct handling of alpha in palette
7220 0.98    TGA loader by lonesock; dynamically add loaders (untested)
7221 0.97    jpeg errors on too large a file; also catch another malloc failure
7222 0.96    fix detection of invalid v value - particleman@mollyrocket forum
7223 0.95    during header scan, seek to markers in case of padding
7224 0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7225 0.93    handle jpegtran output; verbose errors
7226 0.92    read 4,8,16,24,32-bit BMP files of several formats
7227 0.91    output 24-bit Windows 3.0 BMP files
7228 0.90    fix a few more warnings; bump version number to approach 1.0
7229 0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7230 0.60    fix compiling as c++
7231 0.59    fix warnings: merge Dave Moore's -Wall fixes
7232 0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7233 0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7234 0.56    fix bug: zlib uncompressed mode len vs. nlen
7235 0.55    fix bug: restart_interval not initialized to 0
7236 0.54    allow NULL for 'int *comp'
7237 0.53    fix bug in png 3->4; speedup png decoding
7238 0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7239 0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7240 on 'test' only check type, not whether we support this variant
7241 0.50  (2006-11-19)
7242 first released version
7243 */
7244
7245
7246 /*
7247 ------------------------------------------------------------------------------
7248 This software is available under 2 licenses -- choose whichever you prefer.
7249 ------------------------------------------------------------------------------
7250 ALTERNATIVE A - MIT License
7251 Copyright (c) 2017 Sean Barrett
7252 Permission is hereby granted, free of charge, to any person obtaining a copy of
7253 this software and associated documentation files (the "Software"), to deal in
7254 the Software without restriction, including without limitation the rights to
7255 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7256 of the Software, and to permit persons to whom the Software is furnished to do
7257 so, subject to the following conditions:
7258 The above copyright notice and this permission notice shall be included in all
7259 copies or substantial portions of the Software.
7260 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7261 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7262 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7263 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7264 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7265 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7266 SOFTWARE.
7267 ------------------------------------------------------------------------------
7268 ALTERNATIVE B - Public Domain (www.unlicense.org)
7269 This is free and unencumbered software released into the public domain.
7270 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7271 software, either in source code form or as a compiled binary, for any purpose,
7272 commercial or non-commercial, and by any means.
7273 In jurisdictions that recognize copyright laws, the author or authors of this
7274 software dedicate any and all copyright interest in the software to the public
7275 domain. We make this dedication for the benefit of the public at large and to
7276 the detriment of our heirs and successors. We intend this dedication to be an
7277 overt act of relinquishment in perpetuity of all present and future rights to
7278 this software under copyright law.
7279 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7280 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7281 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7282 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7283 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7284 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7285 ------------------------------------------------------------------------------
7286 */