cross compile build script
[fishladder.git] / stb / stb_vorbis.h
1 // Ogg Vorbis audio decoder - v1.19 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // See end of file for license information.
13 //
14 // Limitations:
15 //
16 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 // - lossless sample-truncation at beginning ignored
18 // - cannot concatenate multiple vorbis streams
19 // - sample positions are 32-bit, limiting seekable 192Khz
20 // files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 // Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 // Terje Mathisen Niklas Frykholm Andy Hill
27 // Casey Muratori John Bolton Gargaj
28 // Laurent Gomila Marc LeBlanc Ronny Chevalier
29 // Bernhard Wodo Evan Balster github:alxprd
30 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
31 // Phillip Bennefall Rohit Thiago Goulart
32 // github:manxorist saga musix github:infatum
33 // Timur Gagiev Maxwell Koo Peter Waller
34 // github:audinowho Dougall Johnson
35 //
36 // Partial history:
37 // 1.19 - 2020-02-05 - warnings
38 // 1.18 - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
39 // 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
40 // 1.16 - 2019-03-04 - fix warnings
41 // 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
42 // 1.14 - 2018-02-11 - delete bogus dealloca usage
43 // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
44 // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
45 // 1.11 - 2017-07-23 - fix MinGW compilation
46 // 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
47 // 1.09 - 2016-04-04 - back out 'truncation of last frame' fix from previous version
48 // 1.08 - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
49 // 1.07 - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
50 // 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
51 // some crash fixes when out of memory or with corrupt files
52 // fix some inappropriately signed shifts
53 // 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
54 // 1.04 - 2014-08-27 - fix missing const-correct case in API
55 // 1.03 - 2014-08-07 - warning fixes
56 // 1.02 - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
57 // 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
58 // 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
59 // (API change) report sample rate for decode-full-file funcs
60 //
61 // See end of file for full version history.
62
63
64 //////////////////////////////////////////////////////////////////////////////
65 //
66 // HEADER BEGINS HERE
67 //
68
69 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
70 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
71
72 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
73 #define STB_VORBIS_NO_STDIO 1
74 #endif
75
76 #ifndef STB_VORBIS_NO_STDIO
77 #include <stdio.h>
78 #endif
79
80 #ifdef __cplusplus
81 extern "C" {
82 #endif
83
84 /////////// THREAD SAFETY
85
86 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
87 // them from multiple threads at the same time. However, you can have multiple
88 // stb_vorbis* handles and decode from them independently in multiple threads.
89
90
91 /////////// MEMORY ALLOCATION
92
93 // normally stb_vorbis uses malloc() to allocate memory at startup,
94 // and alloca() to allocate temporary memory during a frame on the
95 // stack. (Memory consumption will depend on the amount of setup
96 // data in the file and how you set the compile flags for speed
97 // vs. size. In my test files the maximal-size usage is ~150KB.)
98 //
99 // You can modify the wrapper functions in the source (setup_malloc,
100 // setup_temp_malloc, temp_malloc) to change this behavior, or you
101 // can use a simpler allocation model: you pass in a buffer from
102 // which stb_vorbis will allocate _all_ its memory (including the
103 // temp memory). "open" may fail with a VORBIS_outofmem if you
104 // do not pass in enough data; there is no way to determine how
105 // much you do need except to succeed (at which point you can
106 // query get_info to find the exact amount required. yes I know
107 // this is lame).
108 //
109 // If you pass in a non-NULL buffer of the type below, allocation
110 // will occur from it as described above. Otherwise just pass NULL
111 // to use malloc()/alloca()
112
113 typedef struct // optional caller-supplied buffer; pass NULL instead to use malloc()/alloca()
114 {
115 char *alloc_buffer; // start of the caller's buffer from which stb_vorbis allocates all its memory
116 int alloc_buffer_length_in_bytes; // size of alloc_buffer, in bytes
117 } stb_vorbis_alloc;
118
119
120 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES
121
122 typedef struct stb_vorbis stb_vorbis; // opaque decoder handle; the struct body is defined in the implementation section
123
124 typedef struct // stream summary, returned by value from stb_vorbis_get_info()
125 {
126 unsigned int sample_rate; // NOTE(review): presumably samples per second -- confirm
127 int channels; // number of channels in the stream
128
129 unsigned int setup_memory_required; // memory figures to consult when sizing an stb_vorbis_alloc buffer
130 unsigned int setup_temp_memory_required;
131 unsigned int temp_memory_required;
132
133 int max_frame_size; // NOTE(review): presumably the largest per-frame sample count; confirm units before sizing buffers from it
134 } stb_vorbis_info;
135
136 typedef struct // ogg comment data, returned by value from stb_vorbis_get_comment()
137 {
138 char *vendor; // NOTE(review): presumably a NUL-terminated vendor string; ownership is not shown here
139
140 int comment_list_length; // NOTE(review): presumably the number of entries in comment_list -- confirm
141 char **comment_list; // array of comment strings
142 } stb_vorbis_comment;
143
144 // get general information about the file
145 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
146
147 // get ogg comments
148 extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
149
150 // get the last error detected (clears it, too)
151 extern int stb_vorbis_get_error(stb_vorbis *f);
152
153 // close an ogg vorbis file and free all memory in use
154 extern void stb_vorbis_close(stb_vorbis *f);
155
156 // this function returns the offset (in samples) from the beginning of the
157 // file that will be returned by the next decode, if it is known, or -1
158 // otherwise. after a flush_pushdata() call, this may take a while before
159 // it becomes valid again.
160 // NOT WORKING YET after a seek with PULLDATA API
161 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
162
163 // returns the current seek point within the file, or offset from the beginning
164 // of the memory buffer. In pushdata mode it returns 0.
165 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
166
167 /////////// PUSHDATA API
168
169 #ifndef STB_VORBIS_NO_PUSHDATA_API
170
171 // this API allows you to get blocks of data from any source and hand
172 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
173 // you how much it used, and you have to give it the rest next time;
174 // and stb_vorbis may not have enough data to work with and you will
175 // need to give it the same data again PLUS more. Note that the Vorbis
176 // specification does not bound the size of an individual frame.
177
178 extern stb_vorbis *stb_vorbis_open_pushdata(
179 const unsigned char * datablock, int datablock_length_in_bytes,
180 int *datablock_memory_consumed_in_bytes,
181 int *error,
182 const stb_vorbis_alloc *alloc_buffer);
183 // create a vorbis decoder by passing in the initial data block containing
184 // the ogg&vorbis headers (you don't need to parse them, just provide
185 // the first N bytes of the file--you're told if it's not enough, see below)
186 // on success, returns an stb_vorbis *, does not set error, returns the amount of
187 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
188 // on failure, returns NULL and sets *error, does not change *datablock_memory_consumed_in_bytes
189 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
190 // incomplete and you need to pass in a larger block from the start of the file
191
192 extern int stb_vorbis_decode_frame_pushdata(
193 stb_vorbis *f,
194 const unsigned char *datablock, int datablock_length_in_bytes,
195 int *channels, // place to write number of float * buffers
196 float ***output, // place to write float ** array of float * buffers
197 int *samples // place to write number of output samples
198 );
199 // decode a frame of audio sample data if possible from the passed-in data block
200 //
201 // return value: number of bytes we used from datablock
202 //
203 // possible cases:
204 // 0 bytes used, 0 samples output (need more data)
205 // N bytes used, 0 samples output (resynching the stream, keep going)
206 // N bytes used, M samples output (one frame of data)
207 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
208 // frame, because Vorbis always "discards" the first frame.
209 //
210 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
211 // instead only datablock_length_in_bytes-3 or less. This is because it wants
212 // to avoid missing parts of a page header if they cross a datablock boundary,
213 // without writing state-machiney code to record a partial detection.
214 //
215 // The number of channels returned are stored in *channels (which can be
216 // NULL--it is always the same as the number of channels reported by
217 // get_info). *output will contain an array of float* buffers, one per
218 // channel. In other words, (*output)[0][0] contains the first sample from
219 // the first channel, and (*output)[1][0] contains the first sample from
220 // the second channel.
221
222 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
223 // inform stb_vorbis that your next datablock will not be contiguous with
224 // previous ones (e.g. you've seeked in the data); future attempts to decode
225 // frames will cause stb_vorbis to resynchronize (as noted above), and
226 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
227 // will begin decoding the _next_ frame.
228 //
229 // if you want to seek using pushdata, you need to seek in your file, then
230 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
231 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
232 // if you don't like the result, seek your file again and repeat.
233 #endif
234
235
236 ////////// PULLING INPUT API
237
238 #ifndef STB_VORBIS_NO_PULLDATA_API
239 // This API assumes stb_vorbis is allowed to pull data from a source--
240 // either a block of memory containing the _entire_ vorbis stream, or a
241 // FILE * that you or it create, or possibly some other reading mechanism
242 // if you go modify the source to replace the FILE * case with some kind
243 // of callback to your code. (But if you don't support seeking, you may
244 // just want to go ahead and use pushdata.)
245
246 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
247 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
248 #endif
249 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
250 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
251 #endif
252 // decode an entire file and output the data interleaved into a malloc()ed
253 // buffer stored in *output. The return value is the number of samples
254 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
255 // When you're done with it, just free() the pointer returned in *output.
256
257 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
258 int *error, const stb_vorbis_alloc *alloc_buffer);
259 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
260 // this must be the entire stream!). on failure, returns NULL and sets *error
261
262 #ifndef STB_VORBIS_NO_STDIO
263 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
264 int *error, const stb_vorbis_alloc *alloc_buffer);
265 // create an ogg vorbis decoder from a filename via fopen(). on failure,
266 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
267
268 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
269 int *error, const stb_vorbis_alloc *alloc_buffer);
270 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
271 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
272 // note that stb_vorbis must "own" this stream; if you seek it in between
273 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
274 // perform stb_vorbis_seek_*() operations on this file, it will assume it
275 // owns the _entire_ rest of the file after the start point. Use the next
276 // function, stb_vorbis_open_file_section(), to limit it.
277
278 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
279 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
280 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
281 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
282 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
283 // this stream; if you seek it in between calls to stb_vorbis, it will become
284 // confused.
285 #endif
286
287 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
288 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
289 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
290 // after calling seek_frame(), the next call to get_frame_*() will include
291 // the specified sample. after calling stb_vorbis_seek(), the next call to
292 // stb_vorbis_get_samples_* will start with the specified sample. If you
293 // do not need to seek to EXACTLY the target sample when using get_samples_*,
294 // you can also use seek_frame().
295
296 extern int stb_vorbis_seek_start(stb_vorbis *f);
297 // this function is equivalent to stb_vorbis_seek(f,0)
298
299 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
300 extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
301 // these functions return the total length of the vorbis stream
302
303 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
304 // decode the next frame and return the number of samples. the number of
305 // channels returned are stored in *channels (which can be NULL--it is always
306 // the same as the number of channels reported by get_info). *output will
307 // contain an array of float* buffers, one per channel. These outputs will
308 // be overwritten on the next call to stb_vorbis_get_frame_*.
309 //
310 // You generally should not intermix calls to stb_vorbis_get_frame_*()
311 // and stb_vorbis_get_samples_*(), since the latter calls the former.
312
313 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
314 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
315 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
316 #endif
317 // decode the next frame and return the number of *samples* per channel.
318 // Note that for interleaved data, you pass in the number of shorts (the
319 // size of your array), but the return value is the number of samples per
320 // channel, not the total number of samples.
321 //
322 // The data is coerced to the number of channels you request according to the
323 // channel coercion rules (see below). You must pass in the size of your
324 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
325 // The maximum buffer size needed can be gotten from get_info(); however,
326 // the Vorbis I specification implies an absolute maximum of 4096 samples
327 // per channel.
328
329 // Channel coercion rules:
330 // Let M be the number of channels requested, and N the number of channels present,
331 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
332 // and stereo R be the sum of all R and center channels (channel assignment from the
333 // vorbis spec).
334 // M N output
335 // 1 k sum(Ck) for all k
336 // 2 * stereo L, stereo R
337 // k l k > l, the first l channels, then 0s
338 // k l k <= l, the first k channels
339 // Note that this is not _good_ surround etc. mixing at all! It's just so
340 // you get something useful.
341
342 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
343 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
344 // gets num_samples samples, not necessarily on a frame boundary--this requires
345 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
346 // Returns the number of samples stored per channel; it may be less than requested
347 // at the end of the file. If there are no more samples in the file, returns 0.
348
349 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
350 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
351 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
352 #endif
353 // gets num_samples samples, not necessarily on a frame boundary--this requires
354 // buffering so you have to supply the buffers. Applies the coercion rules above
355 // to produce 'channels' channels. Returns the number of samples stored per channel;
356 // it may be less than requested at the end of the file. If there are no more
357 // samples in the file, returns 0.
358
359 #endif
360
361 //////// ERROR CODES
362
363 enum STBVorbisError // error codes; numeric values are grouped by category with gaps between groups
364 {
365 VORBIS__no_error, // 0
366
367 VORBIS_need_more_data=1, // not a real error
368
369 VORBIS_invalid_api_mixing, // 2: can't mix API modes
370 VORBIS_outofmem, // 3: not enough memory
371 VORBIS_feature_not_supported, // 4: uses floor 0
372 VORBIS_too_many_channels, // 5: STB_VORBIS_MAX_CHANNELS is too small
373 VORBIS_file_open_failure, // 6: fopen() failed
374 VORBIS_seek_without_length, // 7: can't seek in unknown-length file
375
376 VORBIS_unexpected_eof=10, // file is truncated?
377 VORBIS_seek_invalid, // 11: seek past EOF
378
379 // decoding errors (corrupt/invalid stream) -- you probably
380 // don't care about the exact details of these
381
382 // vorbis errors:
383 VORBIS_invalid_setup=20,
384 VORBIS_invalid_stream, // 21
385
386 // ogg errors:
387 VORBIS_missing_capture_pattern=30,
388 VORBIS_invalid_stream_structure_version, // 31
389 VORBIS_continued_packet_flag_invalid, // 32
390 VORBIS_incorrect_stream_serial_number, // 33
391 VORBIS_invalid_first_page, // 34
392 VORBIS_bad_packet_type, // 35
393 VORBIS_cant_find_last_page, // 36
394 VORBIS_seek_failed, // 37
395 VORBIS_ogg_skeleton_not_supported // 38
396 };
397
398
399 #ifdef __cplusplus
400 }
401 #endif
402
403 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
404 //
405 // HEADER ENDS HERE
406 //
407 //////////////////////////////////////////////////////////////////////////////
408
409 #ifndef STB_VORBIS_HEADER_ONLY
410
411 // global configuration settings (e.g. set these in the project/makefile),
412 // or just set them in this file at the top (although ideally the first few
413 // should be visible when the header file is compiled too, although it's not
414 // crucial)
415
416 // STB_VORBIS_NO_PUSHDATA_API
417 // does not compile the code for the various stb_vorbis_*_pushdata()
418 // functions
419 // #define STB_VORBIS_NO_PUSHDATA_API
420
421 // STB_VORBIS_NO_PULLDATA_API
422 // does not compile the code for the non-pushdata APIs
423 // #define STB_VORBIS_NO_PULLDATA_API
424
425 // STB_VORBIS_NO_STDIO
426 // does not compile the code for the APIs that use FILE *s internally
427 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
428 // #define STB_VORBIS_NO_STDIO
429
430 // STB_VORBIS_NO_INTEGER_CONVERSION
431 // does not compile the code for converting audio sample data from
432 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
433 // #define STB_VORBIS_NO_INTEGER_CONVERSION
434
435 // STB_VORBIS_NO_FAST_SCALED_FLOAT
436 // does not use a fast float-to-int trick to accelerate float-to-int on
437 // most platforms which requires endianness be defined correctly.
438 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
439
440
441 // STB_VORBIS_MAX_CHANNELS [number]
442 // globally define this to the maximum number of channels you need.
443 // The spec does not put a restriction on channels except that
444 // the count is stored in a byte, so 255 is the hard limit.
445 // Reducing this saves about 16 bytes per value, so using 16 saves
446 // (255-16)*16 or around 4KB. Plus any other memory usage
447 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
448 // 6 (5.1 audio), or 2 (stereo only).
449 #ifndef STB_VORBIS_MAX_CHANNELS // default applies only when the build has not already set a value
450 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
451 #endif
452
453 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
454 // after a flush_pushdata(), stb_vorbis begins scanning for the
455 // next valid page, without backtracking. when it finds something
456 // that looks like a page, it streams through it and verifies its
457 // CRC32. Should that validation fail, it keeps scanning. But it's
458 // possible that _while_ streaming through to check the CRC32 of
459 // one candidate page, it sees another candidate page. This #define
460 // determines how many "overlapping" candidate pages it can search
461 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
462 // garbage pages could be as big as 64KB, but probably average ~16KB.
463 // So don't hose ourselves by scanning an apparent 64KB page and
464 // missing a ton of real ones in the interim; so minimum of 2
465 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT // default applies only when the build has not already set a value
466 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4 // number of overlapping candidate pages that can be checked at once
467 #endif
468
469 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
470 // sets the log size of the huffman-acceleration table. Maximum
471 // supported value is 24. with larger numbers, more decodings are O(1),
472 // but the table size is larger so worse cache missing, so you'll have
473 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
474 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH // default applies only when the build has not already set a value
475 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10 // log2 of the table size: 1 << 10 = 1024 entries
476 #endif
477
478 // STB_VORBIS_FAST_BINARY_LENGTH [number]
479 // sets the log size of the binary-search acceleration table. this
480 // is used in similar fashion to the fast-huffman size to set initial
481 // parameters for the binary search
482
483 // STB_VORBIS_FAST_HUFFMAN_INT
484 // The fast huffman tables are much more efficient if they can be
485 // stored as 16-bit results instead of 32-bit results. This restricts
486 // the codebooks to having only 65535 possible outcomes, though.
487 // (At least, accelerated by the huffman table.)
488 #ifndef STB_VORBIS_FAST_HUFFMAN_INT // 16-bit table entries are the default; define STB_VORBIS_FAST_HUFFMAN_INT to opt out
489 #define STB_VORBIS_FAST_HUFFMAN_SHORT
490 #endif
491
492 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
493 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
494 // back on binary searching for the correct one. This requires storing
495 // extra tables with the huffman codes in sorted order. Defining this
496 // symbol trades off space for speed by forcing a linear search in the
497 // non-fast case, except for "sparse" codebooks.
498 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
499
500 // STB_VORBIS_DIVIDES_IN_RESIDUE
501 // stb_vorbis precomputes the result of the scalar residue decoding
502 // that would otherwise require a divide per chunk. you can trade off
503 // space for time by defining this symbol.
504 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
505
506 // STB_VORBIS_DIVIDES_IN_CODEBOOK
507 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
508 // stored, or with all elements being chosen from a small range of values,
509 // and all values possible in all elements. By default, stb_vorbis expands
510 // this latter kind out to look like the former kind for ease of decoding,
511 // because otherwise an integer divide-per-vector-element is required to
512 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
513 // trade off storage for speed.
514 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
515
516 #ifdef STB_VORBIS_CODEBOOK_SHORTS
517 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
518 #endif
519
520 // STB_VORBIS_DIVIDE_TABLE
521 // this replaces small integer divides in the floor decode loop with
522 // table lookups. made less than 1% difference, so disabled by default.
523
524 // STB_VORBIS_NO_INLINE_DECODE
525 // disables the inlining of the scalar codebook fast-huffman decode.
526 // might save a little codespace; useful for debugging
527 // #define STB_VORBIS_NO_INLINE_DECODE
528
529 // STB_VORBIS_NO_DEFER_FLOOR
530 // Normally we only decode the floor without synthesizing the actual
531 // full curve. We can instead synthesize the curve immediately. This
532 // requires more memory and is very likely slower, so I don't think
533 // you'd ever want to do it except for debugging.
534 // #define STB_VORBIS_NO_DEFER_FLOOR
535
536
537
538
539 //////////////////////////////////////////////////////////////////////////////
540
541 #ifdef STB_VORBIS_NO_PULLDATA_API // dropping the pull API also drops integer conversion and stdio support
542 #define STB_VORBIS_NO_INTEGER_CONVERSION
543 #define STB_VORBIS_NO_STDIO // NOTE(review): defined empty here but as 1 by the STB_VORBIS_NO_CRT rule in the header section; if both rules fire the two definitions differ
544 #endif
545
546 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
547 #define STB_VORBIS_NO_STDIO 1
548 #endif
549
550 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
551 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
552
553 // only need endianness for fast-float-to-int, which we don't
554 // use for pushdata
555
556 #ifndef STB_VORBIS_BIG_ENDIAN // endianness comes from this user-supplied macro; nothing is detected here
557 #define STB_VORBIS_ENDIAN 0 // STB_VORBIS_BIG_ENDIAN not defined
558 #else
559 #define STB_VORBIS_ENDIAN 1 // STB_VORBIS_BIG_ENDIAN defined
560 #endif
561
562 #endif
563 #endif
564
565
566 #ifndef STB_VORBIS_NO_STDIO
567 #include <stdio.h>
568 #endif
569
570 #ifndef STB_VORBIS_NO_CRT // normal build: pull in the C runtime headers
571 #include <stdlib.h>
572 #include <string.h>
573 #include <assert.h>
574 #include <math.h>
575
576 // find definition of alloca if it's not in stdlib.h:
577 #if defined(_MSC_VER) || defined(__MINGW32__)
578 #include <malloc.h>
579 #endif
580 #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
581 #include <alloca.h>
582 #endif
583 #else // STB_VORBIS_NO_CRT
584 #define NULL 0
585 #define malloc(s) 0 // heap stubs: every allocation request yields a null pointer value
586 #define free(s) ((void) 0) // releasing memory is a no-op expression
587 #define realloc(p,s) 0 // takes (pointer, size) like the real realloc so ordinary two-argument calls expand
588 #endif // STB_VORBIS_NO_CRT
589
590 #include <limits.h>
591
592 #ifdef __MINGW32__ // MinGW: discard any existing __forceinline/alloca macros and substitute our own
593 // eff you mingw:
594 // "fixed":
595 // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
596 // "no that broke the build, reverted, who cares about C":
597 // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
598 #ifdef __forceinline
599 #undef __forceinline
600 #endif
601 #define __forceinline // expands to nothing
602 #ifdef alloca
603 #undef alloca
604 #endif
605 #define alloca __builtin_alloca
606 #elif !defined(_MSC_VER) // every other compiler except MSVC, which is left untouched
607 #if __GNUC__
608 #define __forceinline inline
609 #else
610 #define __forceinline // expands to nothing
611 #endif
612 #endif
613
614 #if STB_VORBIS_MAX_CHANNELS > 256 // NOTE(review): the configuration comment gives 255 as the hard limit, yet 256 passes this check -- confirm the intended bound
615 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
616 #endif
617
618 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24 // 24 is the documented maximum supported value
619 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
620 #endif
621
622
623 #if 0 // debug-only heap validation, compiled out
624 #include <crtdbg.h>
625 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
626 #else
627 #define CHECK(f) ((void) 0) // active definition: CHECK() is a no-op expression
628 #endif
629
630 #define MAX_BLOCKSIZE_LOG 13 // from specification
631 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG) // 1 << 13 = 8192
632
633
634 typedef unsigned char uint8; // short integer aliases; NOTE(review): the widths in the names are assumed of the platform types, nothing here enforces them
635 typedef signed char int8;
636 typedef unsigned short uint16;
637 typedef signed short int16;
638 typedef unsigned int uint32;
639 typedef signed int int32;
640
641 #ifndef TRUE // FALSE is defined only together with TRUE; an existing TRUE suppresses both
642 #define TRUE 1
643 #define FALSE 0
644 #endif
645
646 typedef float codetype; // element type of Codebook.multiplicands
647
648 // @NOTE
649 //
650 // Some arrays below are tagged "//varies", which means it's actually
651 // a variable-sized piece of data, but rather than malloc I assume it's
652 // small enough it's better to just allocate it all together with the
653 // main thing
654 //
655 // Most of the variables are specified with the smallest size I could pack
656 // them into. It might give better performance to make them all full-sized
657 // integers. It should be safe to freely rearrange the structures or change
658 // the sizes larger--nothing relies on silently truncating etc., nor the
659 // order of variables.
660
661 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH) // entry count of Codebook.fast_huffman (1024 at the default length of 10)
662 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1) // all-ones mask of STB_VORBIS_FAST_HUFFMAN_LENGTH bits
663
664 typedef struct // one codebook; struct stb_vorbis refers to these through its codebooks pointer
665 {
666 int dimensions, entries;
667 uint8 *codeword_lengths; // NOTE(review): presumably one length per entry -- confirm against the setup parser
668 float minimum_value;
669 float delta_value;
670 uint8 value_bits;
671 uint8 lookup_type;
672 uint8 sequence_p;
673 uint8 sparse;
674 uint32 lookup_values;
675 codetype *multiplicands; // codetype is float
676 uint32 *codewords;
677 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT // element width of the acceleration table is a compile-time choice
678 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; // 16-bit entries (the default configuration)
679 #else
680 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; // 32-bit entries when STB_VORBIS_FAST_HUFFMAN_SHORT is not defined
681 #endif
682 uint32 *sorted_codewords; // NOTE(review): presumably the sorted tables used by the binary-search fallback -- confirm
683 int *sorted_values;
684 int sorted_entries;
685 } Codebook;
686
687 typedef struct // floor 0 configuration record; NOTE(review): the file header lists floor 0 as not supported, so this may be unused
688 {
689 uint8 order;
690 uint16 rate;
691 uint16 bark_map_size;
692 uint8 amplitude_bits;
693 uint8 amplitude_offset;
694 uint8 number_of_books;
695 uint8 book_list[16]; // varies
696 } Floor0;
697
698 typedef struct // floor 1 configuration record (the other member of the Floor union)
699 {
700 uint8 partitions;
701 uint8 partition_class_list[32]; // varies
702 uint8 class_dimensions[16]; // varies
703 uint8 class_subclasses[16]; // varies
704 uint8 class_masterbooks[16]; // varies
705 int16 subclass_books[16][8]; // varies
706 uint16 Xlist[31*8+2]; // varies; capacity 31*8+2 = 250, shared by the next two arrays
707 uint8 sorted_order[31*8+2];
708 uint8 neighbors[31*8+2][2];
709 uint8 floor1_multiplier;
710 uint8 rangebits;
711 int values; // NOTE(review): presumably the number of Xlist entries in use -- confirm
712 } Floor1;
713
714 typedef union // storage for either floor layout; NOTE(review): presumably discriminated by stb_vorbis.floor_types -- confirm
715 {
716 Floor0 floor0;
717 Floor1 floor1;
718 } Floor;
719
720 typedef struct // one residue configuration; struct stb_vorbis refers to these through residue_config
721 {
722 uint32 begin, end;
723 uint32 part_size;
724 uint8 classifications;
725 uint8 classbook;
726 uint8 **classdata;
727 int16 (*residue_books)[8]; // pointer to rows of 8 int16 values each
728 } Residue;
729
730 typedef struct // element type of Mapping.chan
731 {
732 uint8 magnitude;
733 uint8 angle;
734 uint8 mux;
735 } MappingChannel;
736
737 typedef struct // one mapping configuration; struct stb_vorbis refers to these through its mapping pointer
738 {
739 uint16 coupling_steps;
740 MappingChannel *chan; // NOTE(review): presumably one entry per channel -- confirm against the setup parser
741 uint8 submaps;
742 uint8 submap_floor[15]; // varies
743 uint8 submap_residue[15]; // varies
744 } Mapping;
745
746 typedef struct // one mode entry; struct stb_vorbis stores up to 64 of these inline in mode_config
747 {
748 uint8 blockflag;
749 uint8 mapping;
750 uint16 windowtype;
751 uint16 transformtype;
752 } Mode;
753
754 typedef struct // state for one candidate page being CRC-checked; struct stb_vorbis keeps STB_VORBIS_PUSHDATA_CRC_COUNT of these
755 {
756 uint32 goal_crc; // expected crc if match
757 int bytes_left; // bytes left in packet
758 uint32 crc_so_far; // running crc
759 int bytes_done; // bytes processed in _current_ chunk
760 uint32 sample_loc; // granule pos encoded in page
761 } CRCscan;
762
763 typedef struct // record of a probed page; used for stb_vorbis.p_first and p_last
764 {
765 uint32 page_start, page_end; // NOTE(review): presumably offsets bounding the page -- confirm units and inclusivity
766 uint32 last_decoded_sample;
767 } ProbedPage;
768
769 struct stb_vorbis // complete decoder state behind the opaque stb_vorbis handle
770 {
771 // user-accessible info
772 unsigned int sample_rate;
773 int channels;
774
775 unsigned int setup_memory_required; // note: the two temp fields below are in the opposite order to stb_vorbis_info
776 unsigned int temp_memory_required;
777 unsigned int setup_temp_memory_required;
778
779 char *vendor; // same three comment fields as stb_vorbis_comment
780 int comment_list_length;
781 char **comment_list;
782
783 // input config
784 #ifndef STB_VORBIS_NO_STDIO // FILE-based input fields exist only in stdio builds
785 FILE *f;
786 uint32 f_start;
787 int close_on_free; // NOTE(review): presumably records the close_handle_on_close argument of the open_file functions -- confirm
788 #endif
789
790 uint8 *stream; // NOTE(review): presumably the read cursor within [stream_start, stream_end) for memory input -- confirm
791 uint8 *stream_start;
792 uint8 *stream_end;
793
794 uint32 stream_len;
795
796 uint8 push_mode; // NOTE(review): presumably nonzero when the handle was opened through the pushdata API -- confirm
797
798 // the page to seek to when seeking to start, may be zero
799 uint32 first_audio_page_offset;
800
801 // p_first is the page on which the first audio packet ends
802 // (but not necessarily the page on which it starts)
803 ProbedPage p_first, p_last;
804
805 // memory management
806 stb_vorbis_alloc alloc; // caller-supplied buffer description
807 int setup_offset;
808 int temp_offset;
809
810 // run-time results
811 int eof;
812 enum STBVorbisError error; // one of the STBVorbisError codes
813
814 // user-useful data
815
816 // header info
817 int blocksize[2];
818 int blocksize_0, blocksize_1;
819 int codebook_count;
820 Codebook *codebooks;
821 int floor_count;
822 uint16 floor_types[64]; // varies
823 Floor *floor_config;
824 int residue_count;
825 uint16 residue_types[64]; // varies
826 Residue *residue_config;
827 int mapping_count;
828 Mapping *mapping;
829 int mode_count;
830 Mode mode_config[64]; // varies
831
832 uint32 total_samples;
833
834 // decode buffer
835 float *channel_buffers[STB_VORBIS_MAX_CHANNELS]; // one pointer slot per possible channel
836 float *outputs [STB_VORBIS_MAX_CHANNELS];
837
838 float *previous_window[STB_VORBIS_MAX_CHANNELS];
839 int previous_length;
840
841 #ifndef STB_VORBIS_NO_DEFER_FLOOR // deferred-floor builds (the default) store int16 data per channel
842 int16 *finalY[STB_VORBIS_MAX_CHANNELS];
843 #else // STB_VORBIS_NO_DEFER_FLOOR builds store float buffers per channel instead
844 float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
845 #endif
846
847 uint32 current_loc; // sample location of next frame to decode
848 int current_loc_valid;
849
850 // per-blocksize precomputed data
851
852 // twiddle factors
853 float *A[2],*B[2],*C[2]; // two-element arrays, matching the two entries of blocksize[]
854 float *window[2];
855 uint16 *bit_reverse[2];
856
857 // current page/packet/segment streaming info
858 uint32 serial; // stream serial number for verification
859 int last_page;
860 int segment_count;
861 uint8 segments[255];
862 uint8 page_flag;
863 uint8 bytes_in_seg;
864 uint8 first_decode;
865 int next_seg;
866 int last_seg; // flag that we're on the last segment
867 int last_seg_which; // what was the segment number of the last seg?
868 uint32 acc;
869 int valid_bits;
870 int packet_bytes;
871 int end_seg_with_known_loc;
872 uint32 known_loc_for_packet;
873 int discard_samples_deferred;
874 uint32 samples_output;
875
876 // push mode scanning
877 int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
878 #ifndef STB_VORBIS_NO_PUSHDATA_API // scan slots exist only when the pushdata API is compiled in
879 CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
880 #endif
881
882 // sample-access
883 int channel_buffer_start;
884 int channel_buffer_end;
885 };
886
// IS_PUSH_MODE(f): nonzero when decoder f is driven through the push-data API.
// When only one of the two APIs is compiled in, it folds to a constant.
#ifdef STB_VORBIS_NO_PUSHDATA_API
   #define IS_PUSH_MODE(f)   FALSE
#elif defined(STB_VORBIS_NO_PULLDATA_API)
   #define IS_PUSH_MODE(f)   TRUE
#else
   #define IS_PUSH_MODE(f)   ((f)->push_mode)
#endif

// short internal alias for the decoder state
typedef struct stb_vorbis vorb;
896
897 static int error(vorb *f, enum STBVorbisError e)
898 {
899 f->error = e;
900 if (!f->eof && e != VORBIS_need_more_data) {
901 f->error=e; // breakpoint for debugging
902 }
903 return 0;
904 }
905
906
// these macros are used for allocating temporary memory
// while decoding. if you can afford the stack space, use
// alloca(); otherwise, provide a temp buffer and they will
// allocate out of that.
//
// All macro arguments are parenthesized so that expression arguments
// (e.g. `a+b`) expand with the intended precedence.

// bytes needed for `count` pointers followed by `count` sub-blocks of `size` bytes
#define array_size_required(count,size)  ((count)*(sizeof(void *)+(size)))

// allocate from the user-supplied buffer when there is one, otherwise from the stack
#define temp_alloc(f,size)              ((f)->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
// no-op: individual temp allocations are never freed one at a time
#define temp_free(f,p)                  (void)0
// save / restore the temp-allocation offset around a group of temp_alloc() calls
#define temp_alloc_save(f)              ((f)->temp_offset)
#define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))

// temp-allocate and initialize an array of `count` pointers to `size`-byte sub-blocks
#define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
920
// Carve a block of memory into a pointer table followed by `count`
// sub-blocks of `size` bytes each; entry i of the table points at sub-block i.
// `mem` must be large enough for both parts. Returns the table, which starts at `mem`.
static void *make_block_array(void *mem, int count, int size)
{
   void **table = (void **) mem;
   char *block = (char *) (table + count);   // sub-blocks begin right after the table
   int idx = 0;
   while (idx < count) {
      table[idx++] = block;
      block += size;
   }
   return table;
}
933
934 static void *setup_malloc(vorb *f, int sz)
935 {
936 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
937 f->setup_memory_required += sz;
938 if (f->alloc.alloc_buffer) {
939 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
940 if (f->setup_offset + sz > f->temp_offset) return NULL;
941 f->setup_offset += sz;
942 return p;
943 }
944 return sz ? malloc(sz) : NULL;
945 }
946
947 static void setup_free(vorb *f, void *p)
948 {
949 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
950 free(p);
951 }
952
953 static void *setup_temp_malloc(vorb *f, int sz)
954 {
955 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
956 if (f->alloc.alloc_buffer) {
957 if (f->temp_offset - sz < f->setup_offset) return NULL;
958 f->temp_offset -= sz;
959 return (char *) f->alloc.alloc_buffer + f->temp_offset;
960 }
961 return malloc(sz);
962 }
963
964 static void setup_temp_free(vorb *f, void *p, int sz)
965 {
966 if (f->alloc.alloc_buffer) {
967 f->temp_offset += (sz+3)&~3;
968 return;
969 }
970 free(p);
971 }
972
973 #define CRC32_POLY 0x04c11db7 // from spec
974
975 static uint32 crc_table[256];
976 static void crc32_init(void)
977 {
978 int i,j;
979 uint32 s;
980 for(i=0; i < 256; i++) {
981 for (s=(uint32) i << 24, j=0; j < 8; ++j)
982 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
983 crc_table[i] = s;
984 }
985 }
986
987 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
988 {
989 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
990 }
991
992
// used in setup, and for huffman that doesn't go fast path
// Reverse the order of the low 32 bits of n: swap adjacent bits, then
// bit pairs, nibbles, bytes, and finally the two 16-bit halves.
static unsigned int bit_reverse(unsigned int n)
{
   unsigned int v = n;
   v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
   v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
   v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
   v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
   v = (v >> 16) | (v << 16);
   return v;
}
1002
// Returns x multiplied by itself.
static float square(float x)
{
   float squared = x * x;
   return squared;
}
1007
1008 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
1009 // as required by the specification. fast(?) implementation from stb.h
1010 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
1011 static int ilog(int32 n)
1012 {
1013 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
1014
1015 if (n < 0) return 0; // signed n returns 0
1016
1017 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
1018 if (n < (1 << 14))
1019 if (n < (1 << 4)) return 0 + log2_4[n ];
1020 else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
1021 else return 10 + log2_4[n >> 10];
1022 else if (n < (1 << 24))
1023 if (n < (1 << 19)) return 15 + log2_4[n >> 15];
1024 else return 20 + log2_4[n >> 20];
1025 else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
1026 else return 30 + log2_4[n >> 30];
1027 }
1028
// fallback definition of pi for headers that do not provide M_PI
#ifndef M_PI
   #define M_PI  3.14159265358979323846264f  // from CRC
#endif

// code length assigned to a value with no huffman encoding
#define NO_CODE   255
1035
1036 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1037 //
1038 // these functions are only called at setup, and only a few times
1039 // per file
1040
1041 static float float32_unpack(uint32 x)
1042 {
1043 // from the specification
1044 uint32 mantissa = x & 0x1fffff;
1045 uint32 sign = x & 0x80000000;
1046 uint32 exp = (x & 0x7fe00000) >> 21;
1047 double res = sign ? -(double)mantissa : (double)mantissa;
1048 return (float) ldexp((float)res, exp-788);
1049 }
1050
1051
1052 // zlib & jpeg huffman tables assume that the output symbols
1053 // can either be arbitrarily arranged, or have monotonically
1054 // increasing frequencies--they rely on the lengths being sorted;
1055 // this makes for a very simple generation algorithm.
1056 // vorbis allows a huffman table with non-sorted lengths. This
1057 // requires a more sophisticated construction, since symbols in
1058 // order do not map to huffman codes "in order".
1059 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1060 {
1061 if (!c->sparse) {
1062 c->codewords [symbol] = huff_code;
1063 } else {
1064 c->codewords [count] = huff_code;
1065 c->codeword_lengths[count] = len;
1066 values [count] = symbol;
1067 }
1068 }
1069
1070 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1071 {
1072 int i,k,m=0;
1073 uint32 available[32];
1074
1075 memset(available, 0, sizeof(available));
1076 // find the first entry
1077 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1078 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1079 // add to the list
1080 add_entry(c, 0, k, m++, len[k], values);
1081 // add all available leaves
1082 for (i=1; i <= len[k]; ++i)
1083 available[i] = 1U << (32-i);
1084 // note that the above code treats the first case specially,
1085 // but it's really the same as the following code, so they
1086 // could probably be combined (except the initial code is 0,
1087 // and I use 0 in available[] to mean 'empty')
1088 for (i=k+1; i < n; ++i) {
1089 uint32 res;
1090 int z = len[i], y;
1091 if (z == NO_CODE) continue;
1092 // find lowest available leaf (should always be earliest,
1093 // which is what the specification calls for)
1094 // note that this property, and the fact we can never have
1095 // more than one free leaf at a given level, isn't totally
1096 // trivial to prove, but it seems true and the assert never
1097 // fires, so!
1098 while (z > 0 && !available[z]) --z;
1099 if (z == 0) { return FALSE; }
1100 res = available[z];
1101 assert(z >= 0 && z < 32);
1102 available[z] = 0;
1103 add_entry(c, bit_reverse(res), i, m++, len[i], values);
1104 // propagate availability up the tree
1105 if (z != len[i]) {
1106 assert(len[i] >= 0 && len[i] < 32);
1107 for (y=len[i]; y > z; --y) {
1108 assert(available[y] == 0);
1109 available[y] = res + (1 << (32-y));
1110 }
1111 }
1112 }
1113 return TRUE;
1114 }
1115
1116 // accelerated huffman table allows fast O(1) match of all symbols
1117 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
1118 static void compute_accelerated_huffman(Codebook *c)
1119 {
1120 int i, len;
1121 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1122 c->fast_huffman[i] = -1;
1123
1124 len = c->sparse ? c->sorted_entries : c->entries;
1125 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1126 if (len > 32767) len = 32767; // largest possible value we can encode!
1127 #endif
1128 for (i=0; i < len; ++i) {
1129 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1130 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1131 // set table entries for all bit combinations in the higher bits
1132 while (z < FAST_HUFFMAN_TABLE_SIZE) {
1133 c->fast_huffman[z] = i;
1134 z += 1 << c->codeword_lengths[i];
1135 }
1136 }
1137 }
1138 }
1139
1140 #ifdef _MSC_VER
1141 #define STBV_CDECL __cdecl
1142 #else
1143 #define STBV_CDECL
1144 #endif
1145
1146 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1147 {
1148 uint32 x = * (uint32 *) p;
1149 uint32 y = * (uint32 *) q;
1150 return x < y ? -1 : x > y;
1151 }
1152
1153 static int include_in_sort(Codebook *c, uint8 len)
1154 {
1155 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1156 if (len == NO_CODE) return FALSE;
1157 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1158 return FALSE;
1159 }
1160
1161 // if the fast table above doesn't work, we want to binary
1162 // search them... need to reverse the bits
1163 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1164 {
1165 int i, len;
1166 // build a list of all the entries
1167 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1168 // this is kind of a frivolous optimization--I don't see any performance improvement,
1169 // but it's like 4 extra lines of code, so.
1170 if (!c->sparse) {
1171 int k = 0;
1172 for (i=0; i < c->entries; ++i)
1173 if (include_in_sort(c, lengths[i]))
1174 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1175 assert(k == c->sorted_entries);
1176 } else {
1177 for (i=0; i < c->sorted_entries; ++i)
1178 c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1179 }
1180
1181 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1182 c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1183
1184 len = c->sparse ? c->sorted_entries : c->entries;
1185 // now we need to indicate how they correspond; we could either
1186 // #1: sort a different data structure that says who they correspond to
1187 // #2: for each sorted entry, search the original list to find who corresponds
1188 // #3: for each original entry, find the sorted entry
1189 // #1 requires extra storage, #2 is slow, #3 can use binary search!
1190 for (i=0; i < len; ++i) {
1191 int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1192 if (include_in_sort(c,huff_len)) {
1193 uint32 code = bit_reverse(c->codewords[i]);
1194 int x=0, n=c->sorted_entries;
1195 while (n > 1) {
1196 // invariant: sc[x] <= code < sc[x+n]
1197 int m = x + (n >> 1);
1198 if (c->sorted_codewords[m] <= code) {
1199 x = m;
1200 n -= (n>>1);
1201 } else {
1202 n >>= 1;
1203 }
1204 }
1205 assert(c->sorted_codewords[x] == code);
1206 if (c->sparse) {
1207 c->sorted_values[x] = values[i];
1208 c->codeword_lengths[x] = huff_len;
1209 } else {
1210 c->sorted_values[x] = i;
1211 }
1212 }
1213 }
1214 }
1215
1216 // only run while parsing the header (3 times)
1217 static int vorbis_validate(uint8 *data)
1218 {
1219 static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1220 return memcmp(data, vorbis, 6) == 0;
1221 }
1222
1223 // called from setup only, once per code book
1224 // (formula implied by specification)
1225 static int lookup1_values(int entries, int dim)
1226 {
1227 int r = (int) floor(exp((float) log((float) entries) / dim));
1228 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1229 ++r; // floor() to avoid _ftol() when non-CRT
1230 if (pow((float) r+1, dim) <= entries)
1231 return -1;
1232 if ((int) floor(pow((float) r, dim)) > entries)
1233 return -1;
1234 return r;
1235 }
1236
1237 // called twice per file
1238 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1239 {
1240 int n4 = n >> 2, n8 = n >> 3;
1241 int k,k2;
1242
1243 for (k=k2=0; k < n4; ++k,k2+=2) {
1244 A[k2 ] = (float) cos(4*k*M_PI/n);
1245 A[k2+1] = (float) -sin(4*k*M_PI/n);
1246 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1247 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1248 }
1249 for (k=k2=0; k < n8; ++k,k2+=2) {
1250 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1251 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1252 }
1253 }
1254
1255 static void compute_window(int n, float *window)
1256 {
1257 int n2 = n >> 1, i;
1258 for (i=0; i < n2; ++i)
1259 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1260 }
1261
1262 static void compute_bitreverse(int n, uint16 *rev)
1263 {
1264 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1265 int i, n8 = n >> 3;
1266 for (i=0; i < n8; ++i)
1267 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1268 }
1269
1270 static int init_blocksize(vorb *f, int b, int n)
1271 {
1272 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1273 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1274 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1275 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1276 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1277 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1278 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1279 if (!f->window[b]) return error(f, VORBIS_outofmem);
1280 compute_window(n, f->window[b]);
1281 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1282 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1283 compute_bitreverse(n, f->bit_reverse[b]);
1284 return TRUE;
1285 }
1286
1287 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1288 {
1289 int low = -1;
1290 int high = 65536;
1291 int i;
1292 for (i=0; i < n; ++i) {
1293 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1294 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1295 }
1296 }
1297
1298 // this has been repurposed so y is now the original index instead of y
1299 typedef struct
1300 {
1301 uint16 x,id;
1302 } stbv__floor_ordering;
1303
1304 static int STBV_CDECL point_compare(const void *p, const void *q)
1305 {
1306 stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1307 stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1308 return a->x < b->x ? -1 : a->x > b->x;
1309 }
1310
1311 //
1312 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1313
1314
1315 #if defined(STB_VORBIS_NO_STDIO)
1316 #define USE_MEMORY(z) TRUE
1317 #else
1318 #define USE_MEMORY(z) ((z)->stream)
1319 #endif
1320
1321 static uint8 get8(vorb *z)
1322 {
1323 if (USE_MEMORY(z)) {
1324 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1325 return *z->stream++;
1326 }
1327
1328 #ifndef STB_VORBIS_NO_STDIO
1329 {
1330 int c = fgetc(z->f);
1331 if (c == EOF) { z->eof = TRUE; return 0; }
1332 return c;
1333 }
1334 #endif
1335 }
1336
1337 static uint32 get32(vorb *f)
1338 {
1339 uint32 x;
1340 x = get8(f);
1341 x += get8(f) << 8;
1342 x += get8(f) << 16;
1343 x += (uint32) get8(f) << 24;
1344 return x;
1345 }
1346
1347 static int getn(vorb *z, uint8 *data, int n)
1348 {
1349 if (USE_MEMORY(z)) {
1350 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1351 memcpy(data, z->stream, n);
1352 z->stream += n;
1353 return 1;
1354 }
1355
1356 #ifndef STB_VORBIS_NO_STDIO
1357 if (fread(data, n, 1, z->f) == 1)
1358 return 1;
1359 else {
1360 z->eof = 1;
1361 return 0;
1362 }
1363 #endif
1364 }
1365
1366 static void skip(vorb *z, int n)
1367 {
1368 if (USE_MEMORY(z)) {
1369 z->stream += n;
1370 if (z->stream >= z->stream_end) z->eof = 1;
1371 return;
1372 }
1373 #ifndef STB_VORBIS_NO_STDIO
1374 {
1375 long x = ftell(z->f);
1376 fseek(z->f, x+n, SEEK_SET);
1377 }
1378 #endif
1379 }
1380
1381 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1382 {
1383 #ifndef STB_VORBIS_NO_PUSHDATA_API
1384 if (f->push_mode) return 0;
1385 #endif
1386 f->eof = 0;
1387 if (USE_MEMORY(f)) {
1388 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1389 f->stream = f->stream_end;
1390 f->eof = 1;
1391 return 0;
1392 } else {
1393 f->stream = f->stream_start + loc;
1394 return 1;
1395 }
1396 }
1397 #ifndef STB_VORBIS_NO_STDIO
1398 if (loc + f->f_start < loc || loc >= 0x80000000) {
1399 loc = 0x7fffffff;
1400 f->eof = 1;
1401 } else {
1402 loc += f->f_start;
1403 }
1404 if (!fseek(f->f, loc, SEEK_SET))
1405 return 1;
1406 f->eof = 1;
1407 fseek(f->f, f->f_start, SEEK_END);
1408 return 0;
1409 #endif
1410 }
1411
1412
1413 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1414
1415 static int capture_pattern(vorb *f)
1416 {
1417 if (0x4f != get8(f)) return FALSE;
1418 if (0x67 != get8(f)) return FALSE;
1419 if (0x67 != get8(f)) return FALSE;
1420 if (0x53 != get8(f)) return FALSE;
1421 return TRUE;
1422 }
1423
1424 #define PAGEFLAG_continued_packet 1
1425 #define PAGEFLAG_first_page 2
1426 #define PAGEFLAG_last_page 4
1427
1428 static int start_page_no_capturepattern(vorb *f)
1429 {
1430 uint32 loc0,loc1,n;
1431 if (f->first_decode && !IS_PUSH_MODE(f)) {
1432 f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
1433 }
1434 // stream structure version
1435 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1436 // header flag
1437 f->page_flag = get8(f);
1438 // absolute granule position
1439 loc0 = get32(f);
1440 loc1 = get32(f);
1441 // @TODO: validate loc0,loc1 as valid positions?
1442 // stream serial number -- vorbis doesn't interleave, so discard
1443 get32(f);
1444 //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1445 // page sequence number
1446 n = get32(f);
1447 f->last_page = n;
1448 // CRC32
1449 get32(f);
1450 // page_segments
1451 f->segment_count = get8(f);
1452 if (!getn(f, f->segments, f->segment_count))
1453 return error(f, VORBIS_unexpected_eof);
1454 // assume we _don't_ know any the sample position of any segments
1455 f->end_seg_with_known_loc = -2;
1456 if (loc0 != ~0U || loc1 != ~0U) {
1457 int i;
1458 // determine which packet is the last one that will complete
1459 for (i=f->segment_count-1; i >= 0; --i)
1460 if (f->segments[i] < 255)
1461 break;
1462 // 'i' is now the index of the _last_ segment of a packet that ends
1463 if (i >= 0) {
1464 f->end_seg_with_known_loc = i;
1465 f->known_loc_for_packet = loc0;
1466 }
1467 }
1468 if (f->first_decode) {
1469 int i,len;
1470 len = 0;
1471 for (i=0; i < f->segment_count; ++i)
1472 len += f->segments[i];
1473 len += 27 + f->segment_count;
1474 f->p_first.page_end = f->p_first.page_start + len;
1475 f->p_first.last_decoded_sample = loc0;
1476 }
1477 f->next_seg = 0;
1478 return TRUE;
1479 }
1480
1481 static int start_page(vorb *f)
1482 {
1483 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1484 return start_page_no_capturepattern(f);
1485 }
1486
1487 static int start_packet(vorb *f)
1488 {
1489 while (f->next_seg == -1) {
1490 if (!start_page(f)) return FALSE;
1491 if (f->page_flag & PAGEFLAG_continued_packet)
1492 return error(f, VORBIS_continued_packet_flag_invalid);
1493 }
1494 f->last_seg = FALSE;
1495 f->valid_bits = 0;
1496 f->packet_bytes = 0;
1497 f->bytes_in_seg = 0;
1498 // f->next_seg is now valid
1499 return TRUE;
1500 }
1501
1502 static int maybe_start_packet(vorb *f)
1503 {
1504 if (f->next_seg == -1) {
1505 int x = get8(f);
1506 if (f->eof) return FALSE; // EOF at page boundary is not an error!
1507 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1508 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1509 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1510 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1511 if (!start_page_no_capturepattern(f)) return FALSE;
1512 if (f->page_flag & PAGEFLAG_continued_packet) {
1513 // set up enough state that we can read this packet if we want,
1514 // e.g. during recovery
1515 f->last_seg = FALSE;
1516 f->bytes_in_seg = 0;
1517 return error(f, VORBIS_continued_packet_flag_invalid);
1518 }
1519 }
1520 return start_packet(f);
1521 }
1522
1523 static int next_segment(vorb *f)
1524 {
1525 int len;
1526 if (f->last_seg) return 0;
1527 if (f->next_seg == -1) {
1528 f->last_seg_which = f->segment_count-1; // in case start_page fails
1529 if (!start_page(f)) { f->last_seg = 1; return 0; }
1530 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1531 }
1532 len = f->segments[f->next_seg++];
1533 if (len < 255) {
1534 f->last_seg = TRUE;
1535 f->last_seg_which = f->next_seg-1;
1536 }
1537 if (f->next_seg >= f->segment_count)
1538 f->next_seg = -1;
1539 assert(f->bytes_in_seg == 0);
1540 f->bytes_in_seg = len;
1541 return len;
1542 }
1543
1544 #define EOP (-1)
1545 #define INVALID_BITS (-1)
1546
1547 static int get8_packet_raw(vorb *f)
1548 {
1549 if (!f->bytes_in_seg) { // CLANG!
1550 if (f->last_seg) return EOP;
1551 else if (!next_segment(f)) return EOP;
1552 }
1553 assert(f->bytes_in_seg > 0);
1554 --f->bytes_in_seg;
1555 ++f->packet_bytes;
1556 return get8(f);
1557 }
1558
1559 static int get8_packet(vorb *f)
1560 {
1561 int x = get8_packet_raw(f);
1562 f->valid_bits = 0;
1563 return x;
1564 }
1565
1566 static int get32_packet(vorb *f)
1567 {
1568 uint32 x;
1569 x = get8_packet(f);
1570 x += get8_packet(f) << 8;
1571 x += get8_packet(f) << 16;
1572 x += (uint32) get8_packet(f) << 24;
1573 return x;
1574 }
1575
1576 static void flush_packet(vorb *f)
1577 {
1578 while (get8_packet_raw(f) != EOP);
1579 }
1580
1581 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1582 // as the huffman decoder?
1583 static uint32 get_bits(vorb *f, int n)
1584 {
1585 uint32 z;
1586
1587 if (f->valid_bits < 0) return 0;
1588 if (f->valid_bits < n) {
1589 if (n > 24) {
1590 // the accumulator technique below would not work correctly in this case
1591 z = get_bits(f, 24);
1592 z += get_bits(f, n-24) << 24;
1593 return z;
1594 }
1595 if (f->valid_bits == 0) f->acc = 0;
1596 while (f->valid_bits < n) {
1597 int z = get8_packet_raw(f);
1598 if (z == EOP) {
1599 f->valid_bits = INVALID_BITS;
1600 return 0;
1601 }
1602 f->acc += z << f->valid_bits;
1603 f->valid_bits += 8;
1604 }
1605 }
1606 if (f->valid_bits < 0) return 0;
1607 z = f->acc & ((1 << n)-1);
1608 f->acc >>= n;
1609 f->valid_bits -= n;
1610 return z;
1611 }
1612
1613 // @OPTIMIZE: primary accumulator for huffman
1614 // expand the buffer to as many bits as possible without reading off end of packet
1615 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1616 // e.g. cache them locally and decode locally
1617 static __forceinline void prep_huffman(vorb *f)
1618 {
1619 if (f->valid_bits <= 24) {
1620 if (f->valid_bits == 0) f->acc = 0;
1621 do {
1622 int z;
1623 if (f->last_seg && !f->bytes_in_seg) return;
1624 z = get8_packet_raw(f);
1625 if (z == EOP) return;
1626 f->acc += (unsigned) z << f->valid_bits;
1627 f->valid_bits += 8;
1628 } while (f->valid_bits <= 24);
1629 }
1630 }
1631
// packet type codes for the three header packets
enum
{
   VORBIS_packet_id      = 1,
   VORBIS_packet_comment = 3,
   VORBIS_packet_setup   = 5
};
1638
1639 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1640 {
1641 int i;
1642 prep_huffman(f);
1643
1644 if (c->codewords == NULL && c->sorted_codewords == NULL)
1645 return -1;
1646
1647 // cases to use binary search: sorted_codewords && !c->codewords
1648 // sorted_codewords && c->entries > 8
1649 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1650 // binary search
1651 uint32 code = bit_reverse(f->acc);
1652 int x=0, n=c->sorted_entries, len;
1653
1654 while (n > 1) {
1655 // invariant: sc[x] <= code < sc[x+n]
1656 int m = x + (n >> 1);
1657 if (c->sorted_codewords[m] <= code) {
1658 x = m;
1659 n -= (n>>1);
1660 } else {
1661 n >>= 1;
1662 }
1663 }
1664 // x is now the sorted index
1665 if (!c->sparse) x = c->sorted_values[x];
1666 // x is now sorted index if sparse, or symbol otherwise
1667 len = c->codeword_lengths[x];
1668 if (f->valid_bits >= len) {
1669 f->acc >>= len;
1670 f->valid_bits -= len;
1671 return x;
1672 }
1673
1674 f->valid_bits = 0;
1675 return -1;
1676 }
1677
1678 // if small, linear search
1679 assert(!c->sparse);
1680 for (i=0; i < c->entries; ++i) {
1681 if (c->codeword_lengths[i] == NO_CODE) continue;
1682 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1683 if (f->valid_bits >= c->codeword_lengths[i]) {
1684 f->acc >>= c->codeword_lengths[i];
1685 f->valid_bits -= c->codeword_lengths[i];
1686 return i;
1687 }
1688 f->valid_bits = 0;
1689 return -1;
1690 }
1691 }
1692
1693 error(f, VORBIS_invalid_stream);
1694 f->valid_bits = 0;
1695 return -1;
1696 }
1697
// DECODE_RAW(var,f,c): decode one codeword from codebook c into var, trying
// the fast table first and falling back to codebook_decode_scalar_raw().
// var ends up as -1 if nothing could be decoded.
// By default this is expanded inline; with STB_VORBIS_NO_INLINE_DECODE it
// becomes a call to codebook_decode_scalar().
#ifndef STB_VORBIS_NO_INLINE_DECODE

#define DECODE_RAW(var, f,c)                                  \
   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
      prep_huffman(f);                                        \
   var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
   var = c->fast_huffman[var];                                \
   if (var >= 0) {                                            \
      int n = c->codeword_lengths[var];                       \
      f->acc >>= n;                                           \
      f->valid_bits -= n;                                     \
      if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
   } else {                                                   \
      var = codebook_decode_scalar_raw(f,c);                  \
   }

#else

// Function form of the decode above.
static int codebook_decode_scalar(vorb *f, Codebook *c)
{
   int sym;
   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
      prep_huffman(f);
   // fast huffman table lookup
   sym = c->fast_huffman[f->acc & FAST_HUFFMAN_TABLE_MASK];
   if (sym < 0)
      return codebook_decode_scalar_raw(f,c);
   f->acc >>= c->codeword_lengths[sym];
   f->valid_bits -= c->codeword_lengths[sym];
   if (f->valid_bits < 0) {
      // the code ran past the bits actually available
      f->valid_bits = 0;
      return -1;
   }
   return sym;
}

#define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);

#endif

// DECODE: like DECODE_RAW, then maps the result through sorted_values
// for sparse codebooks
#define DECODE(var,f,c)                                       \
   DECODE_RAW(var,f,c)                                        \
   if (c->sparse) var = c->sorted_values[var];

// DECODE_VQ: the decode used by the vector (VQ) paths; only the
// STB_VORBIS_DIVIDES_IN_CODEBOOK build needs the sorted_values mapping
#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
  #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
#else
  #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
#endif
1746
1747
1748
1749
1750
1751
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition
// (arguments are parenthesized so expression arguments expand correctly)
#define CODEBOOK_ELEMENT(c,off)          ((c)->multiplicands[(off)])
#define CODEBOOK_ELEMENT_FAST(c,off)     ((c)->multiplicands[(off)])
#define CODEBOOK_ELEMENT_BASE(c)         (0)
1757
1758 static int codebook_decode_start(vorb *f, Codebook *c)
1759 {
1760 int z = -1;
1761
1762 // type 0 is only legal in a scalar context
1763 if (c->lookup_type == 0)
1764 error(f, VORBIS_invalid_stream);
1765 else {
1766 DECODE_VQ(z,f,c);
1767 if (c->sparse) assert(z < c->sorted_entries);
1768 if (z < 0) { // check for EOP
1769 if (!f->bytes_in_seg)
1770 if (f->last_seg)
1771 return z;
1772 error(f, VORBIS_invalid_stream);
1773 }
1774 }
1775 return z;
1776 }
1777
1778 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1779 {
1780 int i,z = codebook_decode_start(f,c);
1781 if (z < 0) return FALSE;
1782 if (len > c->dimensions) len = c->dimensions;
1783
1784 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1785 if (c->lookup_type == 1) {
1786 float last = CODEBOOK_ELEMENT_BASE(c);
1787 int div = 1;
1788 for (i=0; i < len; ++i) {
1789 int off = (z / div) % c->lookup_values;
1790 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1791 output[i] += val;
1792 if (c->sequence_p) last = val + c->minimum_value;
1793 div *= c->lookup_values;
1794 }
1795 return TRUE;
1796 }
1797 #endif
1798
1799 z *= c->dimensions;
1800 if (c->sequence_p) {
1801 float last = CODEBOOK_ELEMENT_BASE(c);
1802 for (i=0; i < len; ++i) {
1803 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1804 output[i] += val;
1805 last = val + c->minimum_value;
1806 }
1807 } else {
1808 float last = CODEBOOK_ELEMENT_BASE(c);
1809 for (i=0; i < len; ++i) {
1810 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1811 }
1812 }
1813
1814 return TRUE;
1815 }
1816
1817 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1818 {
1819 int i,z = codebook_decode_start(f,c);
1820 float last = CODEBOOK_ELEMENT_BASE(c);
1821 if (z < 0) return FALSE;
1822 if (len > c->dimensions) len = c->dimensions;
1823
1824 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1825 if (c->lookup_type == 1) {
1826 int div = 1;
1827 for (i=0; i < len; ++i) {
1828 int off = (z / div) % c->lookup_values;
1829 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1830 output[i*step] += val;
1831 if (c->sequence_p) last = val;
1832 div *= c->lookup_values;
1833 }
1834 return TRUE;
1835 }
1836 #endif
1837
1838 z *= c->dimensions;
1839 for (i=0; i < len; ++i) {
1840 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1841 output[i*step] += val;
1842 if (c->sequence_p) last = val;
1843 }
1844
1845 return TRUE;
1846 }
1847
1848 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1849 {
1850 int c_inter = *c_inter_p;
1851 int p_inter = *p_inter_p;
1852 int i,z, effective = c->dimensions;
1853
1854 // type 0 is only legal in a scalar context
1855 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1856
1857 while (total_decode > 0) {
1858 float last = CODEBOOK_ELEMENT_BASE(c);
1859 DECODE_VQ(z,f,c);
1860 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1861 assert(!c->sparse || z < c->sorted_entries);
1862 #endif
1863 if (z < 0) {
1864 if (!f->bytes_in_seg)
1865 if (f->last_seg) return FALSE;
1866 return error(f, VORBIS_invalid_stream);
1867 }
1868
1869 // if this will take us off the end of the buffers, stop short!
1870 // we check by computing the length of the virtual interleaved
1871 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1872 // and the length we'll be using (effective)
1873 if (c_inter + p_inter*ch + effective > len * ch) {
1874 effective = len*ch - (p_inter*ch - c_inter);
1875 }
1876
1877 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1878 if (c->lookup_type == 1) {
1879 int div = 1;
1880 for (i=0; i < effective; ++i) {
1881 int off = (z / div) % c->lookup_values;
1882 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1883 if (outputs[c_inter])
1884 outputs[c_inter][p_inter] += val;
1885 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1886 if (c->sequence_p) last = val;
1887 div *= c->lookup_values;
1888 }
1889 } else
1890 #endif
1891 {
1892 z *= c->dimensions;
1893 if (c->sequence_p) {
1894 for (i=0; i < effective; ++i) {
1895 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1896 if (outputs[c_inter])
1897 outputs[c_inter][p_inter] += val;
1898 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1899 last = val;
1900 }
1901 } else {
1902 for (i=0; i < effective; ++i) {
1903 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1904 if (outputs[c_inter])
1905 outputs[c_inter][p_inter] += val;
1906 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1907 }
1908 }
1909 }
1910
1911 total_decode -= effective;
1912 }
1913 *c_inter_p = c_inter;
1914 *p_inter_p = p_inter;
1915 return TRUE;
1916 }
1917
// Evaluates, in integer arithmetic, the line through (x0,y0) and (x1,y1)
// at position x. The offset from y0 is |y1-y0| * (x-x0) / (x1-x0) using
// truncating division on the magnitude, applied in the direction of the
// slope, so rising and falling lines round symmetrically.
// The caller must ensure x1 != x0.
// @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
static int predict_point(int x, int x0, int x1, int y0, int y1)
{
   int rise = y1 - y0;
   int run = x1 - x0;
   int scaled = abs(rise) * (x - x0);
   int delta = scaled / run;

   if (rise < 0)
      return y0 - delta;
   return y0 + delta;
}
1927
// inverse_db_table: 256 single-precision entries rising from 1.0649863e-07
// (index 0) to 1.0 (index 255). draw_line() indexes it with (y & 255) and
// applies the selected value to an output sample through LINE_OP.
1928 // the following table is block-copied from the specification
1929 static float inverse_db_table[256] =
1930 {
1931 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1932 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1933 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1934 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1935 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1936 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1937 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1938 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1939 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1940 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1941 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1942 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1943 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1944 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1945 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1946 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1947 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1948 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1949 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1950 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1951 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1952 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1953 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1954 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1955 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1956 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1957 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1958 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1959 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1960 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1961 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1962 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1963 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1964 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1965 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1966 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1967 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1968 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1969 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1970 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1971 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1972 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1973 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1974 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1975 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1976 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1977 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1978 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1979 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1980 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1981 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1982 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
1983 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
1984 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
1985 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
1986 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
1987 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
1988 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
1989 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
1990 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
1991 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
1992 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
1993 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
1994 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
1995 };
1996
1997
1998 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1999 // note that you must produce bit-identical output to decode correctly;
2000 // this specific sequence of operations is specified in the spec (it's
2001 // drawing integer-quantized frequency-space lines that the encoder
2002 // expects to be exactly the same)
2003 // ... also, isn't the whole point of Bresenham's algorithm to NOT
2004 // have to divide in the setup? sigh.
// LINE_OP(a,b) is how draw_line() applies a table value b to an output
// sample a. When STB_VORBIS_NO_DEFER_FLOOR is not defined it multiplies a
// by b; when it is defined it overwrites a with b.
// The macro arguments are not parenthesized, so only pass simple lvalue /
// value expressions.
2005 #ifndef STB_VORBIS_NO_DEFER_FLOOR
2006 #define LINE_OP(a,b) a *= b
2007 #else
2008 #define LINE_OP(a,b) a = b
2009 #endif
2010
// Optional quotient table, indexed [ady][adx], that draw_line() reads in
// place of an integer divide when adx < DIVTAB_DENOM and ady < DIVTAB_NUMER.
// NOTE(review): only the declaration is visible here; the table is
// presumably filled in elsewhere before use -- confirm.
2011 #ifdef STB_VORBIS_DIVIDE_TABLE
2012 #define DIVTAB_NUMER 32
2013 #define DIVTAB_DENOM 64
2014 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
2015 #endif
2016
2017 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
2018 {
2019 int dy = y1 - y0;
2020 int adx = x1 - x0;
2021 int ady = abs(dy);
2022 int base;
2023 int x=x0,y=y0;
2024 int err = 0;
2025 int sy;
2026
2027 #ifdef STB_VORBIS_DIVIDE_TABLE
2028 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
2029 if (dy < 0) {
2030 base = -integer_divide_table[ady][adx];
2031 sy = base-1;
2032 } else {
2033 base = integer_divide_table[ady][adx];
2034 sy = base+1;
2035 }
2036 } else {
2037 base = dy / adx;
2038 if (dy < 0)
2039 sy = base - 1;
2040 else
2041 sy = base+1;
2042 }
2043 #else
2044 base = dy / adx;
2045 if (dy < 0)
2046 sy = base - 1;
2047 else
2048 sy = base+1;
2049 #endif
2050 ady -= abs(base) * adx;
2051 if (x1 > n) x1 = n;
2052 if (x < x1) {
2053 LINE_OP(output[x], inverse_db_table[y&255]);
2054 for (++x; x < x1; ++x) {
2055 err += ady;
2056 if (err >= adx) {
2057 err -= adx;
2058 y += sy;
2059 } else
2060 y += base;
2061 LINE_OP(output[x], inverse_db_table[y&255]);
2062 }
2063 }
2064 }
2065
2066 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2067 {
2068 int k;
2069 if (rtype == 0) {
2070 int step = n / book->dimensions;
2071 for (k=0; k < step; ++k)
2072 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2073 return FALSE;
2074 } else {
2075 for (k=0; k < n; ) {
2076 if (!codebook_decode(f, book, target+offset, n-k))
2077 return FALSE;
2078 k += book->dimensions;
2079 offset += book->dimensions;
2080 }
2081 }
2082 return TRUE;
2083 }
2084
// decode_residue: decodes the residue vectors for `ch` channels into
// residue_buffers[], using residue configuration number rn.
//
//   f                decoder state (codebooks, residue configs, temp memory)
//   residue_buffers  one float buffer per channel; the first n floats of each
//                    buffer whose do_not_decode flag is clear are zeroed, then
//                    decoded values are accumulated into them
//   ch               number of channels handled by this call
//   n                see the note below (half the blocksize)
//   rn               index into f->residue_config / f->residue_types
//   do_not_decode    per-channel flags; a non-zero entry leaves that channel's
//                    buffer untouched
//
// Residue type 2 with more than one channel takes the interleaved path
// (codebook_decode_deinterleave_repeat); everything else decodes channel by
// channel through residue_decode(). Decoding makes up to 8 passes over the
// partitions and stops early ("goto done") on end of packet or on any
// codebook decode failure; values decoded up to that point are kept.
2085 // n is 1/2 of the blocksize --
2086 // specification: "Correct per-vector decode length is [n]/2"
2087 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2088 {
2089 int i,j,pass;
2090 Residue *r = f->residue_config + rn;
2091 int rtype = f->residue_types[rn];
2092 int c = r->classbook;
2093 int classwords = f->codebooks[c].dimensions;
// type 2 works on the interleaved data, which is n*2 values long here
2094 unsigned int actual_size = rtype == 2 ? n*2 : n;
// clamp the configured begin/end to the amount of data actually present
2095 unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2096 unsigned int limit_r_end = (r->end < actual_size ? r->end : actual_size);
2097 int n_read = limit_r_end - limit_r_begin;
// number of whole partitions to decode
2098 int part_read = n_read / r->part_size;
2099 int temp_alloc_point = temp_alloc_save(f);
// per-channel storage for the partition classifications read during pass 0
2100 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2101 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2102 #else
2103 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2104 #endif
2105
2106 CHECK(f);
2107
// clear the output of every channel that will be decoded
2108 for (i=0; i < ch; ++i)
2109 if (!do_not_decode[i])
2110 memset(residue_buffers[i], 0, sizeof(float) * n);
2111
// interleaved path: residue type 2 with more than one channel
2112 if (rtype == 2 && ch != 1) {
// nothing to do if every channel is flagged do_not_decode
2113 for (j=0; j < ch; ++j)
2114 if (!do_not_decode[j])
2115 break;
2116 if (j == ch)
2117 goto done;
2118
2119 for (pass=0; pass < 8; ++pass) {
2120 int pcount = 0, class_set = 0;
2121 if (ch == 2) {
2122 while (pcount < part_read) {
// position in the interleaved stream -> (channel, sample) for 2 channels
2123 int z = r->begin + pcount*r->part_size;
2124 int c_inter = (z & 1), p_inter = z>>1;
// pass 0 reads one classification codeword per group of classwords partitions
2125 if (pass == 0) {
2126 Codebook *c = f->codebooks+r->classbook;
2127 int q;
2128 DECODE(q,f,c);
2129 if (q == EOP) goto done;
2130 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2131 part_classdata[0][class_set] = r->classdata[q];
2132 #else
// split q into classwords base-r->classifications digits, last digit first
2133 for (i=classwords-1; i >= 0; --i) {
2134 classifications[0][i+pcount] = q % r->classifications;
2135 q /= r->classifications;
2136 }
2137 #endif
2138 }
2139 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2140 int z = r->begin + pcount*r->part_size;
2141 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2142 int c = part_classdata[0][class_set][i];
2143 #else
2144 int c = classifications[0][pcount];
2145 #endif
// codebook for this class on this pass; negative means nothing to decode
2146 int b = r->residue_books[c][pass];
2147 if (b >= 0) {
2148 Codebook *book = f->codebooks + b;
// both preprocessor branches below make the same call
2149 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2150 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2151 goto done;
2152 #else
2153 // saves 1%
2154 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2155 goto done;
2156 #endif
2157 } else {
// skip this partition: move the interleave position past it
2158 z += r->part_size;
2159 c_inter = z & 1;
2160 p_inter = z >> 1;
2161 }
2162 }
2163 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2164 ++class_set;
2165 #endif
2166 }
2167 } else if (ch > 2) {
// same procedure as the ch == 2 case, with the general /ch and %ch mapping
2168 while (pcount < part_read) {
2169 int z = r->begin + pcount*r->part_size;
2170 int c_inter = z % ch, p_inter = z/ch;
2171 if (pass == 0) {
2172 Codebook *c = f->codebooks+r->classbook;
2173 int q;
2174 DECODE(q,f,c);
2175 if (q == EOP) goto done;
2176 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2177 part_classdata[0][class_set] = r->classdata[q];
2178 #else
2179 for (i=classwords-1; i >= 0; --i) {
2180 classifications[0][i+pcount] = q % r->classifications;
2181 q /= r->classifications;
2182 }
2183 #endif
2184 }
2185 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2186 int z = r->begin + pcount*r->part_size;
2187 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2188 int c = part_classdata[0][class_set][i];
2189 #else
2190 int c = classifications[0][pcount];
2191 #endif
2192 int b = r->residue_books[c][pass];
2193 if (b >= 0) {
2194 Codebook *book = f->codebooks + b;
2195 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2196 goto done;
2197 } else {
2198 z += r->part_size;
2199 c_inter = z % ch;
2200 p_inter = z / ch;
2201 }
2202 }
2203 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2204 ++class_set;
2205 #endif
2206 }
2207 }
2208 }
2209 goto done;
2210 }
2211 CHECK(f);
2212
// non-interleaved path: each channel is classified and decoded separately
2213 for (pass=0; pass < 8; ++pass) {
2214 int pcount = 0, class_set=0;
2215 while (pcount < part_read) {
// pass 0 reads one classification codeword per decoded channel
2216 if (pass == 0) {
2217 for (j=0; j < ch; ++j) {
2218 if (!do_not_decode[j]) {
2219 Codebook *c = f->codebooks+r->classbook;
2220 int temp;
2221 DECODE(temp,f,c);
2222 if (temp == EOP) goto done;
2223 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2224 part_classdata[j][class_set] = r->classdata[temp];
2225 #else
2226 for (i=classwords-1; i >= 0; --i) {
2227 classifications[j][i+pcount] = temp % r->classifications;
2228 temp /= r->classifications;
2229 }
2230 #endif
2231 }
2232 }
2233 }
2234 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2235 for (j=0; j < ch; ++j) {
2236 if (!do_not_decode[j]) {
2237 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2238 int c = part_classdata[j][class_set][i];
2239 #else
2240 int c = classifications[j][pcount];
2241 #endif
2242 int b = r->residue_books[c][pass];
2243 if (b >= 0) {
2244 float *target = residue_buffers[j];
2245 int offset = r->begin + pcount * r->part_size;
// note: this n shadows the function parameter; it is the partition size
2246 int n = r->part_size;
2247 Codebook *book = f->codebooks + b;
2248 if (!residue_decode(f, book, target, offset, n, rtype))
2249 goto done;
2250 }
2251 }
2252 }
2253 }
2254 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2255 ++class_set;
2256 #endif
2257 }
2258 }
// common exit: release the classification storage and restore the temp allocator
2259 done:
2260 CHECK(f);
2261 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2262 temp_free(f,part_classdata);
2263 #else
2264 temp_free(f,classifications);
2265 #endif
2266 temp_alloc_restore(f,temp_alloc_point);
2267 }
2268
2269
// Disabled reference implementations of the inverse MDCT. Every branch of
// this #if / #elif chain is conditioned on the constant 0, so none of the
// code below is compiled; it is kept for debugging and comparison only.
//   branch 1: direct O(n^2) evaluation with cos() in the inner loop
//   branch 2: the same sum using a precomputed cosine table
//   branch 3: the same transform expressed through a slow DCT-IV
2270 #if 0
2271 // slow way for debugging
2272 void inverse_mdct_slow(float *buffer, int n)
2273 {
2274 int i,j;
2275 int n2 = n >> 1;
// work on a copy of the n/2 inputs, since buffer is overwritten with n outputs
2276 float *x = (float *) malloc(sizeof(*x) * n2);
2277 memcpy(x, buffer, sizeof(*x) * n2);
2278 for (i=0; i < n; ++i) {
2279 float acc = 0;
2280 for (j=0; j < n2; ++j)
2281 // formula from paper:
2282 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2283 // formula from wikipedia
2284 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2285 // these are equivalent, except the formula from the paper inverts the multiplier!
2286 // however, what actually works is NO MULTIPLIER!?!
2287 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2288 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2289 buffer[i] = acc;
2290 }
2291 free(x);
2292 }
2293 #elif 0
2294 // same as above, but just barely able to run in real time on modern machines
2295 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2296 {
// cosine table with 4*n entries; the fixed size limits n to 4096
2297 float mcos[16384];
2298 int i,j;
2299 int n2 = n >> 1, nmask = (n << 2) -1;
2300 float *x = (float *) malloc(sizeof(*x) * n2);
2301 memcpy(x, buffer, sizeof(*x) * n2);
2302 for (i=0; i < 4*n; ++i)
2303 mcos[i] = (float) cos(M_PI / 2 * i / n);
2304
2305 for (i=0; i < n; ++i) {
2306 float acc = 0;
2307 for (j=0; j < n2; ++j)
// the table index is reduced modulo 4*n with nmask
2308 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2309 buffer[i] = acc;
2310 }
2311 free(x);
2312 }
2313 #elif 0
2314 // transform to use a slow dct-iv; this is STILL basically trivial,
2315 // but only requires half as many ops
2316 void dct_iv_slow(float *buffer, int n)
2317 {
2318 float mcos[16384];
2319 float x[2048];
2320 int i,j;
2321 int n2 = n >> 1, nmask = (n << 3) - 1;
2322 memcpy(x, buffer, sizeof(*x) * n);
2323 for (i=0; i < 8*n; ++i)
2324 mcos[i] = (float) cos(M_PI / 4 * i / n);
2325 for (i=0; i < n; ++i) {
2326 float acc = 0;
2327 for (j=0; j < n; ++j)
2328 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2329 buffer[i] = acc;
2330 }
2331 }
2332
2333 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2334 {
2335 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2336 float temp[4096];
2337
2338 memcpy(temp, buffer, n2 * sizeof(float));
2339 dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2340
// unfold the n/2 DCT-IV outputs into the n time-domain outputs
2341 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2342 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2343 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2344 }
2345 #endif
2346
// LIBVORBIS_MDCT defaults to 0. When it is set to a non-zero value, the
// block below declares an external MDCT interface (mdct_init / mdct_clear /
// mdct_backward) and defines an inverse_mdct() that forwards to it.
// NOTE(review): in the visible source the static inverse_mdct() defined
// further down is not guarded by this macro, so enabling it looks like it
// would produce two definitions -- confirm before turning it on.
2347 #ifndef LIBVORBIS_MDCT
2348 #define LIBVORBIS_MDCT 0
2349 #endif
2350
2351 #if LIBVORBIS_MDCT
2352 // directly call the vorbis MDCT using an interface documented
2353 // by Jeff Roberts... useful for performance comparison
2354 typedef struct
2355 {
2356 int n;
2357 int log2n;
2358
2359 float *trig;
2360 int *bitrev;
2361
2362 float scale;
2363 } mdct_lookup;
2364
2365 extern void mdct_init(mdct_lookup *lookup, int n);
2366 extern void mdct_clear(mdct_lookup *l);
2367 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2368
// two cached lookups, one per transform size seen so far (n == 0 means unused)
2369 mdct_lookup M1,M2;
2370
2371 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2372 {
2373 mdct_lookup *M;
// reuse the lookup already initialized for this n, otherwise initialize one
2374 if (M1.n == n) M = &M1;
2375 else if (M2.n == n) M = &M2;
2376 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2377 else {
// a third distinct size is unexpected: trap into the debugger
// (inline-assembly syntax specific to some compilers)
2378 if (M2.n) __asm int 3;
2379 mdct_init(&M2, n);
2380 M = &M2;
2381 }
2382
// in-place transform: buffer is both input and output
2383 mdct_backward(M, buffer, buffer);
2384 }
2385 #endif
2386
2387
2388 // the following were split out into separate functions while optimizing;
2389 // they could be pushed back up but eh. __forceinline showed no change;
2390 // they're probably already being inlined.
// Iteration 0 of IMDCT step 3. Runs n/4 rounds; each round processes four
// adjacent pairs of floats, walking downward from e[i_off] and from
// e[i_off + k_off]. For every pair the upper location receives the sum of
// the two inputs and the lower location receives their difference rotated
// by the twiddle pair (A[0], A[1]); A advances by 8 floats after each pair.
// n must be a multiple of 4.
static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
{
   float *upper = e + i_off;
   float *lower = upper + k_off;
   int rounds;

   assert((n & 3) == 0);
   for (rounds = n >> 2; rounds > 0; --rounds) {
      int j;
      // four pairs per round, at offsets 0/-1, -2/-3, -4/-5, -6/-7
      for (j = 0; j < 8; j += 2) {
         float diff_a = upper[-j]   - lower[-j];
         float diff_b = upper[-j-1] - lower[-j-1];
         upper[-j]   += lower[-j];
         upper[-j-1] += lower[-j-1];
         lower[-j]   = diff_a * A[0] - diff_b * A[1];
         lower[-j-1] = diff_b * A[0] + diff_a * A[1];
         A += 8;
      }
      upper -= 8;
      lower -= 8;
   }
}
2435
// IMDCT step 3 inner loop, "r" variant. Runs lim/4 rounds; each round
// processes four adjacent pairs of floats, walking downward from e[d0] and
// from e[d0 + k_off]. For every pair the upper location receives the sum of
// the two inputs and the lower location receives their difference rotated
// by the twiddle pair (A[0], A[1]); A advances by k1 floats after each pair.
static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
{
   float *upper = e + d0;
   float *lower = upper + k_off;
   int rounds;

   for (rounds = lim >> 2; rounds > 0; --rounds) {
      int j;
      // four pairs per round, at offsets 0/-1, -2/-3, -4/-5, -6/-7
      for (j = 0; j < 8; j += 2) {
         float diff_a = upper[-j]   - lower[-j];
         float diff_b = upper[-j-1] - lower[-j-1];
         upper[-j]   += lower[-j];
         upper[-j-1] += lower[-j-1];
         lower[-j]   = diff_a * A[0] - diff_b * A[1];
         lower[-j-1] = diff_b * A[0] + diff_a * A[1];
         A += k1;
      }
      upper -= 8;
      lower -= 8;
   }
}
2485
// IMDCT step 3 inner loop, "s" variant. The four twiddle pairs are read once
// up front from A[0], A[a_off], A[2*a_off] and A[3*a_off] (two floats each)
// and reused for all n rounds. Each round processes four adjacent pairs of
// floats below e[i_off] and e[i_off + k_off]: the upper location receives
// the sum, the lower location the difference rotated by that pair's twiddle.
// Both pointers then move down by k0 floats.
static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
{
   float twiddle[8];
   float *upper = e + i_off;
   float *lower = upper + k_off;
   int rounds;

   twiddle[0] = A[0];
   twiddle[1] = A[0+1];
   twiddle[2] = A[0+a_off];
   twiddle[3] = A[0+a_off+1];
   twiddle[4] = A[0+a_off*2+0];
   twiddle[5] = A[0+a_off*2+1];
   twiddle[6] = A[0+a_off*3+0];
   twiddle[7] = A[0+a_off*3+1];

   for (rounds = n; rounds > 0; --rounds) {
      int j;
      // pair j/2 lives at offsets -j and -j-1 and uses twiddle[j], twiddle[j+1]
      for (j = 0; j < 8; j += 2) {
         float diff_a = upper[-j]   - lower[-j];
         float diff_b = upper[-j-1] - lower[-j-1];
         upper[-j]   = upper[-j]   + lower[-j];
         upper[-j-1] = upper[-j-1] + lower[-j-1];
         lower[-j]   = diff_a * twiddle[j] - diff_b * twiddle[j+1];
         lower[-j-1] = diff_b * twiddle[j] + diff_a * twiddle[j+1];
      }
      upper -= k0;
      lower -= k0;
   }
}
2536
2537 static __forceinline void iter_54(float *z)
2538 {
2539 float k00,k11,k22,k33;
2540 float y0,y1,y2,y3;
2541
2542 k00 = z[ 0] - z[-4];
2543 y0 = z[ 0] + z[-4];
2544 y2 = z[-2] + z[-6];
2545 k22 = z[-2] - z[-6];
2546
2547 z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2548 z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2549
2550 // done with y0,y2
2551
2552 k33 = z[-3] - z[-7];
2553
2554 z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2555 z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2556
2557 // done with k33
2558
2559 k11 = z[-1] - z[-5];
2560 y1 = z[-1] + z[-5];
2561 y3 = z[-3] + z[-7];
2562
2563 z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2564 z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2565 z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2566 z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2567 }
2568
// Combined final iterations of IMDCT step 3. Processes n groups of 16
// floats, walking downward from e[i_off]. Within a group the upper eight
// values are paired with the lower eight: the upper slot gets the sum, the
// lower slot gets the difference, which is either stored as is, stored with
// its two components swapped, or combined and scaled by the single twiddle
// value A[base_n >> 3]. iter_54() is then applied to each half of the group.
static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
{
   float scale = A[0 + (base_n >> 3)];
   float *z = e + i_off;
   float *stop = z - 16 * n;

   for (; z > stop; z -= 16) {
      float p, q;

      // pair 0/-1 with -8/-9: plain difference
      p = z[-0] - z[-8];
      q = z[-1] - z[-9];
      z[-0] += z[-8];
      z[-1] += z[-9];
      z[-8] = p;
      z[-9] = q;

      // pair -2/-3 with -10/-11: difference combined and scaled
      p = z[ -2] - z[-10];
      q = z[ -3] - z[-11];
      z[ -2] += z[-10];
      z[ -3] += z[-11];
      z[-10] = (p+q) * scale;
      z[-11] = (q-p) * scale;

      // pair -4/-5 with -12/-13: p is taken reversed to avoid a unary
      // negation, and the two components are stored swapped
      p = z[-12] - z[ -4];
      q = z[ -5] - z[-13];
      z[ -4] += z[-12];
      z[ -5] += z[-13];
      z[-12] = q;
      z[-13] = p;

      // pair -6/-7 with -14/-15: p reversed again, combined and scaled
      p = z[-14] - z[ -6];
      q = z[ -7] - z[-15];
      z[ -6] += z[-14];
      z[ -7] += z[-15];
      z[-14] = (p+q) * scale;
      z[-15] = (p-q) * scale;

      iter_54(z);
      iter_54(z-8);
   }
}
2612
// inverse_mdct: in-place inverse MDCT of size n.
//
//   buffer     on entry the first n/2 floats hold the input data; on return
//              all n floats hold the output (the final step writes
//              buffer[0..n))
//   n          transform size; the code below relies on n >> 1, n >> 2,
//              n >> 3, ... being exact, and ilog(n) is used to derive the
//              iteration count
//   f          decoder state supplying the precomputed tables f->A, f->B,
//              f->C and f->bit_reverse, plus temp memory
//   blocktype  selects which set of those tables to use
//
// n/2 floats of scratch space (buf2) are taken from the temp allocator and
// released before returning. The data ping-pongs between buffer and buf2:
// step 0/1 writes buf2, step 2 writes buffer, step 3 runs in place in
// buffer, steps 4-6 write buf2, step 7 runs in place in buf2, and step 8
// writes the final result back into buffer.
2613 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2614 {
2615 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2616 int ld;
2617 // @OPTIMIZE: reduce register pressure by using fewer variables?
2618 int save_point = temp_alloc_save(f);
2619 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2620 float *u=NULL,*v=NULL;
2621 // twiddle factors
2622 float *A = f->A[blocktype];
2623
2624 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2625 // See notes about bugs in that paper in the less-optimal implementation 'inverse_mdct_naive' after this function.
2626
2627 // kernel from paper
2628
2629
2630 // merged:
2631 // copy and reflect spectral data
2632 // step 0
2633
2634 // note that it turns out that the items added together during
2635 // this step are, in fact, being added to themselves (as reflected
2636 // by step 0). inexplicable inefficiency! this became obvious
2637 // once I combined the passes.
2638
2639 // so there's a missing 'times 2' here (for adding X to itself).
2640 // this propagates through linearly to the end, where the numbers
2641 // are 1/2 too small, and need to be compensated for.
2642
2643 {
2644 float *d,*e, *AA, *e_stop;
// d fills buf2 from its top end downward, two floats at a time
2645 d = &buf2[n2-2];
2646 AA = A;
2647 e = &buffer[0];
2648 e_stop = &buffer[n2];
// first half: read buffer forward in steps of 4 (elements 0 and 2 of each group)
2649 while (e != e_stop) {
2650 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2651 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2652 d -= 2;
2653 AA += 2;
2654 e += 4;
2655 }
2656
// second half: read buffer backward with the inputs negated
2657 e = &buffer[n2-3];
2658 while (d >= buf2) {
2659 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2660 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2661 d -= 2;
2662 AA += 2;
2663 e -= 4;
2664 }
2665 }
2666
2667 // now we use symbolic names for these, so that we can
2668 // possibly swap their meaning as we change which operations
2669 // are in place
2670
2671 u = buffer;
2672 v = buf2;
2673
2674 // step 2 (paper output is w, now u)
2675 // this could be in place, but the data ends up in the wrong
2676 // place... _somebody_'s got to swap it, so this is nominated
2677 {
// AA walks the twiddle table backward from A[n2-8] in steps of 8
2678 float *AA = &A[n2-8];
2679 float *d0,*d1, *e0, *e1;
2680
2681 e0 = &v[n4];
2682 e1 = &v[0];
2683
2684 d0 = &u[n4];
2685 d1 = &u[0];
2686
2687 while (AA >= A) {
2688 float v40_20, v41_21;
2689
2690 v41_21 = e0[1] - e1[1];
2691 v40_20 = e0[0] - e1[0];
2692 d0[1] = e0[1] + e1[1];
2693 d0[0] = e0[0] + e1[0];
2694 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2695 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2696
2697 v41_21 = e0[3] - e1[3];
2698 v40_20 = e0[2] - e1[2];
2699 d0[3] = e0[3] + e1[3];
2700 d0[2] = e0[2] + e1[2];
2701 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2702 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2703
2704 AA -= 8;
2705
2706 d0 += 4;
2707 d1 += 4;
2708 e0 += 4;
2709 e1 += 4;
2710 }
2711 }
2712
2713 // step 3
2714 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2715
2716 // optimized step 3:
2717
2718 // the original step3 loop can be nested r inside s or s inside r;
2719 // it's written originally as s inside r, but this is dumb when r
2720 // iterates many times, and s few. So I have two copies of it and
2721 // switch between them halfway.
2722
2723 // this is iteration 0 of step 3
2724 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2725 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2726
2727 // this is iteration 1 of step 3
2728 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2729 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2730 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2731 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2732
// remaining iterations: the "r" form is used for the first part of the range ...
2733 l=2;
2734 for (; l < (ld-3)>>1; ++l) {
2735 int k0 = n >> (l+2), k0_2 = k0>>1;
2736 int lim = 1 << (l+1);
2737 int i;
2738 for (i=0; i < lim; ++i)
2739 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2740 }
2741
// ... and the "s" form for the rest, up to (not including) iteration ld-6
2742 for (; l < ld-6; ++l) {
2743 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2744 int rlim = n >> (l+6), r;
2745 int lim = 1 << (l+1);
2746 int i_off;
2747 float *A0 = A;
2748 i_off = n2-1;
2749 for (r=rlim; r > 0; --r) {
2750 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2751 A0 += k1*4;
2752 i_off -= 8;
2753 }
2754 }
2755
2756 // iterations with count:
2757 // ld-6,-5,-4 all interleaved together
2758 // the big win comes from getting rid of needless flops
2759 // due to the constants on pass 5 & 4 being all 1 and 0;
2760 // combining them to be simultaneous to improve cache made little difference
2761 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2762
2763 // output is u
2764
2765 // step 4, 5, and 6
2766 // cannot be in-place because of step 5
2767 {
2768 uint16 *bitrev = f->bit_reverse[blocktype];
2769 // weirdly, I'd have thought reading sequentially and writing
2770 // erratically would have been better than vice-versa, but in
2771 // fact that's not what my testing showed. (That is, with
2772 // j = bitreverse(i), do you read i and write j, or read j and write i.)
2773
// d0 and d1 fill the lower and upper halves of v from the top downward;
// each bitrev entry gives the index of 4 consecutive floats to fetch from u
2774 float *d0 = &v[n4-4];
2775 float *d1 = &v[n2-4];
2776 while (d0 >= v) {
2777 int k4;
2778
2779 k4 = bitrev[0];
2780 d1[3] = u[k4+0];
2781 d1[2] = u[k4+1];
2782 d0[3] = u[k4+2];
2783 d0[2] = u[k4+3];
2784
2785 k4 = bitrev[1];
2786 d1[1] = u[k4+0];
2787 d1[0] = u[k4+1];
2788 d0[1] = u[k4+2];
2789 d0[0] = u[k4+3];
2790
2791 d0 -= 4;
2792 d1 -= 4;
2793 bitrev += 2;
2794 }
2795 }
2796 // (paper output is u, now v)
2797
2798
2799 // data must be in buf2
2800 assert(v == buf2);
2801
2802 // step 7 (paper output is v, now v)
2803 // this is now in place
2804 {
2805 float *C = f->C[blocktype];
2806 float *d, *e;
2807
// d walks up from the start of v and e walks down from its end until they meet
2808 d = v;
2809 e = v + n2 - 4;
2810
2811 while (d < e) {
2812 float a02,a11,b0,b1,b2,b3;
2813
2814 a02 = d[0] - e[2];
2815 a11 = d[1] + e[3];
2816
2817 b0 = C[1]*a02 + C[0]*a11;
2818 b1 = C[1]*a11 - C[0]*a02;
2819
2820 b2 = d[0] + e[ 2];
2821 b3 = d[1] - e[ 3];
2822
2823 d[0] = b2 + b0;
2824 d[1] = b3 + b1;
2825 e[2] = b2 - b0;
2826 e[3] = b1 - b3;
2827
2828 a02 = d[2] - e[0];
2829 a11 = d[3] + e[1];
2830
2831 b0 = C[3]*a02 + C[2]*a11;
2832 b1 = C[3]*a11 - C[2]*a02;
2833
2834 b2 = d[2] + e[ 0];
2835 b3 = d[3] - e[ 1];
2836
2837 d[2] = b2 + b0;
2838 d[3] = b3 + b1;
2839 e[0] = b2 - b0;
2840 e[1] = b1 - b3;
2841
2842 C += 4;
2843 d += 4;
2844 e -= 4;
2845 }
2846 }
2847
2848 // data must be in buf2
2849
2850
2851 // step 8+decode (paper output is X, now buffer)
2852 // this generates pairs of data a la 8 and pushes them directly through
2853 // the decode kernel (pushing rather than pulling) to avoid having
2854 // to make another pass later
2855
2856 // this cannot POSSIBLY be in place, so we refer to the buffers directly
2857
2858 {
2859 float *d0,*d1,*d2,*d3;
2860
// B and e are consumed from their top ends downward, 8 floats per pass
2861 float *B = f->B[blocktype] + n2 - 8;
2862 float *e = buf2 + n2 - 8;
// four output cursors: d0 and d2 move up from buffer[0] and buffer[n2],
// d1 and d3 move down from buffer[n2-4] and buffer[n-4]
2863 d0 = &buffer[0];
2864 d1 = &buffer[n2-4];
2865 d2 = &buffer[n2];
2866 d3 = &buffer[n-4];
2867 while (e >= v) {
2868 float p0,p1,p2,p3;
2869
2870 p3 = e[6]*B[7] - e[7]*B[6];
2871 p2 = -e[6]*B[6] - e[7]*B[7];
2872
2873 d0[0] = p3;
2874 d1[3] = - p3;
2875 d2[0] = p2;
2876 d3[3] = p2;
2877
2878 p1 = e[4]*B[5] - e[5]*B[4];
2879 p0 = -e[4]*B[4] - e[5]*B[5];
2880
2881 d0[1] = p1;
2882 d1[2] = - p1;
2883 d2[1] = p0;
2884 d3[2] = p0;
2885
2886 p3 = e[2]*B[3] - e[3]*B[2];
2887 p2 = -e[2]*B[2] - e[3]*B[3];
2888
2889 d0[2] = p3;
2890 d1[1] = - p3;
2891 d2[2] = p2;
2892 d3[1] = p2;
2893
2894 p1 = e[0]*B[1] - e[1]*B[0];
2895 p0 = -e[0]*B[0] - e[1]*B[1];
2896
2897 d0[3] = p1;
2898 d1[0] = - p1;
2899 d2[3] = p0;
2900 d3[0] = p0;
2901
2902 B -= 8;
2903 e -= 8;
2904 d0 += 4;
2905 d2 += 4;
2906 d1 -= 4;
2907 d3 -= 4;
2908 }
2909 }
2910
// release the scratch buffer and restore the temp allocator state
2911 temp_free(f,buf2);
2912 temp_alloc_restore(f,save_point);
2913 }
2914
#if 0
// this is the original version of the above code, if you want to optimize it from scratch
//
// Disabled (compiled out) reference implementation of the inverse MDCT.
//   buffer : in/out array of n floats; only buffer[0..n/2-1] is read as
//            input, and all n entries are overwritten with the result.
//   n      : transform size. The scratch arrays below are fixed-size stack
//            arrays (u/v/w/X hold 1<<13 floats), so n must not exceed 8192.
// The twiddle tables A, B, C are recomputed on every call.
void inverse_mdct_naive(float *buffer, int n)
{
   float s;
   float A[1 << 12], B[1 << 12], C[1 << 11];
   int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
   int n3_4 = n - n4, ld;
   // how can they claim this only uses N words?!
   // oh, because they're only used sparsely, whoops
   float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
   // set up twiddle factors

   for (k=k2=0; k < n4; ++k,k2+=2) {
      A[k2  ] = (float)  cos(4*k*M_PI/n);
      A[k2+1] = (float) -sin(4*k*M_PI/n);
      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
   }
   for (k=k2=0; k < n8; ++k,k2+=2) {
      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
   }

   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
   // Note there are bugs in that pseudocode, presumably due to them attempting
   // to rename the arrays nicely rather than representing the way their actual
   // implementation bounces buffers back and forth. As a result, even in the
   // "some formulars corrected" version, a direct implementation fails. These
   // are noted below as "paper bug".

   // copy and reflect spectral data
   // (the upper half of u is the negated mirror image of the lower half of buffer)
   for (k=0; k < n2; ++k) u[k] = buffer[k];
   for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
   // kernel from paper
   // step 1
   for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
      v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
      v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
   }
   // step 2
   for (k=k4=0; k < n8; k+=1, k4+=4) {
      w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
      w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
      w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
      w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
   }
   // step 3
   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
   for (l=0; l < ld-3; ++l) {
      int k0 = n >> (l+2), k1 = 1 << (l+3);
      int rlim = n >> (l+4), r4, r;
      int s2lim = 1 << (l+2), s2;
      for (r=r4=0; r < rlim; r4+=4,++r) {
         for (s2=0; s2 < s2lim; s2+=2) {
            u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
            u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
            u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
                                - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
            u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
                                + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
         }
      }
      if (l+1 < ld-3) {
         // paper bug: ping-ponging of u&w here is omitted
         // (copy this pass's output back into w so the next pass reads it;
         // skipped after the final pass, whose result stays in u)
         memcpy(w, u, sizeof(u));
      }
   }

   // step 4
   // (permute groups of 8 entries by bit-reversed index; only the odd
   // offsets 1,3,5,7 of each group are moved)
   for (i=0; i < n8; ++i) {
      int j = bit_reverse(i) >> (32-ld+3);
      assert(j < n8);
      if (i == j) {
         // paper bug: original code probably swapped in place; if copying,
         //            need to directly copy in this case
         int i8 = i << 3;
         v[i8+1] = u[i8+1];
         v[i8+3] = u[i8+3];
         v[i8+5] = u[i8+5];
         v[i8+7] = u[i8+7];
      } else if (i < j) {
         int i8 = i << 3, j8 = j << 3;
         v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
         v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
         v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
         v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
      }
   }
   // step 5
   // (compact the odd-indexed entries of v into the first n/2 entries of w)
   for (k=0; k < n2; ++k) {
      w[k] = v[k*2+1];
   }
   // step 6
   for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
      u[n-1-k2] = w[k4];
      u[n-2-k2] = w[k4+1];
      u[n3_4 - 1 - k2] = w[k4+2];
      u[n3_4 - 2 - k2] = w[k4+3];
   }
   // step 7
   for (k=k2=0; k < n8; ++k, k2 += 2) {
      v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
      v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
      v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
      v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
   }
   // step 8
   for (k=k2=0; k < n4; ++k,k2 += 2) {
      X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
      X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
   }

   // decode kernel to output
   // determined the following value experimentally
   // (by first figuring out what made inverse_mdct_slow work); then matching that here
   // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
   s = 0.5; // theoretically would be n4

   // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
   //     so it needs to use the "old" B values to behave correctly, or else
   //     set s to 1.0 ]]]
   // (the n/2 values in X are unfolded, with sign changes, into all n output samples)
   for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
   for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
   for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
}
#endif
3042
3043 static float *get_window(vorb *f, int len)
3044 {
3045 len <<= 1;
3046 if (len == f->blocksize_0) return f->window[0];
3047 if (len == f->blocksize_1) return f->window[1];
3048 return NULL;
3049 }
3050
3051 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3052 typedef int16 YTYPE;
3053 #else
3054 typedef int YTYPE;
3055 #endif
3056 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3057 {
3058 int n2 = n >> 1;
3059 int s = map->chan[i].mux, floor;
3060 floor = map->submap_floor[s];
3061 if (f->floor_types[floor] == 0) {
3062 return error(f, VORBIS_invalid_stream);
3063 } else {
3064 Floor1 *g = &f->floor_config[floor].floor1;
3065 int j,q;
3066 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3067 for (q=1; q < g->values; ++q) {
3068 j = g->sorted_order[q];
3069 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3070 if (finalY[j] >= 0)
3071 #else
3072 if (step2_flag[j])
3073 #endif
3074 {
3075 int hy = finalY[j] * g->floor1_multiplier;
3076 int hx = g->Xlist[j];
3077 if (lx != hx)
3078 draw_line(target, lx,ly, hx,hy, n2);
3079 CHECK(f);
3080 lx = hx, ly = hy;
3081 }
3082 }
3083 if (lx < n2) {
3084 // optimization of: draw_line(target, lx,ly, n,ly, n2);
3085 for (j=lx; j < n2; ++j)
3086 LINE_OP(target[j], inverse_db_table[ly]);
3087 CHECK(f);
3088 }
3089 }
3090 return TRUE;
3091 }
3092
3093 // The meaning of "left" and "right"
3094 //
3095 // For a given frame:
3096 // we compute samples from 0..n
3097 // window_center is n/2
3098 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3099 // all of the samples from left_end to right_start can be output without mixing; however,
3100 // this interval is 0-length except when transitioning between short and long frames
3101 // all of the samples from right_start to right_end need to be mixed with the next frame,
3102 // which we don't have, so those get saved in a buffer
3103 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3104 // has to be the same as frame N+1's left_end-left_start (which they are by
3105 // construction)
3106
3107 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3108 {
3109 Mode *m;
3110 int i, n, prev, next, window_center;
3111 f->channel_buffer_start = f->channel_buffer_end = 0;
3112
3113 retry:
3114 if (f->eof) return FALSE;
3115 if (!maybe_start_packet(f))
3116 return FALSE;
3117 // check packet type
3118 if (get_bits(f,1) != 0) {
3119 if (IS_PUSH_MODE(f))
3120 return error(f,VORBIS_bad_packet_type);
3121 while (EOP != get8_packet(f));
3122 goto retry;
3123 }
3124
3125 if (f->alloc.alloc_buffer)
3126 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3127
3128 i = get_bits(f, ilog(f->mode_count-1));
3129 if (i == EOP) return FALSE;
3130 if (i >= f->mode_count) return FALSE;
3131 *mode = i;
3132 m = f->mode_config + i;
3133 if (m->blockflag) {
3134 n = f->blocksize_1;
3135 prev = get_bits(f,1);
3136 next = get_bits(f,1);
3137 } else {
3138 prev = next = 0;
3139 n = f->blocksize_0;
3140 }
3141
3142 // WINDOWING
3143
3144 window_center = n >> 1;
3145 if (m->blockflag && !prev) {
3146 *p_left_start = (n - f->blocksize_0) >> 2;
3147 *p_left_end = (n + f->blocksize_0) >> 2;
3148 } else {
3149 *p_left_start = 0;
3150 *p_left_end = window_center;
3151 }
3152 if (m->blockflag && !next) {
3153 *p_right_start = (n*3 - f->blocksize_0) >> 2;
3154 *p_right_end = (n*3 + f->blocksize_0) >> 2;
3155 } else {
3156 *p_right_start = window_center;
3157 *p_right_end = n;
3158 }
3159
3160 return TRUE;
3161 }
3162
// Decode the body of a packet whose mode `m` and window boundaries have
// already been determined. In order, this function:
//   1. reads the floor data for every channel,
//   2. decodes the residue for every submap,
//   3. undoes channel coupling,
//   4. applies the floors to the residue,
//   5. runs the inverse MDCT on every channel,
//   6. updates the sample-position bookkeeping in `f`.
//
//   len       : out; number of samples of this frame to use (normally
//               right_end, less for a truncated final frame)
//   left_start/right_start/right_end : window boundaries for this frame
//   left_end  : accepted but not referenced in this function
//   p_left    : out; rewritten with an adjusted left_start when deferred
//               discard samples are being consumed
//
// Returns TRUE on success. A floor of type 0 is rejected by returning
// error(f, VORBIS_invalid_stream).
static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
{
   Mapping *map;
   int i,j,k,n,n2;
   // zero_channel[i]: channel i carries no floor in this packet, but may be
   // switched back on below because of coupling.
   // really_zero_channel[i]: snapshot taken before that coupling adjustment.
   int zero_channel[256];
   int really_zero_channel[256];

// WINDOWING

   n = f->blocksize[m->blockflag];
   map = &f->mapping[m->mapping];

// FLOORS
   n2 = n >> 1;

   CHECK(f);

   for (i=0; i < f->channels; ++i) {
      int s = map->chan[i].mux, floor;
      zero_channel[i] = FALSE;
      floor = map->submap_floor[s];
      if (f->floor_types[floor] == 0) {
         return error(f, VORBIS_invalid_stream);
      } else {
         Floor1 *g = &f->floor_config[floor].floor1;
         // one flag bit per channel: nonzero means floor data follows
         if (get_bits(f, 1)) {
            short *finalY;
            uint8 step2_flag[256];
            static int range_list[4] = { 256, 128, 86, 64 };
            int range = range_list[g->floor1_multiplier-1];
            int offset = 2;
            finalY = f->finalY[i];
            // the first two Y values are stored as raw bits
            finalY[0] = get_bits(f, ilog(range)-1);
            finalY[1] = get_bits(f, ilog(range)-1);
            // the remaining Y values are codebook-coded, partition by partition
            for (j=0; j < g->partitions; ++j) {
               int pclass = g->partition_class_list[j];
               int cdim = g->class_dimensions[pclass];
               int cbits = g->class_subclasses[pclass];
               int csub = (1 << cbits)-1;
               int cval = 0;
               if (cbits) {
                  Codebook *c = f->codebooks + g->class_masterbooks[pclass];
                  DECODE(cval,f,c);
               }
               for (k=0; k < cdim; ++k) {
                  // the low cbits of cval select the subclass book for this value
                  int book = g->subclass_books[pclass][cval & csub];
                  cval = cval >> cbits;
                  if (book >= 0) {
                     int temp;
                     Codebook *c = f->codebooks + book;
                     DECODE(temp,f,c);
                     finalY[offset++] = temp;
                  } else
                     finalY[offset++] = 0;
               }
            }
            if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
            step2_flag[0] = step2_flag[1] = 1;
            // turn each decoded value into an absolute Y by offsetting it from
            // the value predicted from the point's two neighbors
            for (j=2; j < g->values; ++j) {
               int low, high, pred, highroom, lowroom, room, val;
               low = g->neighbors[j][0];
               high = g->neighbors[j][1];
               //neighbors(g->Xlist, j, &low, &high);
               pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
               val = finalY[j];
               highroom = range - pred;
               lowroom = pred;
               if (highroom < lowroom)
                  room = highroom * 2;
               else
                  room = lowroom * 2;
               if (val) {
                  step2_flag[low] = step2_flag[high] = 1;
                  step2_flag[j] = 1;
                  if (val >= room)
                     if (highroom > lowroom)
                        finalY[j] = val - lowroom + pred;
                     else
                        finalY[j] = pred - val + highroom - 1;
                  else
                     if (val & 1)
                        finalY[j] = pred - ((val+1)>>1);
                     else
                        finalY[j] = pred + (val>>1);
               } else {
                  // a zero value means "use the prediction", and the point is
                  // not flagged as used
                  step2_flag[j] = 0;
                  finalY[j] = pred;
               }
            }

#ifdef STB_VORBIS_NO_DEFER_FLOOR
            do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
#else
            // defer final floor computation until _after_ residue
            // (unused points are marked with -1 so do_floor can skip them
            // without needing step2_flag)
            for (j=0; j < g->values; ++j) {
               if (!step2_flag[j])
                  finalY[j] = -1;
            }
#endif
         } else {
           error:
            zero_channel[i] = TRUE;
         }
         // So we just defer everything else to later

         // at this point we've decoded the floor into buffer
      }
   }
   CHECK(f);
   // at this point we've decoded all floors

   if (f->alloc.alloc_buffer)
      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);

   // re-enable coupled channels if necessary
   // (if either channel of a coupled pair is non-zero, both get decoded)
   memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
   for (i=0; i < map->coupling_steps; ++i)
      if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
         zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
      }

   CHECK(f);
// RESIDUE DECODE
   for (i=0; i < map->submaps; ++i) {
      float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
      int r;
      uint8 do_not_decode[256];
      int ch = 0;
      // collect the channels assigned to submap i
      for (j=0; j < f->channels; ++j) {
         if (map->chan[j].mux == i) {
            if (zero_channel[j]) {
               do_not_decode[ch] = TRUE;
               residue_buffers[ch] = NULL;
            } else {
               do_not_decode[ch] = FALSE;
               residue_buffers[ch] = f->channel_buffers[j];
            }
            ++ch;
         }
      }
      r = map->submap_residue[i];
      decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
   }

   if (f->alloc.alloc_buffer)
      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
   CHECK(f);

// INVERSE COUPLING
   // steps are undone in reverse order; note that the locals `n2` and `m`
   // below shadow the outer `n2` and the `Mode *m` parameter inside this loop
   for (i = map->coupling_steps-1; i >= 0; --i) {
      int n2 = n >> 1;
      float *m = f->channel_buffers[map->chan[i].magnitude];
      float *a = f->channel_buffers[map->chan[i].angle    ];
      for (j=0; j < n2; ++j) {
         float a2,m2;
         if (m[j] > 0)
            if (a[j] > 0)
               m2 = m[j], a2 = m[j] - a[j];
            else
               a2 = m[j], m2 = m[j] + a[j];
         else
            if (a[j] > 0)
               m2 = m[j], a2 = m[j] + a[j];
            else
               a2 = m[j], m2 = m[j] - a[j];
         m[j] = m2;
         a[j] = a2;
      }
   }
   CHECK(f);

   // finish decoding the floors
#ifndef STB_VORBIS_NO_DEFER_FLOOR
   for (i=0; i < f->channels; ++i) {
      if (really_zero_channel[i]) {
         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
      } else {
         do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
      }
   }
#else
   for (i=0; i < f->channels; ++i) {
      if (really_zero_channel[i]) {
         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
      } else {
         for (j=0; j < n2; ++j)
            f->channel_buffers[i][j] *= f->floor_buffers[i][j];
      }
   }
#endif

// INVERSE MDCT
   CHECK(f);
   for (i=0; i < f->channels; ++i)
      inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
   CHECK(f);

   // this shouldn't be necessary, unless we exited on an error
   // and want to flush to get to the next packet
   flush_packet(f);

   if (f->first_decode) {
      // assume we start so first non-discarded sample is sample 0
      // this isn't to spec, but spec would require us to read ahead
      // and decode the size of all current frames--could be done,
      // but presumably it's not a commonly used feature
      f->current_loc = -n2; // start of first frame is positioned for discard
      // we might have to discard samples "from" the next frame too,
      // if we're lapping a large block then a small at the start?
      f->discard_samples_deferred = n - right_end;
      f->current_loc_valid = TRUE;
      f->first_decode = FALSE;
   } else if (f->discard_samples_deferred) {
      // consume pending discards by moving left_start forward, at most up
      // to right_start
      if (f->discard_samples_deferred >= right_start - left_start) {
         f->discard_samples_deferred -= (right_start - left_start);
         left_start = right_start;
         *p_left = left_start;
      } else {
         left_start += f->discard_samples_deferred;
         *p_left = left_start;
         f->discard_samples_deferred = 0;
      }
   } else if (f->previous_length == 0 && f->current_loc_valid) {
      // we're recovering from a seek... that means we're going to discard
      // the samples from this packet even though we know our position from
      // the last page header, so we need to update the position based on
      // the discarded samples here
      // but wait, the code below is going to add this in itself even
      // on a discard, so we don't need to do it here...
   }

   // check if we have ogg information about the sample # for this packet
   if (f->last_seg_which == f->end_seg_with_known_loc) {
      // if we have a valid current loc, and this is final:
      if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
         uint32 current_end = f->known_loc_for_packet;
         // then let's infer the size of the (probably) short final frame
         if (current_end < f->current_loc + (right_end-left_start)) {
            if (current_end < f->current_loc) {
               // negative truncation, that's impossible!
               *len = 0;
            } else {
               *len = current_end - f->current_loc;
            }
            *len += left_start; // this doesn't seem right, but has no ill effect on my test files
            if (*len > right_end) *len = right_end; // this should never happen
            f->current_loc += *len;
            return TRUE;
         }
      }
      // otherwise, just set our sample loc
      // guess that the ogg granule pos refers to the _middle_ of the
      // last frame?
      // set f->current_loc to the position of left_start
      f->current_loc = f->known_loc_for_packet - (n2-left_start);
      f->current_loc_valid = TRUE;
   }
   if (f->current_loc_valid)
      f->current_loc += (right_start - left_start);

   if (f->alloc.alloc_buffer)
      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
   *len = right_end;  // ignore samples after the window goes to 0
   CHECK(f);

   return TRUE;
}
3430
3431 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3432 {
3433 int mode, left_end, right_end;
3434 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3435 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3436 }
3437
3438 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3439 {
3440 int prev,i,j;
3441 // we use right&left (the start of the right- and left-window sin()-regions)
3442 // to determine how much to return, rather than inferring from the rules
3443 // (same result, clearer code); 'left' indicates where our sin() window
3444 // starts, therefore where the previous window's right edge starts, and
3445 // therefore where to start mixing from the previous buffer. 'right'
3446 // indicates where our sin() ending-window starts, therefore that's where
3447 // we start saving, and where our returned-data ends.
3448
3449 // mixin from previous window
3450 if (f->previous_length) {
3451 int i,j, n = f->previous_length;
3452 float *w = get_window(f, n);
3453 if (w == NULL) return 0;
3454 for (i=0; i < f->channels; ++i) {
3455 for (j=0; j < n; ++j)
3456 f->channel_buffers[i][left+j] =
3457 f->channel_buffers[i][left+j]*w[ j] +
3458 f->previous_window[i][ j]*w[n-1-j];
3459 }
3460 }
3461
3462 prev = f->previous_length;
3463
3464 // last half of this data becomes previous window
3465 f->previous_length = len - right;
3466
3467 // @OPTIMIZE: could avoid this copy by double-buffering the
3468 // output (flipping previous_window with channel_buffers), but
3469 // then previous_window would have to be 2x as large, and
3470 // channel_buffers couldn't be temp mem (although they're NOT
3471 // currently temp mem, they could be (unless we want to level
3472 // performance by spreading out the computation))
3473 for (i=0; i < f->channels; ++i)
3474 for (j=0; right+j < len; ++j)
3475 f->previous_window[i][j] = f->channel_buffers[i][right+j];
3476
3477 if (!prev)
3478 // there was no previous packet, so this data isn't valid...
3479 // this isn't entirely true, only the would-have-overlapped data
3480 // isn't valid, but this seems to be what the spec requires
3481 return 0;
3482
3483 // truncate a short frame
3484 if (len < right) right = len;
3485
3486 f->samples_output += right-left;
3487
3488 return right - left;
3489 }
3490
3491 static int vorbis_pump_first_frame(stb_vorbis *f)
3492 {
3493 int len, right, left, res;
3494 res = vorbis_decode_packet(f, &len, &left, &right);
3495 if (res)
3496 vorbis_finish_frame(f, len, left, right);
3497 return res;
3498 }
3499
3500 #ifndef STB_VORBIS_NO_PUSHDATA_API
3501 static int is_whole_packet_present(stb_vorbis *f)
3502 {
3503 // make sure that we have the packet available before continuing...
3504 // this requires a full ogg parse, but we know we can fetch from f->stream
3505
3506 // instead of coding this out explicitly, we could save the current read state,
3507 // read the next packet with get8() until end-of-packet, check f->eof, then
3508 // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3509 // of state to restore (primarily the page segment table)
3510
3511 int s = f->next_seg, first = TRUE;
3512 uint8 *p = f->stream;
3513
3514 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3515 for (; s < f->segment_count; ++s) {
3516 p += f->segments[s];
3517 if (f->segments[s] < 255) // stop at first short segment
3518 break;
3519 }
3520 // either this continues, or it ends it...
3521 if (s == f->segment_count)
3522 s = -1; // set 'crosses page' flag
3523 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3524 first = FALSE;
3525 }
3526 for (; s == -1;) {
3527 uint8 *q;
3528 int n;
3529
3530 // check that we have the page header ready
3531 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3532 // validate the page
3533 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3534 if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3535 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3536 if (f->previous_length)
3537 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3538 // if no previous length, we're resynching, so we can come in on a continued-packet,
3539 // which we'll just drop
3540 } else {
3541 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3542 }
3543 n = p[26]; // segment counts
3544 q = p+27; // q points to segment table
3545 p = q + n; // advance past header
3546 // make sure we've read the segment table
3547 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3548 for (s=0; s < n; ++s) {
3549 p += q[s];
3550 if (q[s] < 255)
3551 break;
3552 }
3553 if (s == n)
3554 s = -1; // set 'crosses page' flag
3555 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3556 first = FALSE;
3557 }
3558 return TRUE;
3559 }
3560 #endif // !STB_VORBIS_NO_PUSHDATA_API
3561
3562 static int start_decoder(vorb *f)
3563 {
3564 uint8 header[6], x,y;
3565 int len,i,j,k, max_submaps = 0;
3566 int longest_floorlist=0;
3567
3568 // first page, first packet
3569 f->first_decode = TRUE;
3570
3571 if (!start_page(f)) return FALSE;
3572 // validate page flag
3573 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
3574 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
3575 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3576 // check for expected packet length
3577 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3578 if (f->segments[0] != 30) {
3579 // check for the Ogg skeleton fishead identifying header to refine our error
3580 if (f->segments[0] == 64 &&
3581 getn(f, header, 6) &&
3582 header[0] == 'f' &&
3583 header[1] == 'i' &&
3584 header[2] == 's' &&
3585 header[3] == 'h' &&
3586 header[4] == 'e' &&
3587 header[5] == 'a' &&
3588 get8(f) == 'd' &&
3589 get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported);
3590 else
3591 return error(f, VORBIS_invalid_first_page);
3592 }
3593
3594 // read packet
3595 // check packet header
3596 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3597 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3598 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3599 // vorbis_version
3600 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3601 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3602 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
3603 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3604 get32(f); // bitrate_maximum
3605 get32(f); // bitrate_nominal
3606 get32(f); // bitrate_minimum
3607 x = get8(f);
3608 {
3609 int log0,log1;
3610 log0 = x & 15;
3611 log1 = x >> 4;
3612 f->blocksize_0 = 1 << log0;
3613 f->blocksize_1 = 1 << log1;
3614 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3615 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3616 if (log0 > log1) return error(f, VORBIS_invalid_setup);
3617 }
3618
3619 // framing_flag
3620 x = get8(f);
3621 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3622
3623 // second packet!
3624 if (!start_page(f)) return FALSE;
3625
3626 if (!start_packet(f)) return FALSE;
3627
3628 if (!next_segment(f)) return FALSE;
3629
3630 if (get8_packet(f) != VORBIS_packet_comment) return error(f, VORBIS_invalid_setup);
3631 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3632 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3633 //file vendor
3634 len = get32_packet(f);
3635 f->vendor = (char*)setup_malloc(f, sizeof(char) * (len+1));
3636 for(i=0; i < len; ++i) {
3637 f->vendor[i] = get8_packet(f);
3638 }
3639 f->vendor[len] = (char)'\0';
3640 //user comments
3641 f->comment_list_length = get32_packet(f);
3642 f->comment_list = (char**)setup_malloc(f, sizeof(char*) * (f->comment_list_length));
3643
3644 for(i=0; i < f->comment_list_length; ++i) {
3645 len = get32_packet(f);
3646 f->comment_list[i] = (char*)setup_malloc(f, sizeof(char) * (len+1));
3647
3648 for(j=0; j < len; ++j) {
3649 f->comment_list[i][j] = get8_packet(f);
3650 }
3651 f->comment_list[i][len] = (char)'\0';
3652 }
3653
3654 // framing_flag
3655 x = get8_packet(f);
3656 if (!(x & 1)) return error(f, VORBIS_invalid_setup);
3657
3658
3659 skip(f, f->bytes_in_seg);
3660 f->bytes_in_seg = 0;
3661
3662 do {
3663 len = next_segment(f);
3664 skip(f, len);
3665 f->bytes_in_seg = 0;
3666 } while (len);
3667
3668 // third packet!
3669 if (!start_packet(f)) return FALSE;
3670
3671 #ifndef STB_VORBIS_NO_PUSHDATA_API
3672 if (IS_PUSH_MODE(f)) {
3673 if (!is_whole_packet_present(f)) {
3674 // convert error in ogg header to write type
3675 if (f->error == VORBIS_invalid_stream)
3676 f->error = VORBIS_invalid_setup;
3677 return FALSE;
3678 }
3679 }
3680 #endif
3681
3682 crc32_init(); // always init it, to avoid multithread race conditions
3683
3684 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
3685 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3686 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3687
3688 // codebooks
3689
3690 f->codebook_count = get_bits(f,8) + 1;
3691 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3692 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3693 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3694 for (i=0; i < f->codebook_count; ++i) {
3695 uint32 *values;
3696 int ordered, sorted_count;
3697 int total=0;
3698 uint8 *lengths;
3699 Codebook *c = f->codebooks+i;
3700 CHECK(f);
3701 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3702 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3703 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3704 x = get_bits(f, 8);
3705 c->dimensions = (get_bits(f, 8)<<8) + x;
3706 x = get_bits(f, 8);
3707 y = get_bits(f, 8);
3708 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3709 ordered = get_bits(f,1);
3710 c->sparse = ordered ? 0 : get_bits(f,1);
3711
3712 if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3713
3714 if (c->sparse)
3715 lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3716 else
3717 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3718
3719 if (!lengths) return error(f, VORBIS_outofmem);
3720
3721 if (ordered) {
3722 int current_entry = 0;
3723 int current_length = get_bits(f,5) + 1;
3724 while (current_entry < c->entries) {
3725 int limit = c->entries - current_entry;
3726 int n = get_bits(f, ilog(limit));
3727 if (current_length >= 32) return error(f, VORBIS_invalid_setup);
3728 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3729 memset(lengths + current_entry, current_length, n);
3730 current_entry += n;
3731 ++current_length;
3732 }
3733 } else {
3734 for (j=0; j < c->entries; ++j) {
3735 int present = c->sparse ? get_bits(f,1) : 1;
3736 if (present) {
3737 lengths[j] = get_bits(f, 5) + 1;
3738 ++total;
3739 if (lengths[j] == 32)
3740 return error(f, VORBIS_invalid_setup);
3741 } else {
3742 lengths[j] = NO_CODE;
3743 }
3744 }
3745 }
3746
3747 if (c->sparse && total >= c->entries >> 2) {
3748 // convert sparse items to non-sparse!
3749 if (c->entries > (int) f->setup_temp_memory_required)
3750 f->setup_temp_memory_required = c->entries;
3751
3752 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3753 if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3754 memcpy(c->codeword_lengths, lengths, c->entries);
3755 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3756 lengths = c->codeword_lengths;
3757 c->sparse = 0;
3758 }
3759
3760 // compute the size of the sorted tables
3761 if (c->sparse) {
3762 sorted_count = total;
3763 } else {
3764 sorted_count = 0;
3765 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3766 for (j=0; j < c->entries; ++j)
3767 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3768 ++sorted_count;
3769 #endif
3770 }
3771
3772 c->sorted_entries = sorted_count;
3773 values = NULL;
3774
3775 CHECK(f);
3776 if (!c->sparse) {
3777 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3778 if (!c->codewords) return error(f, VORBIS_outofmem);
3779 } else {
3780 unsigned int size;
3781 if (c->sorted_entries) {
3782 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3783 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3784 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3785 if (!c->codewords) return error(f, VORBIS_outofmem);
3786 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3787 if (!values) return error(f, VORBIS_outofmem);
3788 }
3789 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3790 if (size > f->setup_temp_memory_required)
3791 f->setup_temp_memory_required = size;
3792 }
3793
3794 if (!compute_codewords(c, lengths, c->entries, values)) {
3795 if (c->sparse) setup_temp_free(f, values, 0);
3796 return error(f, VORBIS_invalid_setup);
3797 }
3798
3799 if (c->sorted_entries) {
3800 // allocate an extra slot for sentinels
3801 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3802 if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3803 // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3804 // so that we can catch that case without an extra if
3805 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3806 if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3807 ++c->sorted_values;
3808 c->sorted_values[-1] = -1;
3809 compute_sorted_huffman(c, lengths, values);
3810 }
3811
3812 if (c->sparse) {
3813 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3814 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3815 setup_temp_free(f, lengths, c->entries);
3816 c->codewords = NULL;
3817 }
3818
3819 compute_accelerated_huffman(c);
3820
3821 CHECK(f);
3822 c->lookup_type = get_bits(f, 4);
3823 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3824 if (c->lookup_type > 0) {
3825 uint16 *mults;
3826 c->minimum_value = float32_unpack(get_bits(f, 32));
3827 c->delta_value = float32_unpack(get_bits(f, 32));
3828 c->value_bits = get_bits(f, 4)+1;
3829 c->sequence_p = get_bits(f,1);
3830 if (c->lookup_type == 1) {
3831 int values = lookup1_values(c->entries, c->dimensions);
3832 if (values < 0) return error(f, VORBIS_invalid_setup);
3833 c->lookup_values = (uint32) values;
3834 } else {
3835 c->lookup_values = c->entries * c->dimensions;
3836 }
3837 if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3838 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3839 if (mults == NULL) return error(f, VORBIS_outofmem);
3840 for (j=0; j < (int) c->lookup_values; ++j) {
3841 int q = get_bits(f, c->value_bits);
3842 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3843 mults[j] = q;
3844 }
3845
3846 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3847 if (c->lookup_type == 1) {
3848 int len, sparse = c->sparse;
3849 float last=0;
3850 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3851 if (sparse) {
3852 if (c->sorted_entries == 0) goto skip;
3853 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3854 } else
3855 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3856 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3857 len = sparse ? c->sorted_entries : c->entries;
3858 for (j=0; j < len; ++j) {
3859 unsigned int z = sparse ? c->sorted_values[j] : j;
3860 unsigned int div=1;
3861 for (k=0; k < c->dimensions; ++k) {
3862 int off = (z / div) % c->lookup_values;
3863 float val = mults[off];
3864 val = mults[off]*c->delta_value + c->minimum_value + last;
3865 c->multiplicands[j*c->dimensions + k] = val;
3866 if (c->sequence_p)
3867 last = val;
3868 if (k+1 < c->dimensions) {
3869 if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3870 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3871 return error(f, VORBIS_invalid_setup);
3872 }
3873 div *= c->lookup_values;
3874 }
3875 }
3876 }
3877 c->lookup_type = 2;
3878 }
3879 else
3880 #endif
3881 {
3882 float last=0;
3883 CHECK(f);
3884 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3885 if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3886 for (j=0; j < (int) c->lookup_values; ++j) {
3887 float val = mults[j] * c->delta_value + c->minimum_value + last;
3888 c->multiplicands[j] = val;
3889 if (c->sequence_p)
3890 last = val;
3891 }
3892 }
3893 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3894 skip:;
3895 #endif
3896 setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3897
3898 CHECK(f);
3899 }
3900 CHECK(f);
3901 }
3902
3903 // time domain transfers (notused)
3904
3905 x = get_bits(f, 6) + 1;
3906 for (i=0; i < x; ++i) {
3907 uint32 z = get_bits(f, 16);
3908 if (z != 0) return error(f, VORBIS_invalid_setup);
3909 }
3910
3911 // Floors
3912 f->floor_count = get_bits(f, 6)+1;
3913 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3914 if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3915 for (i=0; i < f->floor_count; ++i) {
3916 f->floor_types[i] = get_bits(f, 16);
3917 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3918 if (f->floor_types[i] == 0) {
3919 Floor0 *g = &f->floor_config[i].floor0;
3920 g->order = get_bits(f,8);
3921 g->rate = get_bits(f,16);
3922 g->bark_map_size = get_bits(f,16);
3923 g->amplitude_bits = get_bits(f,6);
3924 g->amplitude_offset = get_bits(f,8);
3925 g->number_of_books = get_bits(f,4) + 1;
3926 for (j=0; j < g->number_of_books; ++j)
3927 g->book_list[j] = get_bits(f,8);
3928 return error(f, VORBIS_feature_not_supported);
3929 } else {
3930 stbv__floor_ordering p[31*8+2];
3931 Floor1 *g = &f->floor_config[i].floor1;
3932 int max_class = -1;
3933 g->partitions = get_bits(f, 5);
3934 for (j=0; j < g->partitions; ++j) {
3935 g->partition_class_list[j] = get_bits(f, 4);
3936 if (g->partition_class_list[j] > max_class)
3937 max_class = g->partition_class_list[j];
3938 }
3939 for (j=0; j <= max_class; ++j) {
3940 g->class_dimensions[j] = get_bits(f, 3)+1;
3941 g->class_subclasses[j] = get_bits(f, 2);
3942 if (g->class_subclasses[j]) {
3943 g->class_masterbooks[j] = get_bits(f, 8);
3944 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3945 }
3946 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3947 g->subclass_books[j][k] = get_bits(f,8)-1;
3948 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3949 }
3950 }
3951 g->floor1_multiplier = get_bits(f,2)+1;
3952 g->rangebits = get_bits(f,4);
3953 g->Xlist[0] = 0;
3954 g->Xlist[1] = 1 << g->rangebits;
3955 g->values = 2;
3956 for (j=0; j < g->partitions; ++j) {
3957 int c = g->partition_class_list[j];
3958 for (k=0; k < g->class_dimensions[c]; ++k) {
3959 g->Xlist[g->values] = get_bits(f, g->rangebits);
3960 ++g->values;
3961 }
3962 }
3963 // precompute the sorting
3964 for (j=0; j < g->values; ++j) {
3965 p[j].x = g->Xlist[j];
3966 p[j].id = j;
3967 }
3968 qsort(p, g->values, sizeof(p[0]), point_compare);
3969 for (j=0; j < g->values-1; ++j)
3970 if (p[j].x == p[j+1].x)
3971 return error(f, VORBIS_invalid_setup);
3972 for (j=0; j < g->values; ++j)
3973 g->sorted_order[j] = (uint8) p[j].id;
3974 // precompute the neighbors
3975 for (j=2; j < g->values; ++j) {
3976 int low = 0,hi = 0;
3977 neighbors(g->Xlist, j, &low,&hi);
3978 g->neighbors[j][0] = low;
3979 g->neighbors[j][1] = hi;
3980 }
3981
3982 if (g->values > longest_floorlist)
3983 longest_floorlist = g->values;
3984 }
3985 }
3986
3987 // Residue
3988 f->residue_count = get_bits(f, 6)+1;
3989 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3990 if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3991 memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3992 for (i=0; i < f->residue_count; ++i) {
3993 uint8 residue_cascade[64];
3994 Residue *r = f->residue_config+i;
3995 f->residue_types[i] = get_bits(f, 16);
3996 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3997 r->begin = get_bits(f, 24);
3998 r->end = get_bits(f, 24);
3999 if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
4000 r->part_size = get_bits(f,24)+1;
4001 r->classifications = get_bits(f,6)+1;
4002 r->classbook = get_bits(f,8);
4003 if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4004 for (j=0; j < r->classifications; ++j) {
4005 uint8 high_bits=0;
4006 uint8 low_bits=get_bits(f,3);
4007 if (get_bits(f,1))
4008 high_bits = get_bits(f,5);
4009 residue_cascade[j] = high_bits*8 + low_bits;
4010 }
4011 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
4012 if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
4013 for (j=0; j < r->classifications; ++j) {
4014 for (k=0; k < 8; ++k) {
4015 if (residue_cascade[j] & (1 << k)) {
4016 r->residue_books[j][k] = get_bits(f, 8);
4017 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4018 } else {
4019 r->residue_books[j][k] = -1;
4020 }
4021 }
4022 }
4023 // precompute the classifications[] array to avoid inner-loop mod/divide
4024 // call it 'classdata' since we already have r->classifications
4025 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4026 if (!r->classdata) return error(f, VORBIS_outofmem);
4027 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4028 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
4029 int classwords = f->codebooks[r->classbook].dimensions;
4030 int temp = j;
4031 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
4032 if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
4033 for (k=classwords-1; k >= 0; --k) {
4034 r->classdata[j][k] = temp % r->classifications;
4035 temp /= r->classifications;
4036 }
4037 }
4038 }
4039
4040 f->mapping_count = get_bits(f,6)+1;
4041 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4042 if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4043 memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4044 for (i=0; i < f->mapping_count; ++i) {
4045 Mapping *m = f->mapping + i;
4046 int mapping_type = get_bits(f,16);
4047 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4048 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4049 if (m->chan == NULL) return error(f, VORBIS_outofmem);
4050 if (get_bits(f,1))
4051 m->submaps = get_bits(f,4)+1;
4052 else
4053 m->submaps = 1;
4054 if (m->submaps > max_submaps)
4055 max_submaps = m->submaps;
4056 if (get_bits(f,1)) {
4057 m->coupling_steps = get_bits(f,8)+1;
4058 if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
4059 for (k=0; k < m->coupling_steps; ++k) {
4060 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4061 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4062 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4063 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4064 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4065 }
4066 } else
4067 m->coupling_steps = 0;
4068
4069 // reserved field
4070 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4071 if (m->submaps > 1) {
4072 for (j=0; j < f->channels; ++j) {
4073 m->chan[j].mux = get_bits(f, 4);
4074 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4075 }
4076 } else
4077 // @SPECIFICATION: this case is missing from the spec
4078 for (j=0; j < f->channels; ++j)
4079 m->chan[j].mux = 0;
4080
4081 for (j=0; j < m->submaps; ++j) {
4082 get_bits(f,8); // discard
4083 m->submap_floor[j] = get_bits(f,8);
4084 m->submap_residue[j] = get_bits(f,8);
4085 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4086 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4087 }
4088 }
4089
4090 // Modes
4091 f->mode_count = get_bits(f, 6)+1;
4092 for (i=0; i < f->mode_count; ++i) {
4093 Mode *m = f->mode_config+i;
4094 m->blockflag = get_bits(f,1);
4095 m->windowtype = get_bits(f,16);
4096 m->transformtype = get_bits(f,16);
4097 m->mapping = get_bits(f,8);
4098 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4099 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4100 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4101 }
4102
4103 flush_packet(f);
4104
4105 f->previous_length = 0;
4106
4107 for (i=0; i < f->channels; ++i) {
4108 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4109 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4110 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4111 if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4112 memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4113 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4114 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4115 if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4116 #endif
4117 }
4118
4119 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4120 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4121 f->blocksize[0] = f->blocksize_0;
4122 f->blocksize[1] = f->blocksize_1;
4123
4124 #ifdef STB_VORBIS_DIVIDE_TABLE
4125 if (integer_divide_table[1][1]==0)
4126 for (i=0; i < DIVTAB_NUMER; ++i)
4127 for (j=1; j < DIVTAB_DENOM; ++j)
4128 integer_divide_table[i][j] = i / j;
4129 #endif
4130
4131 // compute how much temporary memory is needed
4132
4133 // 1.
4134 {
4135 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4136 uint32 classify_mem;
4137 int i,max_part_read=0;
4138 for (i=0; i < f->residue_count; ++i) {
4139 Residue *r = f->residue_config + i;
4140 unsigned int actual_size = f->blocksize_1 / 2;
4141 unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4142 unsigned int limit_r_end = r->end < actual_size ? r->end : actual_size;
4143 int n_read = limit_r_end - limit_r_begin;
4144 int part_read = n_read / r->part_size;
4145 if (part_read > max_part_read)
4146 max_part_read = part_read;
4147 }
4148 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4149 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4150 #else
4151 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4152 #endif
4153
4154 // maximum reasonable partition size is f->blocksize_1
4155
4156 f->temp_memory_required = classify_mem;
4157 if (imdct_mem > f->temp_memory_required)
4158 f->temp_memory_required = imdct_mem;
4159 }
4160
4161
4162 if (f->alloc.alloc_buffer) {
4163 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4164 // check if there's enough temp memory so we don't error later
4165 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4166 return error(f, VORBIS_outofmem);
4167 }
4168
4169 // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
4170 // without PAGEFLAG_continued_packet, so this either points to the first page, or
4171 // the page after the end of the headers. It might be cleaner to point to a page
4172 // in the middle of the headers, when that's the page where the first audio packet
4173 // starts, but we'd have to also correctly skip the end of any continued packet in
4174 // stb_vorbis_seek_start.
4175 if (f->next_seg == -1) {
4176 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4177 } else {
4178 f->first_audio_page_offset = 0;
4179 }
4180
4181 return TRUE;
4182 }
4183
4184 static void vorbis_deinit(stb_vorbis *p)
4185 {
4186 int i,j;
4187
4188 setup_free(p, p->vendor);
4189 for (i=0; i < p->comment_list_length; ++i) {
4190 setup_free(p, p->comment_list[i]);
4191 }
4192 setup_free(p, p->comment_list);
4193
4194 if (p->residue_config) {
4195 for (i=0; i < p->residue_count; ++i) {
4196 Residue *r = p->residue_config+i;
4197 if (r->classdata) {
4198 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4199 setup_free(p, r->classdata[j]);
4200 setup_free(p, r->classdata);
4201 }
4202 setup_free(p, r->residue_books);
4203 }
4204 }
4205
4206 if (p->codebooks) {
4207 CHECK(p);
4208 for (i=0; i < p->codebook_count; ++i) {
4209 Codebook *c = p->codebooks + i;
4210 setup_free(p, c->codeword_lengths);
4211 setup_free(p, c->multiplicands);
4212 setup_free(p, c->codewords);
4213 setup_free(p, c->sorted_codewords);
4214 // c->sorted_values[-1] is the first entry in the array
4215 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4216 }
4217 setup_free(p, p->codebooks);
4218 }
4219 setup_free(p, p->floor_config);
4220 setup_free(p, p->residue_config);
4221 if (p->mapping) {
4222 for (i=0; i < p->mapping_count; ++i)
4223 setup_free(p, p->mapping[i].chan);
4224 setup_free(p, p->mapping);
4225 }
4226 CHECK(p);
4227 for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4228 setup_free(p, p->channel_buffers[i]);
4229 setup_free(p, p->previous_window[i]);
4230 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4231 setup_free(p, p->floor_buffers[i]);
4232 #endif
4233 setup_free(p, p->finalY[i]);
4234 }
4235 for (i=0; i < 2; ++i) {
4236 setup_free(p, p->A[i]);
4237 setup_free(p, p->B[i]);
4238 setup_free(p, p->C[i]);
4239 setup_free(p, p->window[i]);
4240 setup_free(p, p->bit_reverse[i]);
4241 }
4242 #ifndef STB_VORBIS_NO_STDIO
4243 if (p->close_on_free) fclose(p->f);
4244 #endif
4245 }
4246
4247 void stb_vorbis_close(stb_vorbis *p)
4248 {
4249 if (p == NULL) return;
4250 vorbis_deinit(p);
4251 setup_free(p,p);
4252 }
4253
4254 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4255 {
4256 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4257 if (z) {
4258 p->alloc = *z;
4259 p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4260 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4261 }
4262 p->eof = 0;
4263 p->error = VORBIS__no_error;
4264 p->stream = NULL;
4265 p->codebooks = NULL;
4266 p->page_crc_tests = -1;
4267 #ifndef STB_VORBIS_NO_STDIO
4268 p->close_on_free = FALSE;
4269 p->f = NULL;
4270 #endif
4271 }
4272
4273 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4274 {
4275 if (f->current_loc_valid)
4276 return f->current_loc;
4277 else
4278 return -1;
4279 }
4280
4281 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4282 {
4283 stb_vorbis_info d;
4284 d.channels = f->channels;
4285 d.sample_rate = f->sample_rate;
4286 d.setup_memory_required = f->setup_memory_required;
4287 d.setup_temp_memory_required = f->setup_temp_memory_required;
4288 d.temp_memory_required = f->temp_memory_required;
4289 d.max_frame_size = f->blocksize_1 >> 1;
4290 return d;
4291 }
4292
4293 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
4294 {
4295 stb_vorbis_comment d;
4296 d.vendor = f->vendor;
4297 d.comment_list_length = f->comment_list_length;
4298 d.comment_list = f->comment_list;
4299 return d;
4300 }
4301
4302 int stb_vorbis_get_error(stb_vorbis *f)
4303 {
4304 int e = f->error;
4305 f->error = VORBIS__no_error;
4306 return e;
4307 }
4308
4309 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4310 {
4311 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4312 return p;
4313 }
4314
4315 #ifndef STB_VORBIS_NO_PUSHDATA_API
4316
4317 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4318 {
4319 f->previous_length = 0;
4320 f->page_crc_tests = 0;
4321 f->discard_samples_deferred = 0;
4322 f->current_loc_valid = FALSE;
4323 f->first_decode = FALSE;
4324 f->samples_output = 0;
4325 f->channel_buffer_start = 0;
4326 f->channel_buffer_end = 0;
4327 }
4328
4329 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4330 {
4331 int i,n;
4332 for (i=0; i < f->page_crc_tests; ++i)
4333 f->scan[i].bytes_done = 0;
4334
4335 // if we have room for more scans, search for them first, because
4336 // they may cause us to stop early if their header is incomplete
4337 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4338 if (data_len < 4) return 0;
4339 data_len -= 3; // need to look for 4-byte sequence, so don't miss
4340 // one that straddles a boundary
4341 for (i=0; i < data_len; ++i) {
4342 if (data[i] == 0x4f) {
4343 if (0==memcmp(data+i, ogg_page_header, 4)) {
4344 int j,len;
4345 uint32 crc;
4346 // make sure we have the whole page header
4347 if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4348 // only read up to this page start, so hopefully we'll
4349 // have the whole page header start next time
4350 data_len = i;
4351 break;
4352 }
4353 // ok, we have it all; compute the length of the page
4354 len = 27 + data[i+26];
4355 for (j=0; j < data[i+26]; ++j)
4356 len += data[i+27+j];
4357 // scan everything up to the embedded crc (which we must 0)
4358 crc = 0;
4359 for (j=0; j < 22; ++j)
4360 crc = crc32_update(crc, data[i+j]);
4361 // now process 4 0-bytes
4362 for ( ; j < 26; ++j)
4363 crc = crc32_update(crc, 0);
4364 // len is the total number of bytes we need to scan
4365 n = f->page_crc_tests++;
4366 f->scan[n].bytes_left = len-j;
4367 f->scan[n].crc_so_far = crc;
4368 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4369 // if the last frame on a page is continued to the next, then
4370 // we can't recover the sample_loc immediately
4371 if (data[i+27+data[i+26]-1] == 255)
4372 f->scan[n].sample_loc = ~0;
4373 else
4374 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4375 f->scan[n].bytes_done = i+j;
4376 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4377 break;
4378 // keep going if we still have room for more
4379 }
4380 }
4381 }
4382 }
4383
4384 for (i=0; i < f->page_crc_tests;) {
4385 uint32 crc;
4386 int j;
4387 int n = f->scan[i].bytes_done;
4388 int m = f->scan[i].bytes_left;
4389 if (m > data_len - n) m = data_len - n;
4390 // m is the bytes to scan in the current chunk
4391 crc = f->scan[i].crc_so_far;
4392 for (j=0; j < m; ++j)
4393 crc = crc32_update(crc, data[n+j]);
4394 f->scan[i].bytes_left -= m;
4395 f->scan[i].crc_so_far = crc;
4396 if (f->scan[i].bytes_left == 0) {
4397 // does it match?
4398 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4399 // Houston, we have page
4400 data_len = n+m; // consumption amount is wherever that scan ended
4401 f->page_crc_tests = -1; // drop out of page scan mode
4402 f->previous_length = 0; // decode-but-don't-output one frame
4403 f->next_seg = -1; // start a new page
4404 f->current_loc = f->scan[i].sample_loc; // set the current sample location
4405 // to the amount we'd have decoded had we decoded this page
4406 f->current_loc_valid = f->current_loc != ~0U;
4407 return data_len;
4408 }
4409 // delete entry
4410 f->scan[i] = f->scan[--f->page_crc_tests];
4411 } else {
4412 ++i;
4413 }
4414 }
4415
4416 return data_len;
4417 }
4418
4419 // return value: number of bytes we used
4420 int stb_vorbis_decode_frame_pushdata(
4421 stb_vorbis *f, // the file we're decoding
4422 const uint8 *data, int data_len, // the memory available for decoding
4423 int *channels, // place to write number of float * buffers
4424 float ***output, // place to write float ** array of float * buffers
4425 int *samples // place to write number of output samples
4426 )
4427 {
4428 int i;
4429 int len,right,left;
4430
4431 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4432
4433 if (f->page_crc_tests >= 0) {
4434 *samples = 0;
4435 return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4436 }
4437
4438 f->stream = (uint8 *) data;
4439 f->stream_end = (uint8 *) data + data_len;
4440 f->error = VORBIS__no_error;
4441
4442 // check that we have the entire packet in memory
4443 if (!is_whole_packet_present(f)) {
4444 *samples = 0;
4445 return 0;
4446 }
4447
4448 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4449 // save the actual error we encountered
4450 enum STBVorbisError error = f->error;
4451 if (error == VORBIS_bad_packet_type) {
4452 // flush and resynch
4453 f->error = VORBIS__no_error;
4454 while (get8_packet(f) != EOP)
4455 if (f->eof) break;
4456 *samples = 0;
4457 return (int) (f->stream - data);
4458 }
4459 if (error == VORBIS_continued_packet_flag_invalid) {
4460 if (f->previous_length == 0) {
4461 // we may be resynching, in which case it's ok to hit one
4462 // of these; just discard the packet
4463 f->error = VORBIS__no_error;
4464 while (get8_packet(f) != EOP)
4465 if (f->eof) break;
4466 *samples = 0;
4467 return (int) (f->stream - data);
4468 }
4469 }
4470 // if we get an error while parsing, what to do?
4471 // well, it DEFINITELY won't work to continue from where we are!
4472 stb_vorbis_flush_pushdata(f);
4473 // restore the error that actually made us bail
4474 f->error = error;
4475 *samples = 0;
4476 return 1;
4477 }
4478
4479 // success!
4480 len = vorbis_finish_frame(f, len, left, right);
4481 for (i=0; i < f->channels; ++i)
4482 f->outputs[i] = f->channel_buffers[i] + left;
4483
4484 if (channels) *channels = f->channels;
4485 *samples = len;
4486 *output = f->outputs;
4487 return (int) (f->stream - data);
4488 }
4489
4490 stb_vorbis *stb_vorbis_open_pushdata(
4491 const unsigned char *data, int data_len, // the memory available for decoding
4492 int *data_used, // only defined if result is not NULL
4493 int *error, const stb_vorbis_alloc *alloc)
4494 {
4495 stb_vorbis *f, p;
4496 vorbis_init(&p, alloc);
4497 p.stream = (uint8 *) data;
4498 p.stream_end = (uint8 *) data + data_len;
4499 p.push_mode = TRUE;
4500 if (!start_decoder(&p)) {
4501 if (p.eof)
4502 *error = VORBIS_need_more_data;
4503 else
4504 *error = p.error;
4505 return NULL;
4506 }
4507 f = vorbis_alloc(&p);
4508 if (f) {
4509 *f = p;
4510 *data_used = (int) (f->stream - data);
4511 *error = 0;
4512 return f;
4513 } else {
4514 vorbis_deinit(&p);
4515 return NULL;
4516 }
4517 }
4518 #endif // STB_VORBIS_NO_PUSHDATA_API
4519
4520 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4521 {
4522 #ifndef STB_VORBIS_NO_PUSHDATA_API
4523 if (f->push_mode) return 0;
4524 #endif
4525 if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4526 #ifndef STB_VORBIS_NO_STDIO
4527 return (unsigned int) (ftell(f->f) - f->f_start);
4528 #endif
4529 }
4530
4531 #ifndef STB_VORBIS_NO_PULLDATA_API
4532 //
4533 // DATA-PULLING API
4534 //
4535
4536 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4537 {
4538 for(;;) {
4539 int n;
4540 if (f->eof) return 0;
4541 n = get8(f);
4542 if (n == 0x4f) { // page header candidate
4543 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4544 int i;
4545 // check if we're off the end of a file_section stream
4546 if (retry_loc - 25 > f->stream_len)
4547 return 0;
4548 // check the rest of the header
4549 for (i=1; i < 4; ++i)
4550 if (get8(f) != ogg_page_header[i])
4551 break;
4552 if (f->eof) return 0;
4553 if (i == 4) {
4554 uint8 header[27];
4555 uint32 i, crc, goal, len;
4556 for (i=0; i < 4; ++i)
4557 header[i] = ogg_page_header[i];
4558 for (; i < 27; ++i)
4559 header[i] = get8(f);
4560 if (f->eof) return 0;
4561 if (header[4] != 0) goto invalid;
4562 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4563 for (i=22; i < 26; ++i)
4564 header[i] = 0;
4565 crc = 0;
4566 for (i=0; i < 27; ++i)
4567 crc = crc32_update(crc, header[i]);
4568 len = 0;
4569 for (i=0; i < header[26]; ++i) {
4570 int s = get8(f);
4571 crc = crc32_update(crc, s);
4572 len += s;
4573 }
4574 if (len && f->eof) return 0;
4575 for (i=0; i < len; ++i)
4576 crc = crc32_update(crc, get8(f));
4577 // finished parsing probable page
4578 if (crc == goal) {
4579 // we could now check that it's either got the last
4580 // page flag set, OR it's followed by the capture
4581 // pattern, but I guess TECHNICALLY you could have
4582 // a file with garbage between each ogg page and recover
4583 // from it automatically? So even though that paranoia
4584 // might decrease the chance of an invalid decode by
4585 // another 2^32, not worth it since it would hose those
4586 // invalid-but-useful files?
4587 if (end)
4588 *end = stb_vorbis_get_file_offset(f);
4589 if (last) {
4590 if (header[5] & 0x04)
4591 *last = 1;
4592 else
4593 *last = 0;
4594 }
4595 set_file_offset(f, retry_loc-1);
4596 return 1;
4597 }
4598 }
4599 invalid:
4600 // not a valid page, so rewind and look for next one
4601 set_file_offset(f, retry_loc);
4602 }
4603 }
4604 }
4605
4606
4607 #define SAMPLE_unknown 0xffffffff
4608
4609 // seeking is implemented with a binary search, which narrows down the range to
4610 // 64K, before using a linear search (because finding the synchronization
4611 // pattern can be expensive, and the chance we'd find the end page again is
4612 // relatively high for small ranges)
4613 //
4614 // two initial interpolation-style probes are used at the start of the search
4615 // to try to bound either side of the binary search sensibly, while still
4616 // working in O(log n) time if they fail.
4617
4618 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4619 {
4620 uint8 header[27], lacing[255];
4621 int i,len;
4622
4623 // record where the page starts
4624 z->page_start = stb_vorbis_get_file_offset(f);
4625
4626 // parse the header
4627 getn(f, header, 27);
4628 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4629 return 0;
4630 getn(f, lacing, header[26]);
4631
4632 // determine the length of the payload
4633 len = 0;
4634 for (i=0; i < header[26]; ++i)
4635 len += lacing[i];
4636
4637 // this implies where the page ends
4638 z->page_end = z->page_start + 27 + header[26] + len;
4639
4640 // read the last-decoded sample out of the data
4641 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4642
4643 // restore file state to where we were
4644 set_file_offset(f, z->page_start);
4645 return 1;
4646 }
4647
4648 // rarely used function to seek back to the preceding page while finding the
4649 // start of a packet
4650 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4651 {
4652 unsigned int previous_safe, end;
4653
4654 // now we want to seek back 64K from the limit
4655 if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4656 previous_safe = limit_offset - 65536;
4657 else
4658 previous_safe = f->first_audio_page_offset;
4659
4660 set_file_offset(f, previous_safe);
4661
4662 while (vorbis_find_page(f, &end, NULL)) {
4663 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4664 return 1;
4665 set_file_offset(f, end);
4666 }
4667
4668 return 0;
4669 }
4670
4671 // implements the search logic for finding a page and starting decoding. if
4672 // the function succeeds, current_loc_valid will be true and current_loc will
4673 // be less than or equal to the provided sample number (the closer the
4674 // better).
4675 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4676 {
4677 ProbedPage left, right, mid;
4678 int i, start_seg_with_known_loc, end_pos, page_start;
4679 uint32 delta, stream_length, padding, last_sample_limit;
4680 double offset = 0.0, bytes_per_sample = 0.0;
4681 int probe = 0;
4682
4683 // find the last page and validate the target sample
4684 stream_length = stb_vorbis_stream_length_in_samples(f);
4685 if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4686 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4687
4688 // this is the maximum difference between the window-center (which is the
4689 // actual granule position value), and the right-start (which the spec
4690 // indicates should be the granule position (give or take one)).
4691 padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4692 if (sample_number < padding)
4693 last_sample_limit = 0;
4694 else
4695 last_sample_limit = sample_number - padding;
4696
4697 left = f->p_first;
4698 while (left.last_decoded_sample == ~0U) {
4699 // (untested) the first page does not have a 'last_decoded_sample'
4700 set_file_offset(f, left.page_end);
4701 if (!get_seek_page_info(f, &left)) goto error;
4702 }
4703
4704 right = f->p_last;
4705 assert(right.last_decoded_sample != ~0U);
4706
4707 // starting from the start is handled differently
4708 if (last_sample_limit <= left.last_decoded_sample) {
4709 if (stb_vorbis_seek_start(f)) {
4710 if (f->current_loc > sample_number)
4711 return error(f, VORBIS_seek_failed);
4712 return 1;
4713 }
4714 return 0;
4715 }
4716
4717 while (left.page_end != right.page_start) {
4718 assert(left.page_end < right.page_start);
4719 // search range in bytes
4720 delta = right.page_start - left.page_end;
4721 if (delta <= 65536) {
4722 // there's only 64K left to search - handle it linearly
4723 set_file_offset(f, left.page_end);
4724 } else {
4725 if (probe < 2) {
4726 if (probe == 0) {
4727 // first probe (interpolate)
4728 double data_bytes = right.page_end - left.page_start;
4729 bytes_per_sample = data_bytes / right.last_decoded_sample;
4730 offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
4731 } else {
4732 // second probe (try to bound the other side)
4733 double error = ((double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
4734 if (error >= 0 && error < 8000) error = 8000;
4735 if (error < 0 && error > -8000) error = -8000;
4736 offset += error * 2;
4737 }
4738
4739 // ensure the offset is valid
4740 if (offset < left.page_end)
4741 offset = left.page_end;
4742 if (offset > right.page_start - 65536)
4743 offset = right.page_start - 65536;
4744
4745 set_file_offset(f, (unsigned int) offset);
4746 } else {
4747 // binary search for large ranges (offset by 32K to ensure
4748 // we don't hit the right page)
4749 set_file_offset(f, left.page_end + (delta / 2) - 32768);
4750 }
4751
4752 if (!vorbis_find_page(f, NULL, NULL)) goto error;
4753 }
4754
4755 for (;;) {
4756 if (!get_seek_page_info(f, &mid)) goto error;
4757 if (mid.last_decoded_sample != ~0U) break;
4758 // (untested) no frames end on this page
4759 set_file_offset(f, mid.page_end);
4760 assert(mid.page_start < right.page_start);
4761 }
4762
4763 // if we've just found the last page again then we're in a tricky file,
4764 // and we're close enough (if it wasn't an interpolation probe).
4765 if (mid.page_start == right.page_start) {
4766 if (probe >= 2 || delta <= 65536)
4767 break;
4768 } else {
4769 if (last_sample_limit < mid.last_decoded_sample)
4770 right = mid;
4771 else
4772 left = mid;
4773 }
4774
4775 ++probe;
4776 }
4777
4778 // seek back to start of the last packet
4779 page_start = left.page_start;
4780 set_file_offset(f, page_start);
4781 if (!start_page(f)) return error(f, VORBIS_seek_failed);
4782 end_pos = f->end_seg_with_known_loc;
4783 assert(end_pos >= 0);
4784
4785 for (;;) {
4786 for (i = end_pos; i > 0; --i)
4787 if (f->segments[i-1] != 255)
4788 break;
4789
4790 start_seg_with_known_loc = i;
4791
4792 if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4793 break;
4794
4795 // (untested) the final packet begins on an earlier page
4796 if (!go_to_page_before(f, page_start))
4797 goto error;
4798
4799 page_start = stb_vorbis_get_file_offset(f);
4800 if (!start_page(f)) goto error;
4801 end_pos = f->segment_count - 1;
4802 }
4803
4804 // prepare to start decoding
4805 f->current_loc_valid = FALSE;
4806 f->last_seg = FALSE;
4807 f->valid_bits = 0;
4808 f->packet_bytes = 0;
4809 f->bytes_in_seg = 0;
4810 f->previous_length = 0;
4811 f->next_seg = start_seg_with_known_loc;
4812
4813 for (i = 0; i < start_seg_with_known_loc; i++)
4814 skip(f, f->segments[i]);
4815
4816 // start decoding (optimizable - this frame is generally discarded)
4817 if (!vorbis_pump_first_frame(f))
4818 return 0;
4819 if (f->current_loc > sample_number)
4820 return error(f, VORBIS_seek_failed);
4821 return 1;
4822
4823 error:
4824 // try to restore the file to a valid state
4825 stb_vorbis_seek_start(f);
4826 return error(f, VORBIS_seek_failed);
4827 }
4828
4829 // the same as vorbis_decode_initial, but without advancing
4830 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4831 {
4832 int bits_read, bytes_read;
4833
4834 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4835 return 0;
4836
4837 // either 1 or 2 bytes were read, figure out which so we can rewind
4838 bits_read = 1 + ilog(f->mode_count-1);
4839 if (f->mode_config[*mode].blockflag)
4840 bits_read += 2;
4841 bytes_read = (bits_read + 7) / 8;
4842
4843 f->bytes_in_seg += bytes_read;
4844 f->packet_bytes -= bytes_read;
4845 skip(f, -bytes_read);
4846 if (f->next_seg == -1)
4847 f->next_seg = f->segment_count - 1;
4848 else
4849 f->next_seg--;
4850 f->valid_bits = 0;
4851
4852 return 1;
4853 }
4854
4855 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4856 {
4857 uint32 max_frame_samples;
4858
4859 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4860
4861 // fast page-level search
4862 if (!seek_to_sample_coarse(f, sample_number))
4863 return 0;
4864
4865 assert(f->current_loc_valid);
4866 assert(f->current_loc <= sample_number);
4867
4868 // linear search for the relevant packet
4869 max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4870 while (f->current_loc < sample_number) {
4871 int left_start, left_end, right_start, right_end, mode, frame_samples;
4872 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4873 return error(f, VORBIS_seek_failed);
4874 // calculate the number of samples returned by the next frame
4875 frame_samples = right_start - left_start;
4876 if (f->current_loc + frame_samples > sample_number) {
4877 return 1; // the next frame will contain the sample
4878 } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4879 // there's a chance the frame after this could contain the sample
4880 vorbis_pump_first_frame(f);
4881 } else {
4882 // this frame is too early to be relevant
4883 f->current_loc += frame_samples;
4884 f->previous_length = 0;
4885 maybe_start_packet(f);
4886 flush_packet(f);
4887 }
4888 }
4889 // the next frame should start with the sample
4890 if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed);
4891 return 1;
4892 }
4893
4894 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4895 {
4896 if (!stb_vorbis_seek_frame(f, sample_number))
4897 return 0;
4898
4899 if (sample_number != f->current_loc) {
4900 int n;
4901 uint32 frame_start = f->current_loc;
4902 stb_vorbis_get_frame_float(f, &n, NULL);
4903 assert(sample_number > frame_start);
4904 assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4905 f->channel_buffer_start += (sample_number - frame_start);
4906 }
4907
4908 return 1;
4909 }
4910
4911 int stb_vorbis_seek_start(stb_vorbis *f)
4912 {
4913 if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4914 set_file_offset(f, f->first_audio_page_offset);
4915 f->previous_length = 0;
4916 f->first_decode = TRUE;
4917 f->next_seg = -1;
4918 return vorbis_pump_first_frame(f);
4919 }
4920
4921 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4922 {
4923 unsigned int restore_offset, previous_safe;
4924 unsigned int end, last_page_loc;
4925
4926 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4927 if (!f->total_samples) {
4928 unsigned int last;
4929 uint32 lo,hi;
4930 char header[6];
4931
4932 // first, store the current decode position so we can restore it
4933 restore_offset = stb_vorbis_get_file_offset(f);
4934
4935 // now we want to seek back 64K from the end (the last page must
4936 // be at most a little less than 64K, but let's allow a little slop)
4937 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4938 previous_safe = f->stream_len - 65536;
4939 else
4940 previous_safe = f->first_audio_page_offset;
4941
4942 set_file_offset(f, previous_safe);
4943 // previous_safe is now our candidate 'earliest known place that seeking
4944 // to will lead to the final page'
4945
4946 if (!vorbis_find_page(f, &end, &last)) {
4947 // if we can't find a page, we're hosed!
4948 f->error = VORBIS_cant_find_last_page;
4949 f->total_samples = 0xffffffff;
4950 goto done;
4951 }
4952
4953 // check if there are more pages
4954 last_page_loc = stb_vorbis_get_file_offset(f);
4955
4956 // stop when the last_page flag is set, not when we reach eof;
4957 // this allows us to stop short of a 'file_section' end without
4958 // explicitly checking the length of the section
4959 while (!last) {
4960 set_file_offset(f, end);
4961 if (!vorbis_find_page(f, &end, &last)) {
4962 // the last page we found didn't have the 'last page' flag
4963 // set. whoops!
4964 break;
4965 }
4966 previous_safe = last_page_loc+1;
4967 last_page_loc = stb_vorbis_get_file_offset(f);
4968 }
4969
4970 set_file_offset(f, last_page_loc);
4971
4972 // parse the header
4973 getn(f, (unsigned char *)header, 6);
4974 // extract the absolute granule position
4975 lo = get32(f);
4976 hi = get32(f);
4977 if (lo == 0xffffffff && hi == 0xffffffff) {
4978 f->error = VORBIS_cant_find_last_page;
4979 f->total_samples = SAMPLE_unknown;
4980 goto done;
4981 }
4982 if (hi)
4983 lo = 0xfffffffe; // saturate
4984 f->total_samples = lo;
4985
4986 f->p_last.page_start = last_page_loc;
4987 f->p_last.page_end = end;
4988 f->p_last.last_decoded_sample = lo;
4989
4990 done:
4991 set_file_offset(f, restore_offset);
4992 }
4993 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4994 }
4995
4996 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4997 {
4998 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4999 }
5000
5001
5002
5003 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
5004 {
5005 int len, right,left,i;
5006 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
5007
5008 if (!vorbis_decode_packet(f, &len, &left, &right)) {
5009 f->channel_buffer_start = f->channel_buffer_end = 0;
5010 return 0;
5011 }
5012
5013 len = vorbis_finish_frame(f, len, left, right);
5014 for (i=0; i < f->channels; ++i)
5015 f->outputs[i] = f->channel_buffers[i] + left;
5016
5017 f->channel_buffer_start = left;
5018 f->channel_buffer_end = left+len;
5019
5020 if (channels) *channels = f->channels;
5021 if (output) *output = f->outputs;
5022 return len;
5023 }
5024
5025 #ifndef STB_VORBIS_NO_STDIO
5026
5027 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
5028 {
5029 stb_vorbis *f, p;
5030 vorbis_init(&p, alloc);
5031 p.f = file;
5032 p.f_start = (uint32) ftell(file);
5033 p.stream_len = length;
5034 p.close_on_free = close_on_free;
5035 if (start_decoder(&p)) {
5036 f = vorbis_alloc(&p);
5037 if (f) {
5038 *f = p;
5039 vorbis_pump_first_frame(f);
5040 return f;
5041 }
5042 }
5043 if (error) *error = p.error;
5044 vorbis_deinit(&p);
5045 return NULL;
5046 }
5047
5048 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
5049 {
5050 unsigned int len, start;
5051 start = (unsigned int) ftell(file);
5052 fseek(file, 0, SEEK_END);
5053 len = (unsigned int) (ftell(file) - start);
5054 fseek(file, start, SEEK_SET);
5055 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
5056 }
5057
5058 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
5059 {
5060 FILE *f;
5061 #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
5062 if (0 != fopen_s(&f, filename, "rb"))
5063 f = NULL;
5064 #else
5065 f = fopen(filename, "rb");
5066 #endif
5067 if (f)
5068 return stb_vorbis_open_file(f, TRUE, error, alloc);
5069 if (error) *error = VORBIS_file_open_failure;
5070 return NULL;
5071 }
5072 #endif // STB_VORBIS_NO_STDIO
5073
5074 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5075 {
5076 stb_vorbis *f, p;
5077 if (data == NULL) return NULL;
5078 vorbis_init(&p, alloc);
5079 p.stream = (uint8 *) data;
5080 p.stream_end = (uint8 *) data + len;
5081 p.stream_start = (uint8 *) p.stream;
5082 p.stream_len = len;
5083 p.push_mode = FALSE;
5084 if (start_decoder(&p)) {
5085 f = vorbis_alloc(&p);
5086 if (f) {
5087 *f = p;
5088 vorbis_pump_first_frame(f);
5089 if (error) *error = VORBIS__no_error;
5090 return f;
5091 }
5092 }
5093 if (error) *error = p.error;
5094 vorbis_deinit(&p);
5095 return NULL;
5096 }
5097
5098 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5099 #define PLAYBACK_MONO 1
5100 #define PLAYBACK_LEFT 2
5101 #define PLAYBACK_RIGHT 4
5102
5103 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
5104 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
5105 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
5106
5107 static int8 channel_position[7][6] =
5108 {
5109 { 0 },
5110 { C },
5111 { L, R },
5112 { L, C, R },
5113 { L, R, L, R },
5114 { L, C, R, L, R },
5115 { L, C, R, L, R, C },
5116 };
5117
5118
#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
   // overlay used to read the bit pattern of a float as an int
   typedef union {
      float f;
      int i;
   } float_conv;
   // compile-time check that float and int are both 4 bytes; a size of -1
   // is rejected by every compiler, whereas a size of 0 is accepted as an
   // extension by some and would let the check pass silently
   typedef char stb_vorbis_float_size_test[(sizeof(float)==4 && sizeof(int) == 4) ? 1 : -1];
   #define FASTDEF(x) float_conv x
   // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
   // (macro parameters are parenthesized so expression arguments expand correctly)
   #define MAGIC(SHIFT) (1.5f * (1 << (23-(SHIFT))) + 0.5f/(1 << (SHIFT)))
   #define ADDEND(SHIFT) (((150-(SHIFT)) << 23) + (1 << 22))
   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((temp).f = (x) + MAGIC(s), (temp).i - ADDEND(s))
   #define check_endianness()
#else
   // plain conversion: scale by 2^s and truncate
   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
   #define check_endianness()
   #define FASTDEF(x)
#endif
5136
5137 static void copy_samples(short *dest, float *src, int len)
5138 {
5139 int i;
5140 check_endianness();
5141 for (i=0; i < len; ++i) {
5142 FASTDEF(temp);
5143 int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5144 if ((unsigned int) (v + 32768) > 65535)
5145 v = v < 0 ? -32768 : 32767;
5146 dest[i] = v;
5147 }
5148 }
5149
5150 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5151 {
5152 #define BUFFER_SIZE 32
5153 float buffer[BUFFER_SIZE];
5154 int i,j,o,n = BUFFER_SIZE;
5155 check_endianness();
5156 for (o = 0; o < len; o += BUFFER_SIZE) {
5157 memset(buffer, 0, sizeof(buffer));
5158 if (o + n > len) n = len - o;
5159 for (j=0; j < num_c; ++j) {
5160 if (channel_position[num_c][j] & mask) {
5161 for (i=0; i < n; ++i)
5162 buffer[i] += data[j][d_offset+o+i];
5163 }
5164 }
5165 for (i=0; i < n; ++i) {
5166 FASTDEF(temp);
5167 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5168 if ((unsigned int) (v + 32768) > 65535)
5169 v = v < 0 ? -32768 : 32767;
5170 output[o+i] = v;
5171 }
5172 }
5173 }
5174
5175 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5176 {
5177 #define BUFFER_SIZE 32
5178 float buffer[BUFFER_SIZE];
5179 int i,j,o,n = BUFFER_SIZE >> 1;
5180 // o is the offset in the source data
5181 check_endianness();
5182 for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5183 // o2 is the offset in the output data
5184 int o2 = o << 1;
5185 memset(buffer, 0, sizeof(buffer));
5186 if (o + n > len) n = len - o;
5187 for (j=0; j < num_c; ++j) {
5188 int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5189 if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5190 for (i=0; i < n; ++i) {
5191 buffer[i*2+0] += data[j][d_offset+o+i];
5192 buffer[i*2+1] += data[j][d_offset+o+i];
5193 }
5194 } else if (m == PLAYBACK_LEFT) {
5195 for (i=0; i < n; ++i) {
5196 buffer[i*2+0] += data[j][d_offset+o+i];
5197 }
5198 } else if (m == PLAYBACK_RIGHT) {
5199 for (i=0; i < n; ++i) {
5200 buffer[i*2+1] += data[j][d_offset+o+i];
5201 }
5202 }
5203 }
5204 for (i=0; i < (n<<1); ++i) {
5205 FASTDEF(temp);
5206 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5207 if ((unsigned int) (v + 32768) > 65535)
5208 v = v < 0 ? -32768 : 32767;
5209 output[o2+i] = v;
5210 }
5211 }
5212 }
5213
5214 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5215 {
5216 int i;
5217 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5218 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5219 for (i=0; i < buf_c; ++i)
5220 compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5221 } else {
5222 int limit = buf_c < data_c ? buf_c : data_c;
5223 for (i=0; i < limit; ++i)
5224 copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5225 for ( ; i < buf_c; ++i)
5226 memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5227 }
5228 }
5229
5230 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5231 {
5232 float **output = NULL;
5233 int len = stb_vorbis_get_frame_float(f, NULL, &output);
5234 if (len > num_samples) len = num_samples;
5235 if (len)
5236 convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5237 return len;
5238 }
5239
5240 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5241 {
5242 int i;
5243 check_endianness();
5244 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5245 assert(buf_c == 2);
5246 for (i=0; i < buf_c; ++i)
5247 compute_stereo_samples(buffer, data_c, data, d_offset, len);
5248 } else {
5249 int limit = buf_c < data_c ? buf_c : data_c;
5250 int j;
5251 for (j=0; j < len; ++j) {
5252 for (i=0; i < limit; ++i) {
5253 FASTDEF(temp);
5254 float f = data[i][d_offset+j];
5255 int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5256 if ((unsigned int) (v + 32768) > 65535)
5257 v = v < 0 ? -32768 : 32767;
5258 *buffer++ = v;
5259 }
5260 for ( ; i < buf_c; ++i)
5261 *buffer++ = 0;
5262 }
5263 }
5264 }
5265
5266 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5267 {
5268 float **output;
5269 int len;
5270 if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5271 len = stb_vorbis_get_frame_float(f, NULL, &output);
5272 if (len) {
5273 if (len*num_c > num_shorts) len = num_shorts / num_c;
5274 convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5275 }
5276 return len;
5277 }
5278
5279 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5280 {
5281 float **outputs;
5282 int len = num_shorts / channels;
5283 int n=0;
5284 int z = f->channels;
5285 if (z > channels) z = channels;
5286 while (n < len) {
5287 int k = f->channel_buffer_end - f->channel_buffer_start;
5288 if (n+k >= len) k = len - n;
5289 if (k)
5290 convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5291 buffer += k*channels;
5292 n += k;
5293 f->channel_buffer_start += k;
5294 if (n == len) break;
5295 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5296 }
5297 return n;
5298 }
5299
5300 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5301 {
5302 float **outputs;
5303 int n=0;
5304 int z = f->channels;
5305 if (z > channels) z = channels;
5306 while (n < len) {
5307 int k = f->channel_buffer_end - f->channel_buffer_start;
5308 if (n+k >= len) k = len - n;
5309 if (k)
5310 convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5311 n += k;
5312 f->channel_buffer_start += k;
5313 if (n == len) break;
5314 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5315 }
5316 return n;
5317 }
5318
5319 #ifndef STB_VORBIS_NO_STDIO
5320 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5321 {
5322 int data_len, offset, total, limit, error;
5323 short *data;
5324 stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5325 if (v == NULL) return -1;
5326 limit = v->channels * 4096;
5327 *channels = v->channels;
5328 if (sample_rate)
5329 *sample_rate = v->sample_rate;
5330 offset = data_len = 0;
5331 total = limit;
5332 data = (short *) malloc(total * sizeof(*data));
5333 if (data == NULL) {
5334 stb_vorbis_close(v);
5335 return -2;
5336 }
5337 for (;;) {
5338 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5339 if (n == 0) break;
5340 data_len += n;
5341 offset += n * v->channels;
5342 if (offset + limit > total) {
5343 short *data2;
5344 total *= 2;
5345 data2 = (short *) realloc(data, total * sizeof(*data));
5346 if (data2 == NULL) {
5347 free(data);
5348 stb_vorbis_close(v);
5349 return -2;
5350 }
5351 data = data2;
5352 }
5353 }
5354 *output = data;
5355 stb_vorbis_close(v);
5356 return data_len;
5357 }
5358 #endif // NO_STDIO
5359
5360 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5361 {
5362 int data_len, offset, total, limit, error;
5363 short *data;
5364 stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5365 if (v == NULL) return -1;
5366 limit = v->channels * 4096;
5367 *channels = v->channels;
5368 if (sample_rate)
5369 *sample_rate = v->sample_rate;
5370 offset = data_len = 0;
5371 total = limit;
5372 data = (short *) malloc(total * sizeof(*data));
5373 if (data == NULL) {
5374 stb_vorbis_close(v);
5375 return -2;
5376 }
5377 for (;;) {
5378 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5379 if (n == 0) break;
5380 data_len += n;
5381 offset += n * v->channels;
5382 if (offset + limit > total) {
5383 short *data2;
5384 total *= 2;
5385 data2 = (short *) realloc(data, total * sizeof(*data));
5386 if (data2 == NULL) {
5387 free(data);
5388 stb_vorbis_close(v);
5389 return -2;
5390 }
5391 data = data2;
5392 }
5393 }
5394 *output = data;
5395 stb_vorbis_close(v);
5396 return data_len;
5397 }
5398 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5399
5400 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5401 {
5402 float **outputs;
5403 int len = num_floats / channels;
5404 int n=0;
5405 int z = f->channels;
5406 if (z > channels) z = channels;
5407 while (n < len) {
5408 int i,j;
5409 int k = f->channel_buffer_end - f->channel_buffer_start;
5410 if (n+k >= len) k = len - n;
5411 for (j=0; j < k; ++j) {
5412 for (i=0; i < z; ++i)
5413 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5414 for ( ; i < channels; ++i)
5415 *buffer++ = 0;
5416 }
5417 n += k;
5418 f->channel_buffer_start += k;
5419 if (n == len)
5420 break;
5421 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5422 break;
5423 }
5424 return n;
5425 }
5426
5427 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5428 {
5429 float **outputs;
5430 int n=0;
5431 int z = f->channels;
5432 if (z > channels) z = channels;
5433 while (n < num_samples) {
5434 int i;
5435 int k = f->channel_buffer_end - f->channel_buffer_start;
5436 if (n+k >= num_samples) k = num_samples - n;
5437 if (k) {
5438 for (i=0; i < z; ++i)
5439 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5440 for ( ; i < channels; ++i)
5441 memset(buffer[i]+n, 0, sizeof(float) * k);
5442 }
5443 n += k;
5444 f->channel_buffer_start += k;
5445 if (n == num_samples)
5446 break;
5447 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5448 break;
5449 }
5450 return n;
5451 }
5452 #endif // STB_VORBIS_NO_PULLDATA_API
5453
5454 /* Version history
5455 1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
5456 found with Mayhem by ForAllSecure
5457 1.16 - 2019-03-04 - fix warnings
5458 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
5459 1.14 - 2018-02-11 - delete bogus dealloca usage
5460 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
5461 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5462 1.11 - 2017-07-23 - fix MinGW compilation
5463 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5464 1.09 - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5465 1.08 - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5466 avoid discarding last frame of audio data
5467 1.07 - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5468 some more crash fixes when out of memory or with corrupt files
5469 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5470 some crash fixes when out of memory or with corrupt files
5471 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
5472 1.04 - 2014-08-27 - fix missing const-correct case in API
5473 1.03 - 2014-08-07 - Warning fixes
5474 1.02 - 2014-07-09 - Declare qsort compare function _cdecl on windows
5475 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float
5476 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5477 (API change) report sample rate for decode-full-file funcs
5478 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5479 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5480 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5481 0.99993 - remove assert that fired on legal files with empty tables
5482 0.99992 - rewind-to-start
5483 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5484 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5485 0.9998 - add a full-decode function with a memory source
5486 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5487 0.9996 - query length of vorbis stream in samples/seconds
5488 0.9995 - bugfix to another optimization that only happened in certain files
5489 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5490 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5491 0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5492 0.9991 - performance improvement of IMDCT
5493 0.999 - (should have been 0.9990) performance improvement of IMDCT
5494 0.998 - no-CRT support from Casey Muratori
5495 0.997 - bugfixes for bugs found by Terje Mathisen
5496 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5497 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5498 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5499 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5500 0.992 - fixes for MinGW warning
5501 0.991 - turn fast-float-conversion on by default
5502 0.990 - fix push-mode seek recovery if you seek into the headers
5503 0.98b - fix to bad release of 0.98
5504 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5505 0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5506 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5507 0.95 - clamping code for 16-bit functions
5508 0.94 - not publicly released
5509 0.93 - fixed all-zero-floor case (was decoding garbage)
5510 0.92 - fixed a memory leak
5511 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5512 0.90 - first public release
5513 */
5514
5515 #endif // STB_VORBIS_HEADER_ONLY
5516
5517
5518 /*
5519 ------------------------------------------------------------------------------
5520 This software is available under 2 licenses -- choose whichever you prefer.
5521 ------------------------------------------------------------------------------
5522 ALTERNATIVE A - MIT License
5523 Copyright (c) 2017 Sean Barrett
5524 Permission is hereby granted, free of charge, to any person obtaining a copy of
5525 this software and associated documentation files (the "Software"), to deal in
5526 the Software without restriction, including without limitation the rights to
5527 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5528 of the Software, and to permit persons to whom the Software is furnished to do
5529 so, subject to the following conditions:
5530 The above copyright notice and this permission notice shall be included in all
5531 copies or substantial portions of the Software.
5532 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5533 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5534 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5535 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5536 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5537 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5538 SOFTWARE.
5539 ------------------------------------------------------------------------------
5540 ALTERNATIVE B - Public Domain (www.unlicense.org)
5541 This is free and unencumbered software released into the public domain.
5542 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5543 software, either in source code form or as a compiled binary, for any purpose,
5544 commercial or non-commercial, and by any means.
5545 In jurisdictions that recognize copyright laws, the author or authors of this
5546 software dedicate any and all copyright interest in the software to the public
5547 domain. We make this dedication for the benefit of the public at large and to
5548 the detriment of our heirs and successors. We intend this dedication to be an
5549 overt act of relinquishment in perpetuity of all present and future rights to
5550 this software under copyright law.
5551 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5552 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5553 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5554 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5555 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5556 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5557 ------------------------------------------------------------------------------
5558 */