From 6c96fa6301b47826b6a237bcd7353c4f812bc6b2 Mon Sep 17 00:00:00 2001 From: David Reid Date: Thu, 5 Jul 2018 22:57:45 +1000 Subject: [PATCH] Update external audio libraries. --- src/external/dr_flac.h | 671 ++++--- src/external/dr_mp3.h | 71 +- src/external/dr_wav.h | 349 +++- src/external/mini_al.h | 4092 ++++++++++++++++++++++++++++++++++++---- 4 files changed, 4493 insertions(+), 690 deletions(-) diff --git a/src/external/dr_flac.h b/src/external/dr_flac.h index ff23bdd1..c836847e 100644 --- a/src/external/dr_flac.h +++ b/src/external/dr_flac.h @@ -1,5 +1,5 @@ // FLAC audio decoder. Public domain. See "unlicense" statement at the end of this file. -// dr_flac - v0.8g - 2018-04-19 +// dr_flac - v0.9.7 - 2018-07-05 // // David Reid - mackron@gmail.com @@ -111,7 +111,7 @@ // - This has not been tested on big-endian architectures. // - Rice codes in unencoded binary form (see https://xiph.org/flac/format.html#rice_partition) has not been tested. If anybody // knows where I can find some test files for this, let me know. -// - dr_flac is not thread-safe, but it's APIs can be called from any thread so long as you do your own synchronization. +// - dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization. // - When using Ogg encapsulation, a corrupted metadata block will result in drflac_open_with_metadata() and drflac_open() // returning inconsistent samples. @@ -467,7 +467,7 @@ typedef struct // value specified in the STREAMINFO block. drflac_uint8 channels; - // The bits per sample. Will be set to somthing like 16, 24, etc. + // The bits per sample. Will be set to something like 16, 24, etc. drflac_uint8 bitsPerSample; // The maximum block size, in samples. This number represents the number of samples in each channel (not combined). @@ -482,17 +482,16 @@ typedef struct // The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. 
drflac_container container; - - // The position of the seektable in the file. - drflac_uint64 seektablePos; - - // The size of the seektable. - drflac_uint32 seektableSize; + // The number of seekpoints in the seektable. + drflac_uint32 seekpointCount; // Information about the frame the decoder is currently sitting on. drflac_frame currentFrame; + // The index of the sample the decoder is currently sitting on. This is only used for seeking. + drflac_uint64 currentSample; + // The position of the first frame in the stream. This is only ever used for seeking. drflac_uint64 firstFramePos; @@ -504,6 +503,9 @@ typedef struct // A pointer to the decoded sample data. This is an offset of pExtraData. drflac_int32* pDecodedSamples; + // A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. + drflac_seekpoint* pSeekpoints; + // Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. void* _oggbs; @@ -577,7 +579,7 @@ drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, dr // See also: drflac_open_file_with_metadata(), drflac_open_memory_with_metadata(), drflac_open(), drflac_close() drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData); -// The same as drflac_open_with_metadata(), except attemps to open the stream even when a header block is not present. +// The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present. // // See also: drflac_open_with_metadata(), drflac_open_relaxed() drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData); @@ -766,7 +768,7 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, dr // Compile-time CPU feature support. 
#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) - #ifdef _MSC_VER + #if defined(_MSC_VER) && !defined(__clang__) #if _MSC_VER >= 1400 #include static void drflac__cpuid(int info[4], int fid) @@ -780,19 +782,8 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, dr #if defined(__GNUC__) || defined(__clang__) static void drflac__cpuid(int info[4], int fid) { - asm ( - "movl %[fid], %%eax\n\t" - "cpuid\n\t" - "movl %%eax, %[info0]\n\t" - "movl %%ebx, %[info1]\n\t" - "movl %%ecx, %[info2]\n\t" - "movl %%edx, %[info3]\n\t" - : [info0] "=rm"(info[0]), - [info1] "=rm"(info[1]), - [info2] "=rm"(info[2]), - [info3] "=rm"(info[3]) - : [fid] "rm"(fid) - : "eax", "ebx", "ecx", "edx" + __asm__ __volatile__ ( + "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0) ); } #else @@ -1447,7 +1438,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { - *pResultOut = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount); + *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount); bs->consumedBits += bitCount; bs->cache <<= bitCount; } else { @@ -1460,13 +1451,13 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. 
drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs); drflac_uint32 bitCountLo = bitCount - bitCountHi; - drflac_uint32 resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi); + drflac_uint32 resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi); if (!drflac__reload_cache(bs)) { return DRFLAC_FALSE; } - *pResultOut = (resultHi << bitCountLo) | DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo); + *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo); bs->consumedBits += bitCountLo; bs->cache <<= bitCountLo; return DRFLAC_TRUE; @@ -1492,6 +1483,7 @@ static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, dr return DRFLAC_TRUE; } +#ifdef DRFLAC_64BIT static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut) { drflac_assert(bitCount <= 64); @@ -1510,6 +1502,7 @@ static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, d *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo); return DRFLAC_TRUE; } +#endif // Function below is unused, but leaving it here in case I need to quickly add it again. 
#if 0 @@ -1742,7 +1735,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported() static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) { -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) #ifdef DRFLAC_64BIT return (drflac_uint32)__lzcnt64(x); #else @@ -1805,7 +1798,6 @@ static inline drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsign } drflac_uint32 setBitOffsetPlus1 = drflac__clz(bs->cache); - zeroCounter += setBitOffsetPlus1; setBitOffsetPlus1 += 1; bs->consumedBits += setBitOffsetPlus1; @@ -2299,31 +2291,74 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__simple(drflac_b drflac_assert(count > 0); drflac_assert(pSamplesOut != NULL); - drflac_uint32 zeroCountPart; - drflac_uint32 riceParamPart; + static drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; - drflac_uint32 i = 0; + drflac_uint32 zeroCountPart0; + drflac_uint32 zeroCountPart1; + drflac_uint32 zeroCountPart2; + drflac_uint32 zeroCountPart3; + drflac_uint32 riceParamPart0; + drflac_uint32 riceParamPart1; + drflac_uint32 riceParamPart2; + drflac_uint32 riceParamPart3; + drflac_uint32 i4 = 0; + drflac_uint32 count4 = count >> 2; + while (i4 < count4) { + // Rice extraction. 
+ if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart0, &riceParamPart0) || + !drflac__read_rice_parts(bs, riceParam, &zeroCountPart1, &riceParamPart1) || + !drflac__read_rice_parts(bs, riceParam, &zeroCountPart2, &riceParamPart2) || + !drflac__read_rice_parts(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { + return DRFLAC_FALSE; + } + + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart1 |= (zeroCountPart1 << riceParam); + riceParamPart2 |= (zeroCountPart2 << riceParam); + riceParamPart3 |= (zeroCountPart3 << riceParam); + + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; + riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; + riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + + if (bitsPerSample > 16) { + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3); + } else { + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3); + } + + i4 += 1; + pSamplesOut += 4; + } + + drflac_uint32 i = i4 << 2; while (i < count) { // Rice extraction. 
- if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart, &riceParamPart)) { + if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { return DRFLAC_FALSE; } // Rice reconstruction. - static drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; - - riceParamPart |= (zeroCountPart << riceParam); - riceParamPart = (riceParamPart >> 1) ^ t[riceParamPart & 0x01]; - //riceParamPart = (riceParamPart >> 1) ^ (~(riceParamPart & 0x01) + 1); + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + //riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1); // Sample reconstruction. if (bitsPerSample > 16) { - pSamplesOut[i] = riceParamPart + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i); + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); } else { - pSamplesOut[i] = riceParamPart + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i); + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); } i += 1; + pSamplesOut += 1; } return DRFLAC_TRUE; @@ -3124,6 +3159,8 @@ static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac) drflac_bool32 result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos); drflac_zero_memory(&pFlac->currentFrame, sizeof(pFlac->currentFrame)); + pFlac->currentSample = 0; + return result; } @@ -3136,18 +3173,42 @@ static DRFLAC_INLINE drflac_result drflac__seek_to_next_frame(drflac* pFlac) static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_uint64 sampleIndex) { - // We need to find the frame that contains the sample. To do this, we iterate over each frame and inspect it's header. If based on the - // header we can determine that the frame contains the sample, we do a full decode of that frame. 
- if (!drflac__seek_to_first_frame(pFlac)) { - return DRFLAC_FALSE; - } + drflac_assert(pFlac != NULL); - drflac_uint64 runningSampleCount = 0; - for (;;) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + drflac_bool32 isMidFrame = DRFLAC_FALSE; + + // If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. + drflac_uint64 runningSampleCount; + if (sampleIndex >= pFlac->currentSample) { + // Seeking forward. Need to seek from the current position. + runningSampleCount = pFlac->currentSample; + + // The frame header for the first frame may not yet have been read. We need to do that if necessary. + if (pFlac->currentSample == 0 && pFlac->currentFrame.samplesRemaining == 0) { + if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + return DRFLAC_FALSE; + } + } else { + isMidFrame = DRFLAC_TRUE; + } + } else { + // Seeking backwards. Need to seek from the start of the file. + runningSampleCount = 0; + + // Move back to the start. + if (!drflac__seek_to_first_frame(pFlac)) { return DRFLAC_FALSE; } + // Decode the first frame in preparation for sample-exact seeking below. + if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + return DRFLAC_FALSE; + } + } + + // We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its + // header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame. 
+ for (;;) { drflac_uint64 firstSampleInFrame = 0; drflac_uint64 lastSampleInFrame = 0; drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame); @@ -3156,35 +3217,52 @@ static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_u if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) { // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend // it never existed and keep iterating. - drflac_result result = drflac__decode_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. - drflac_uint64 samplesToDecode = (size_t)(sampleIndex - runningSampleCount); // <-- Safe cast because the maximum number of samples in a frame is 65535. - if (samplesToDecode == 0) { - return DRFLAC_TRUE; - } - return drflac_read_s32(pFlac, samplesToDecode, NULL) != 0; // <-- If this fails, something bad has happened (it should never fail). - } else { - if (result == DRFLAC_CRC_MISMATCH) { - continue; // CRC mismatch. Pretend this frame never existed. + drflac_uint64 samplesToDecode = sampleIndex - runningSampleCount; + + if (!isMidFrame) { + drflac_result result = drflac__decode_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. + return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; // <-- If this fails, something bad has happened (it should never fail). } else { - return DRFLAC_FALSE; + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; // CRC mismatch. Pretend this frame never existed. + } else { + return DRFLAC_FALSE; + } } + } else { + // We started seeking mid-frame which means we need to skip the frame decoding part. + return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; } } else { // It's not in this frame. 
We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this // frame never existed and leave the running sample count untouched. - drflac_result result = drflac__seek_to_next_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - runningSampleCount += sampleCountInThisFrame; - } else { - if (result == DRFLAC_CRC_MISMATCH) { - continue; // CRC mismatch. Pretend this frame never existed. + if (!isMidFrame) { + drflac_result result = drflac__seek_to_next_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningSampleCount += sampleCountInThisFrame; } else { - return DRFLAC_FALSE; + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; // CRC mismatch. Pretend this frame never existed. + } else { + return DRFLAC_FALSE; + } } + } else { + // We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with + // drflac__seek_to_next_frame() which only works if the decoder is sitting on the byte just after the frame header. + runningSampleCount += pFlac->currentFrame.samplesRemaining; + pFlac->currentFrame.samplesRemaining = 0; + isMidFrame = DRFLAC_FALSE; } } + + next_iteration: + // Grab the next frame in preparation for the next iteration. + if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + return DRFLAC_FALSE; + } } } @@ -3193,95 +3271,107 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui { drflac_assert(pFlac != NULL); - if (pFlac->seektablePos == 0) { + if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) { return DRFLAC_FALSE; } - if (!drflac__seek_to_byte(&pFlac->bs, pFlac->seektablePos)) { - return DRFLAC_FALSE; - } - // The number of seek points is derived from the size of the SEEKTABLE block. - drflac_uint32 seekpointCount = pFlac->seektableSize / 18; // 18 = the size of each seek point. - if (seekpointCount == 0) { - return DRFLAC_FALSE; // Would this ever happen? 
- } - - - drflac_seekpoint closestSeekpoint = {0, 0, 0}; - - drflac_uint32 seekpointsRemaining = seekpointCount; - while (seekpointsRemaining > 0) { - drflac_seekpoint seekpoint; - if (!drflac__read_uint64(&pFlac->bs, 64, &seekpoint.firstSample)) { - break; - } - if (!drflac__read_uint64(&pFlac->bs, 64, &seekpoint.frameOffset)) { - break; - } - if (!drflac__read_uint16(&pFlac->bs, 16, &seekpoint.sampleCount)) { + drflac_uint32 iClosestSeekpoint = 0; + for (drflac_uint32 iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { + if (pFlac->pSeekpoints[iSeekpoint].firstSample*pFlac->channels >= sampleIndex) { break; } - // Note that the seekpoint sample is based on a single channel. The input sample (sampleIndex) is based on interleaving, thus - // we need to multiple the seekpoint's sample by the channel count. - if (seekpoint.firstSample*pFlac->channels > sampleIndex) { - break; + iClosestSeekpoint = iSeekpoint; + } + + + drflac_bool32 isMidFrame = DRFLAC_FALSE; + + // At this point we should have found the seekpoint closest to our sample. If we are seeking forward and the closest seekpoint is _before_ the current sample, we + // just seek forward from where we are. Otherwise we start seeking from the seekpoint's first sample. + drflac_uint64 runningSampleCount; + if ((sampleIndex >= pFlac->currentSample) && (pFlac->pSeekpoints[iClosestSeekpoint].firstSample*pFlac->channels <= pFlac->currentSample)) { + // Optimized case. Just seek forward from where we are. + runningSampleCount = pFlac->currentSample; + + // The frame header for the first frame may not yet have been read. We need to do that if necessary. + if (pFlac->currentSample == 0 && pFlac->currentFrame.samplesRemaining == 0) { + if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + return DRFLAC_FALSE; + } + } else { + isMidFrame = DRFLAC_TRUE; } + } else { + // Slower case. Seek to the start of the seekpoint and then seek forward from there. 
+ runningSampleCount = pFlac->pSeekpoints[iClosestSeekpoint].firstSample*pFlac->channels; - closestSeekpoint = seekpoint; - seekpointsRemaining -= 1; - } - - // At this point we should have found the seekpoint closest to our sample. We need to seek to it using basically the same - // technique as we use with the brute force method. - if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos + closestSeekpoint.frameOffset)) { - return DRFLAC_FALSE; - } - - drflac_uint64 runningSampleCount = closestSeekpoint.firstSample*pFlac->channels; - for (;;) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos + pFlac->pSeekpoints[iClosestSeekpoint].frameOffset)) { return DRFLAC_FALSE; } + // Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. + if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + return DRFLAC_FALSE; + } + } + + for (;;) { drflac_uint64 firstSampleInFrame = 0; drflac_uint64 lastSampleInFrame = 0; drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame); drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1; if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) { - // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend + // The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend // it never existed and keep iterating. - drflac_result result = drflac__decode_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. 
- drflac_uint64 samplesToDecode = (size_t)(sampleIndex - runningSampleCount); // <-- Safe cast because the maximum number of samples in a frame is 65535. - if (samplesToDecode == 0) { - return DRFLAC_TRUE; - } - return drflac_read_s32(pFlac, samplesToDecode, NULL) != 0; // <-- If this fails, something bad has happened (it should never fail). - } else { - if (result == DRFLAC_CRC_MISMATCH) { - continue; // CRC mismatch. Pretend this frame never existed. + drflac_uint64 samplesToDecode = sampleIndex - runningSampleCount; + + if (!isMidFrame) { + drflac_result result = drflac__decode_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. + return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; // <-- If this fails, something bad has happened (it should never fail). } else { - return DRFLAC_FALSE; + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; // CRC mismatch. Pretend this frame never existed. + } else { + return DRFLAC_FALSE; + } } + } else { + // We started seeking mid-frame which means we need to skip the frame decoding part. + return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; } } else { // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this // frame never existed and leave the running sample count untouched. - drflac_result result = drflac__seek_to_next_frame(pFlac); - if (result == DRFLAC_SUCCESS) { - runningSampleCount += sampleCountInThisFrame; - } else { - if (result == DRFLAC_CRC_MISMATCH) { - continue; // CRC mismatch. Pretend this frame never existed. + if (!isMidFrame) { + drflac_result result = drflac__seek_to_next_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningSampleCount += sampleCountInThisFrame; } else { - return DRFLAC_FALSE; + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; // CRC mismatch. Pretend this frame never existed. 
+ } else { + return DRFLAC_FALSE; + } } + } else { + // We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with + // drflac__seek_to_next_frame() which only works if the decoder is sitting on the byte just after the frame header. + runningSampleCount += pFlac->currentFrame.samplesRemaining; + pFlac->currentFrame.samplesRemaining = 0; + isMidFrame = DRFLAC_FALSE; } } + + next_iteration: + // Grab the next frame in preparation for the next iteration. + if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + return DRFLAC_FALSE; + } } } @@ -3389,10 +3479,8 @@ drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, return DRFLAC_TRUE; } -drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) +drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize) { - drflac_assert(pFlac != NULL); - // We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that // we'll be sitting on byte 42. 
drflac_uint64 runningFilePos = 42; @@ -3403,7 +3491,7 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) drflac_uint8 isLastBlock = 0; drflac_uint8 blockType; drflac_uint32 blockSize; - if (!drflac__read_and_decode_block_header(pFlac->bs.onRead, pFlac->bs.pUserData, &isLastBlock, &blockType, &blockSize)) { + if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { return DRFLAC_FALSE; } runningFilePos += 4; @@ -3418,13 +3506,13 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) { case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION: { - if (pFlac->onMeta) { + if (onMeta) { void* pRawData = DRFLAC_MALLOC(blockSize); if (pRawData == NULL) { return DRFLAC_FALSE; } - if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) { + if (onRead(pUserData, pRawData, blockSize) != blockSize) { DRFLAC_FREE(pRawData); return DRFLAC_FALSE; } @@ -3434,7 +3522,7 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) metadata.data.application.id = drflac__be2host_32(*(drflac_uint32*)pRawData); metadata.data.application.pData = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32)); metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32); - pFlac->onMeta(pFlac->pUserDataMD, &metadata); + onMeta(pUserDataMD, &metadata); DRFLAC_FREE(pRawData); } @@ -3445,13 +3533,13 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) seektablePos = runningFilePos; seektableSize = blockSize; - if (pFlac->onMeta) { + if (onMeta) { void* pRawData = DRFLAC_MALLOC(blockSize); if (pRawData == NULL) { return DRFLAC_FALSE; } - if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) { + if (onRead(pUserData, pRawData, blockSize) != blockSize) { DRFLAC_FREE(pRawData); return DRFLAC_FALSE; } @@ -3469,7 +3557,7 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) pSeekpoint->sampleCount = drflac__be2host_16(pSeekpoint->sampleCount); } - 
pFlac->onMeta(pFlac->pUserDataMD, &metadata); + onMeta(pUserDataMD, &metadata); DRFLAC_FREE(pRawData); } @@ -3477,13 +3565,13 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT: { - if (pFlac->onMeta) { + if (onMeta) { void* pRawData = DRFLAC_MALLOC(blockSize); if (pRawData == NULL) { return DRFLAC_FALSE; } - if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) { + if (onRead(pUserData, pRawData, blockSize) != blockSize) { DRFLAC_FREE(pRawData); return DRFLAC_FALSE; } @@ -3496,7 +3584,7 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) metadata.data.vorbis_comment.vendor = pRunningData; pRunningData += metadata.data.vorbis_comment.vendorLength; metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4; metadata.data.vorbis_comment.comments = pRunningData; - pFlac->onMeta(pFlac->pUserDataMD, &metadata); + onMeta(pUserDataMD, &metadata); DRFLAC_FREE(pRawData); } @@ -3504,13 +3592,13 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET: { - if (pFlac->onMeta) { + if (onMeta) { void* pRawData = DRFLAC_MALLOC(blockSize); if (pRawData == NULL) { return DRFLAC_FALSE; } - if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) { + if (onRead(pUserData, pRawData, blockSize) != blockSize) { DRFLAC_FREE(pRawData); return DRFLAC_FALSE; } @@ -3524,7 +3612,7 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) metadata.data.cuesheet.isCD = ((pRunningData[0] & 0x80) >> 7) != 0; pRunningData += 259; metadata.data.cuesheet.trackCount = pRunningData[0]; pRunningData += 1; metadata.data.cuesheet.pTrackData = (const drflac_uint8*)pRunningData; - pFlac->onMeta(pFlac->pUserDataMD, &metadata); + onMeta(pUserDataMD, &metadata); DRFLAC_FREE(pRawData); } @@ -3532,13 +3620,13 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) case 
DRFLAC_METADATA_BLOCK_TYPE_PICTURE: { - if (pFlac->onMeta) { + if (onMeta) { void* pRawData = DRFLAC_MALLOC(blockSize); if (pRawData == NULL) { return DRFLAC_FALSE; } - if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) { + if (onRead(pUserData, pRawData, blockSize) != blockSize) { DRFLAC_FREE(pRawData); return DRFLAC_FALSE; } @@ -3558,7 +3646,7 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) metadata.data.picture.indexColorCount = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4; metadata.data.picture.pictureDataSize = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4; metadata.data.picture.pPictureData = (const drflac_uint8*)pRunningData; - pFlac->onMeta(pFlac->pUserDataMD, &metadata); + onMeta(pUserDataMD, &metadata); DRFLAC_FREE(pRawData); } @@ -3566,14 +3654,14 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) case DRFLAC_METADATA_BLOCK_TYPE_PADDING: { - if (pFlac->onMeta) { + if (onMeta) { metadata.data.padding.unused = 0; // Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. - if (!pFlac->bs.onSeek(pFlac->bs.pUserData, blockSize, drflac_seek_origin_current)) { - isLastBlock = DRFLAC_TRUE; // An error occured while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. + if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { + isLastBlock = DRFLAC_TRUE; // An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. } else { - pFlac->onMeta(pFlac->pUserDataMD, &metadata); + onMeta(pUserDataMD, &metadata); } } } break; @@ -3581,31 +3669,31 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) case DRFLAC_METADATA_BLOCK_TYPE_INVALID: { // Invalid chunk. Just skip over this one. 
- if (pFlac->onMeta) { - if (!pFlac->bs.onSeek(pFlac->bs.pUserData, blockSize, drflac_seek_origin_current)) { - isLastBlock = DRFLAC_TRUE; // An error occured while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. + if (onMeta) { + if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { + isLastBlock = DRFLAC_TRUE; // An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. } } - } + } break; default: { // It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we // can at the very least report the chunk to the application and let it look at the raw data. - if (pFlac->onMeta) { + if (onMeta) { void* pRawData = DRFLAC_MALLOC(blockSize); if (pRawData == NULL) { return DRFLAC_FALSE; } - if (pFlac->bs.onRead(pFlac->bs.pUserData, pRawData, blockSize) != blockSize) { + if (onRead(pUserData, pRawData, blockSize) != blockSize) { DRFLAC_FREE(pRawData); return DRFLAC_FALSE; } metadata.pRawData = pRawData; metadata.rawDataSize = blockSize; - pFlac->onMeta(pFlac->pUserDataMD, &metadata); + onMeta(pUserDataMD, &metadata); DRFLAC_FREE(pRawData); } @@ -3613,8 +3701,8 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) } // If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. 
- if (pFlac->onMeta == NULL && blockSize > 0) { - if (!pFlac->bs.onSeek(pFlac->bs.pUserData, blockSize, drflac_seek_origin_current)) { + if (onMeta == NULL && blockSize > 0) { + if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { isLastBlock = DRFLAC_TRUE; } } @@ -3625,9 +3713,9 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac* pFlac) } } - pFlac->seektablePos = seektablePos; - pFlac->seektableSize = seektableSize; - pFlac->firstFramePos = runningFilePos; + *pSeektablePos = seektablePos; + *pSeektableSize = seektableSize; + *pFirstFramePos = runningFilePos; return DRFLAC_TRUE; } @@ -3917,7 +4005,7 @@ drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserD // The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works -// in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is architecured +// in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed // in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type // dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from // the physical Ogg bitstream are converted and delivered in native FLAC format. @@ -4024,6 +4112,8 @@ static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_og return DRFLAC_FALSE; } } +#else + (void)recoveryMethod; // <-- Silence a warning. #endif oggbs->currentPageHeader = header; @@ -4263,13 +4353,13 @@ drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleInde // // Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg // bitstream. 
This is important to consider because it means we cannot read data from the drflac_bs object using the - // standard drflac__*() APIs because that will read in extra data for it's own internal caching which in turn breaks + // standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks // the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read // using the native FLAC decoding APIs, such as drflac__read_next_frame_header(), need to be re-implemented so as to // avoid the use of the drflac_bs object. // // Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons: - // 1) Seeking is already partially accellerated using Ogg's paging system in the code block above. + // 1) Seeking is already partially accelerated using Ogg's paging system in the code block above. // 2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon. // 3) Simplicity. if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { @@ -4462,7 +4552,7 @@ drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_pro // If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next - // packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialiation phase for Ogg is to create the + // packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the // Ogg bistream object. pInit->hasMetadataBlocks = DRFLAC_TRUE; // <-- Always have at least VORBIS_COMMENT metadata block. return DRFLAC_TRUE; @@ -4604,43 +4694,116 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p #ifndef DR_FLAC_NO_OGG // There's additional data required for Ogg streams. 
+ drflac_uint32 oggbsAllocationSize = 0; if (init.container == drflac_container_ogg) { - allocationSize += sizeof(drflac_oggbs); + oggbsAllocationSize = sizeof(drflac_oggbs); + allocationSize += oggbsAllocationSize; + } + + drflac_oggbs oggbs; + drflac_zero_memory(&oggbs, sizeof(oggbs)); + if (init.container == drflac_container_ogg) { + oggbs.onRead = onRead; + oggbs.onSeek = onSeek; + oggbs.pUserData = pUserData; + oggbs.currentBytePos = init.oggFirstBytePos; + oggbs.firstBytePos = init.oggFirstBytePos; + oggbs.serialNumber = init.oggSerial; + oggbs.bosPageHeader = init.oggBosHeader; + oggbs.bytesRemainingInPage = 0; } #endif + // This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to + // consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading + // and decoding the metadata. + drflac_uint64 firstFramePos = 42; // <-- We know we are at byte 42 at this point. 
+ drflac_uint64 seektablePos = 0; + drflac_uint32 seektableSize = 0; + if (init.hasMetadataBlocks) { + drflac_read_proc onReadOverride = onRead; + drflac_seek_proc onSeekOverride = onSeek; + void* pUserDataOverride = pUserData; + +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) { + onReadOverride = drflac__on_read_ogg; + onSeekOverride = drflac__on_seek_ogg; + pUserDataOverride = (void*)&oggbs; + } +#endif + + if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize)) { + return NULL; + } + + allocationSize += seektableSize; + } + + drflac* pFlac = (drflac*)DRFLAC_MALLOC(allocationSize); drflac__init_from_info(pFlac, &init); pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE); #ifndef DR_FLAC_NO_OGG if (init.container == drflac_container_ogg) { - drflac_oggbs* oggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize); - oggbs->onRead = onRead; - oggbs->onSeek = onSeek; - oggbs->pUserData = pUserData; - oggbs->currentBytePos = init.oggFirstBytePos; - oggbs->firstBytePos = init.oggFirstBytePos; - oggbs->serialNumber = init.oggSerial; - oggbs->bosPageHeader = init.oggBosHeader; - oggbs->bytesRemainingInPage = 0; + drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize); + *pInternalOggbs = oggbs; // The Ogg bistream needs to be layered on top of the original bitstream. pFlac->bs.onRead = drflac__on_read_ogg; pFlac->bs.onSeek = drflac__on_seek_ogg; - pFlac->bs.pUserData = (void*)oggbs; - pFlac->_oggbs = (void*)oggbs; + pFlac->bs.pUserData = (void*)pInternalOggbs; + pFlac->_oggbs = (void*)pInternalOggbs; } #endif - // Decode metadata before returning. 
- if (init.hasMetadataBlocks) { - if (!drflac__read_and_decode_metadata(pFlac)) { - DRFLAC_FREE(pFlac); - return NULL; + pFlac->firstFramePos = firstFramePos; + + // NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) + { + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + else +#endif + { + // If we have a seektable we need to load it now, making sure we move back to where we were previously. + if (seektablePos != 0) { + pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints); + pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize); + + // Seek to the seektable, then just read directly into our seektable buffer. + if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) { + if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) { + // Endian swap. + for (drflac_uint32 iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { + pFlac->pSeekpoints[iSeekpoint].firstSample = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstSample); + pFlac->pSeekpoints[iSeekpoint].frameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].frameOffset); + pFlac->pSeekpoints[iSeekpoint].sampleCount = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].sampleCount); + } + } else { + // Failed to read the seektable. Pretend we don't have one. + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + + // We need to seek back to where we were. If this fails it's a critical error. + if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFramePos, drflac_seek_origin_start)) { + return NULL; + } + } else { + // Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. 
+ pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } } } + + // If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode // the first frame. if (!init.hasStreamInfoBlock) { @@ -4808,6 +4971,7 @@ static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_ drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; drflac_assert(memoryStream != NULL); drflac_assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start)); + drflac_assert(offset <= (drflac_int64)memoryStream->dataSize); if (origin == drflac_seek_origin_current) { if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) { @@ -5024,18 +5188,25 @@ drflac_uint64 drflac__seek_forward_by_samples(drflac* pFlac, drflac_uint64 sampl break; // Couldn't read the next frame, so just break from the loop and return. } } else { - samplesRead += 1; - pFlac->currentFrame.samplesRemaining -= 1; - samplesToRead -= 1; + if (pFlac->currentFrame.samplesRemaining > samplesToRead) { + samplesRead += samplesToRead; + pFlac->currentFrame.samplesRemaining -= (drflac_uint32)samplesToRead; // <-- Safe cast. Will always be < currentFrame.samplesRemaining < 65536. + samplesToRead = 0; + } else { + samplesRead += pFlac->currentFrame.samplesRemaining; + samplesToRead -= pFlac->currentFrame.samplesRemaining; + pFlac->currentFrame.samplesRemaining = 0; + } } } + pFlac->currentSample += samplesRead; return samplesRead; } drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* bufferOut) { - // Note that is allowed to be null, in which case this will be treated as something like a seek. + // Note that is allowed to be null, in which case this will act like a seek. 
if (pFlac == NULL || samplesToRead == 0) { return 0; } @@ -5062,10 +5233,11 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac drflac_uint64 misalignedSampleCount = samplesReadFromFrameSoFar % channelCount; if (misalignedSampleCount > 0) { drflac_uint64 misalignedSamplesRead = drflac__read_s32__misaligned(pFlac, misalignedSampleCount, bufferOut); - samplesRead += misalignedSamplesRead; + samplesRead += misalignedSamplesRead; samplesReadFromFrameSoFar += misalignedSamplesRead; - bufferOut += misalignedSamplesRead; - samplesToRead -= misalignedSamplesRead; + bufferOut += misalignedSamplesRead; + samplesToRead -= misalignedSamplesRead; + pFlac->currentSample += misalignedSamplesRead; } @@ -5150,14 +5322,14 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac } drflac_uint64 alignedSamplesRead = alignedSampleCountPerChannel * channelCount; - samplesRead += alignedSamplesRead; + samplesRead += alignedSamplesRead; samplesReadFromFrameSoFar += alignedSamplesRead; - bufferOut += alignedSamplesRead; - samplesToRead -= alignedSamplesRead; + bufferOut += alignedSamplesRead; + samplesToRead -= alignedSamplesRead; + pFlac->currentSample += alignedSamplesRead; pFlac->currentFrame.samplesRemaining -= (unsigned int)alignedSamplesRead; - // At this point we may still have some excess samples left to read. 
if (samplesToRead > 0 && pFlac->currentFrame.samplesRemaining > 0) { drflac_uint64 excessSamplesRead = 0; @@ -5167,10 +5339,11 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac excessSamplesRead = drflac__read_s32__misaligned(pFlac, pFlac->currentFrame.samplesRemaining, bufferOut); } - samplesRead += excessSamplesRead; + samplesRead += excessSamplesRead; samplesReadFromFrameSoFar += excessSamplesRead; - bufferOut += excessSamplesRead; - samplesToRead -= excessSamplesRead; + bufferOut += excessSamplesRead; + samplesToRead -= excessSamplesRead; + pFlac->currentSample += excessSamplesRead; } } } @@ -5196,8 +5369,8 @@ drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac } totalSamplesRead += samplesJustRead; - samplesToRead -= samplesJustRead; - pBufferOut += samplesJustRead; + samplesToRead -= samplesJustRead; + pBufferOut += samplesJustRead; } return totalSamplesRead; @@ -5221,8 +5394,8 @@ drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* } totalSamplesRead += samplesJustRead; - samplesToRead -= samplesJustRead; - pBufferOut += samplesJustRead; + samplesToRead -= samplesJustRead; + pBufferOut += samplesJustRead; } return totalSamplesRead; @@ -5241,33 +5414,57 @@ drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex) } if (sampleIndex == 0) { + pFlac->currentSample = 0; return drflac__seek_to_first_frame(pFlac); - } + } else { + drflac_bool32 wasSuccessful = DRFLAC_FALSE; - // Clamp the sample to the end. - if (sampleIndex >= pFlac->totalSampleCount) { - sampleIndex = pFlac->totalSampleCount - 1; - } - - - // Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so - // we'll instead use Ogg's natural seeking facility. 
-#ifndef DR_FLAC_NO_OGG - if (pFlac->container == drflac_container_ogg) - { - return drflac_ogg__seek_to_sample(pFlac, sampleIndex); - } - else -#endif - { - // First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. - if (!drflac__seek_to_sample__seek_table(pFlac, sampleIndex)) { - return drflac__seek_to_sample__brute_force(pFlac, sampleIndex); + // Clamp the sample to the end. + if (sampleIndex >= pFlac->totalSampleCount) { + sampleIndex = pFlac->totalSampleCount - 1; } + + // If the target sample and the current sample are in the same frame we just move the position forward. + if (sampleIndex > pFlac->currentSample) { + // Forward. + drflac_uint32 offset = (drflac_uint32)(sampleIndex - pFlac->currentSample); + if (pFlac->currentFrame.samplesRemaining > offset) { + pFlac->currentFrame.samplesRemaining -= offset; + pFlac->currentSample = sampleIndex; + return DRFLAC_TRUE; + } + } else { + // Backward. + drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentSample - sampleIndex); + drflac_uint32 currentFrameSampleCount = pFlac->currentFrame.header.blockSize * drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); + drflac_uint32 currentFrameSamplesConsumed = (drflac_uint32)(currentFrameSampleCount - pFlac->currentFrame.samplesRemaining); + if (currentFrameSamplesConsumed > offsetAbs) { + pFlac->currentFrame.samplesRemaining += offsetAbs; + pFlac->currentSample = sampleIndex; + return DRFLAC_TRUE; + } + } + + // Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so + // we'll instead use Ogg's natural seeking facility. + #ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + wasSuccessful = drflac_ogg__seek_to_sample(pFlac, sampleIndex); + } + else + #endif + { + // First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. 
+ wasSuccessful = drflac__seek_to_sample__seek_table(pFlac, sampleIndex); + if (!wasSuccessful) { + wasSuccessful = drflac__seek_to_sample__brute_force(pFlac, sampleIndex); + } + } + + pFlac->currentSample = sampleIndex; + return wasSuccessful; } - - - return DRFLAC_TRUE; } @@ -5524,6 +5721,32 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, dr // REVISION HISTORY // +// v0.9.7 - 2018-07-05 +// - Fix a warning. +// +// v0.9.6 - 2018-06-29 +// - Fix some typos. +// +// v0.9.5 - 2018-06-23 +// - Fix some warnings. +// +// v0.9.4 - 2018-06-14 +// - Optimizations to seeking. +// - Clean up. +// +// v0.9.3 - 2018-05-22 +// - Bug fix. +// +// v0.9.2 - 2018-05-12 +// - Fix a compilation error due to a missing break statement. +// +// v0.9.1 - 2018-04-29 +// - Fix compilation error with Clang. +// +// v0.9 - 2018-04-24 +// - Fix Clang build. +// - Start using major.minor.revision versioning. +// // v0.8g - 2018-04-19 // - Fix build on non-x86/x64 architectures. // diff --git a/src/external/dr_mp3.h b/src/external/dr_mp3.h index 564bf377..467d319d 100644 --- a/src/external/dr_mp3.h +++ b/src/external/dr_mp3.h @@ -1,5 +1,5 @@ // MP3 audio decoder. Public domain. See "unlicense" statement at the end of this file. 
-// dr_mp3 - v0.2.3 - 2018-04-29 +// dr_mp3 - v0.2.5 - 2018-06-22 // // David Reid - mackron@gmail.com // @@ -339,7 +339,7 @@ void drmp3_free(void* p); #define DRMP3_HDR_GET_LAYER(h) (((h[1]) >> 1) & 3) #define DRMP3_HDR_GET_BITRATE(h) ((h[2]) >> 4) #define DRMP3_HDR_GET_SAMPLE_RATE(h) (((h[2]) >> 2) & 3) -#define DRMP3_HDR_GET_MY_SAMPLE_RATE(h) (DRMP3_HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3) +#define DRMP3_HDR_GET_MY_SAMPLE_RATE(h) (DRMP3_HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3) #define DRMP3_HDR_IS_FRAME_576(h) ((h[1] & 14) == 2) #define DRMP3_HDR_IS_LAYER_1(h) ((h[1] & 6) == 6) @@ -412,10 +412,10 @@ static int drmp3_have_simd() #ifdef MINIMP3_TEST static int g_counter; if (g_counter++ > 100) - goto test_nosimd; + return 0; #endif if (g_have_simd) - return g_have_simd - 1; + goto end; drmp3_cpuid(CPUInfo, 0); if (CPUInfo[0] > 0) { @@ -423,11 +423,9 @@ static int drmp3_have_simd() g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */ return g_have_simd - 1; } -#ifdef MINIMP3_TEST -test_nosimd: -#endif - g_have_simd = 1; - return 0; + +end: + return g_have_simd - 1; #endif } #elif defined(__ARM_NEON) || defined(__aarch64__) @@ -760,8 +758,7 @@ static void drmp3_L12_apply_scf_384(drmp3_L12_scale_info *sci, const float *scf, static int drmp3_L3_read_side_info(drmp3_bs *bs, drmp3_L3_gr_info *gr, const drmp3_uint8 *hdr) { - static const drmp3_uint8 g_scf_long[9][23] = { - { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + static const drmp3_uint8 g_scf_long[8][23] = { { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 }, { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, @@ -771,8 +768,7 @@ static int drmp3_L3_read_side_info(drmp3_bs *bs, drmp3_L3_gr_info *gr, const drm { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 }, { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 } }; - 
static const drmp3_uint8 g_scf_short[9][40] = { - { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + static const drmp3_uint8 g_scf_short[8][40] = { { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, @@ -782,8 +778,7 @@ static int drmp3_L3_read_side_info(drmp3_bs *bs, drmp3_L3_gr_info *gr, const drm { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 }, { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 } }; - static const drmp3_uint8 g_scf_mixed[9][40] = { - { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + static const drmp3_uint8 g_scf_mixed[8][40] = { { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, @@ -796,7 +791,7 @@ static int drmp3_L3_read_side_info(drmp3_bs *bs, drmp3_L3_gr_info *gr, const drm unsigned tables, scfsi = 0; int main_data_begin, part_23_sum = 0; - int sr_idx = DRMP3_HDR_GET_MY_SAMPLE_RATE(hdr); + int sr_idx = DRMP3_HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0); int gr_count = DRMP3_HDR_IS_MONO(hdr) ? 
1 : 2; if (DRMP3_HDR_TEST_MPEG1(hdr)) @@ -1020,25 +1015,25 @@ static float drmp3_L3_pow_43(int x) static void drmp3_L3_huffman(float *dst, drmp3_bs *bs, const drmp3_L3_gr_info *gr_info, const float *scf, int layer3gr_limit) { static const float g_pow43_signed[32] = { 0,0,1,-1,2.519842f,-2.519842f,4.326749f,-4.326749f,6.349604f,-6.349604f,8.549880f,-8.549880f,10.902724f,-10.902724f,13.390518f,-13.390518f,16.000000f,-16.000000f,18.720754f,-18.720754f,21.544347f,-21.544347f,24.463781f,-24.463781f,27.473142f,-27.473142f,30.567351f,-30.567351f,33.741992f,-33.741992f,36.993181f,-36.993181f }; - static const drmp3_int16 tab0[32] = { 0, }; - static const drmp3_int16 tab1[] = { 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256 }; - static const drmp3_int16 tab2[] = { -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288 }; - static const drmp3_int16 tab3[] = { -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288 }; - static const drmp3_int16 tab5[] = { -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258 }; - static const drmp3_int16 tab6[] = { -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259 }; - static const drmp3_int16 tab7[] = { -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258 }; - static const drmp3_int16 
tab8[] = { -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258 }; - static const drmp3_int16 tab9[] = { -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259 }; - static const drmp3_int16 tab10[] = { -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258 }; - static const drmp3_int16 tab11[] = { -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290 }; - static const drmp3_int16 tab12[] = { 
-252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259 }; - static const drmp3_int16 tab13[] = { -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792
,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258 }; - static const drmp3_int16 tab15[] = { -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259 }; - static const drmp3_int16 tab16[] = { 
-251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258 }; - static const drmp3_int16 tab24[] = { 
-253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 }; + static const drmp3_int16 tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256, + -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288, + 
-255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288, + -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258, + -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259, + -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258, + -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258, + -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259, + -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258, + 
-251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290, + -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259, + -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,3
16,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258, + -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1
110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259, + -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,27
4,288,258, + -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 }; static const drmp3_uint8 tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205}; static const drmp3_uint8 tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 }; - static const drmp3_int16 * const tabindex[2*16] = { tab0,tab1,tab2,tab3,tab0,tab5,tab6,tab7,tab8,tab9,tab10,tab11,tab12,tab13,tab0,tab15,tab16,tab16,tab16,tab16,tab16,tab16,tab16,tab16,tab24,tab24,tab24,tab24,tab24,tab24,tab24,tab24 }; + static 
const drmp3_int16 tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 }; static const drmp3_uint8 g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 }; #define DRMP3_PEEK_BITS(n) (bs_cache >> (32 - n)) @@ -1058,7 +1053,7 @@ static void drmp3_L3_huffman(float *dst, drmp3_bs *bs, const drmp3_L3_gr_info *g { int tab_num = gr_info->table_select[ireg]; int sfb_cnt = gr_info->region_count[ireg++]; - const short *codebook = tabindex[tab_num]; + const short *codebook = tabs + tabindex[tab_num]; int linbits = g_linbits[tab_num]; do { @@ -1227,7 +1222,7 @@ static void drmp3_L3_intensity_stereo(float *left, drmp3_uint8 *ist_pos, const d int prev = itop - max_blocks; ist_pos[itop] = (drmp3_uint8)(max_band[i] >= prev ? default_pos : ist_pos[prev]); } - drmp3_L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress&1); + drmp3_L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1); } static void drmp3_L3_reorder(float *grbuf, float *scratch, const drmp3_uint8 *sfb) @@ -2776,6 +2771,12 @@ void drmp3_free(void* p) // REVISION HISTORY // =============== // +// v0.2.5 - 2018-06-22 +// - Bring up to date with minimp3. +// +// v0.2.4 - 2018-05-12 +// - Bring up to date with minimp3. +// // v0.2.3 - 2018-04-29 // - Fix TCC build. // diff --git a/src/external/dr_wav.h b/src/external/dr_wav.h index f11dc2a8..b071d00e 100644 --- a/src/external/dr_wav.h +++ b/src/external/dr_wav.h @@ -1,5 +1,5 @@ // WAV audio loader and writer. Public domain. See "unlicense" statement at the end of this file. 
-// dr_wav - v0.7f - 2018-02-05 +// dr_wav - v0.8.1 - 2018-06-29 // // David Reid - mackron@gmail.com @@ -50,7 +50,7 @@ // drwav_free(pSampleData); // // The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in -// this case), but you can still output the audio data in it's internal format (see notes below for supported formats): +// this case), but you can still output the audio data in its internal format (see notes below for supported formats): // // size_t samplesRead = drwav_read(&wav, wav.totalSampleCount, pDecodedInterleavedSamples); // @@ -246,7 +246,7 @@ typedef struct // Block align. This is equal to the number of channels * bytes per sample. drwav_uint16 blockAlign; - // Bit's per sample. + // Bits per sample. drwav_uint16 bitsPerSample; // The size of the extended data. Only used internally for validation, but left here for informational purposes. @@ -292,7 +292,7 @@ typedef struct // The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. drwav_uint16 channels; - // The bits per sample. Will be set to somthing like 16, 24, etc. + // The bits per sample. Will be set to something like 16, 24, etc. drwav_uint16 bitsPerSample; // The number of bytes per sample. @@ -316,6 +316,14 @@ typedef struct drwav_uint64 bytesRemaining; + // Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always + // set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation. + drwav_uint64 dataChunkDataSizeTargetWrite; + + // Keeps track of whether or not the wav writer was initialized in sequential mode. + drwav_bool32 isSequentialWrite; + + // A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_open_memory(). 
drwav__memory_stream memoryStream; drwav__memory_stream_write memoryStreamWrite; @@ -381,11 +389,15 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS // This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory() // to open the stream from a file or from a block of memory respectively. // +// If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform +// a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek. +// // If you want dr_wav to manage the memory allocation for you, consider using drwav_open() instead. This will allocate // a drwav object on the heap and return a pointer to it. // // See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit() drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); +drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData); // Uninitializes the given drwav object. // @@ -403,8 +415,8 @@ void drwav_uninit(drwav* pWav); // // Close the loader with drwav_close(). // -// This is the lowest level function for opening a WAV file. You can also use drwav_open_file() and drwav_open_memory() -// to open the stream from a file or from a block of memory respectively. +// You can also use drwav_open_file() and drwav_open_memory() to open the stream from a file or from a block of +// memory respectively. // // This is different from drwav_init() in that it will allocate the drwav object for you via DRWAV_MALLOC() before // initializing it. @@ -422,14 +434,15 @@ drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserDat // // Close the loader with drwav_close(). 
// -// This is the lowest level function for opening a WAV file. You can also use drwav_open_file_write() and drwav_open_memory_write() -// to open the stream from a file or from a block of memory respectively. +// You can also use drwav_open_file_write() and drwav_open_memory_write() to open the stream from a file or from a block +// of memory respectively. // // This is different from drwav_init_write() in that it will allocate the drwav object for you via DRWAV_MALLOC() before // initializing it. // // See also: drwav_open_file_write(), drwav_open_memory_write(), drwav_close() drwav* drwav_open_write(const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); +drwav* drwav_open_write_sequential(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData); // Uninitializes and deletes the the given drwav object. // @@ -478,7 +491,7 @@ drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* p -//// Convertion Utilities //// +//// Conversion Utilities //// #ifndef DR_WAV_NO_CONVERSION_API // Reads a chunk of audio data and converts it to signed 16-bit PCM samples. @@ -587,6 +600,7 @@ drwav_bool32 drwav_init_file(drwav* pWav, const char* filename); // objects because the operating system may restrict the number of file handles an application can have open at // any given time. drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat); +drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount); // Helper for opening a wave file using stdio. // @@ -601,6 +615,7 @@ drwav* drwav_open_file(const char* filename); // objects because the operating system may restrict the number of file handles an application can have open at // any given time. 
drwav* drwav_open_file_write(const char* filename, const drwav_data_format* pFormat); +drwav* drwav_open_file_write_sequential(const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount); #endif //DR_WAV_NO_STDIO @@ -619,6 +634,7 @@ drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize); // The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be // considered valid until after drwav_uninit() has been called anyway. drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat); +drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount); // Helper for opening a loader from a pre-allocated memory buffer. // @@ -635,6 +651,7 @@ drwav* drwav_open_memory(const void* data, size_t dataSize); // The buffer will remain allocated even after drwav_close() is called. Indeed, the buffer should not be // considered valid until after drwav_close() has been called anyway. 
drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat); +drwav* drwav_open_memory_write_sequential(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount); #ifndef DR_WAV_NO_CONVERSION_API @@ -822,6 +839,8 @@ static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 fo drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); +drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); +drwav* drwav_open_write__internal(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); typedef struct { @@ -867,7 +886,7 @@ static drwav_bool32 drwav__read_chunk_header(drwav_read_proc onRead, void* pUser pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24; // <-- Subtract 24 because w64 includes the size of the header. pHeaderOut->paddingSize = (unsigned int)(pHeaderOut->sizeInBytes % 8); - pRunningBytesReadOut += 24; + *pRunningBytesReadOut += 24; } return DRWAV_TRUE; @@ -902,8 +921,8 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe } - // Skip junk chunks. - if ((container == drwav_container_riff && drwav__fourcc_equal(header.id.fourcc, "JUNK")) || (container == drwav_container_w64 && drwav__guid_equal(header.id.guid, drwavGUID_W64_JUNK))) { + // Skip non-fmt chunks. 
+ if ((container == drwav_container_riff && !drwav__fourcc_equal(header.id.fourcc, "fmt ")) || (container == drwav_container_w64 && !drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT))) { if (!drwav__seek_forward(onSeek, header.sizeInBytes + header.paddingSize, pUserData)) { return DRWAV_FALSE; } @@ -999,6 +1018,23 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe #ifndef DR_WAV_NO_STDIO +FILE* drwav_fopen(const char* filePath, const char* openMode) +{ + FILE* pFile; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (fopen_s(&pFile, filePath, openMode) != 0) { + return DRWAV_FALSE; + } +#else + pFile = fopen(filePath, openMode); + if (pFile == NULL) { + return DRWAV_FALSE; + } +#endif + + return pFile; +} + static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead) { return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData); @@ -1016,51 +1052,41 @@ static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek drwav_bool32 drwav_init_file(drwav* pWav, const char* filename) { - FILE* pFile; -#if defined(_MSC_VER) && _MSC_VER >= 1400 - if (fopen_s(&pFile, filename, "rb") != 0) { - return DRWAV_FALSE; - } -#else - pFile = fopen(filename, "rb"); + FILE* pFile = drwav_fopen(filename, "rb"); if (pFile == NULL) { return DRWAV_FALSE; } -#endif return drwav_init(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile); } -drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat) + +drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential) { - FILE* pFile; -#if defined(_MSC_VER) && _MSC_VER >= 1400 - if (fopen_s(&pFile, filename, "wb") != 0) { - return DRWAV_FALSE; - } -#else - pFile = fopen(filename, "wb"); + FILE* pFile = drwav_fopen(filename, "wb"); if (pFile == NULL) { return DRWAV_FALSE; } -#endif - return 
drwav_init_write(pWav, pFormat, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile); + return drwav_init_write__internal(pWav, pFormat, totalSampleCount, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile); +} + +drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat) +{ + return drwav_init_file_write__internal(pWav, filename, pFormat, 0, DRWAV_FALSE); +} + +drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount) +{ + return drwav_init_file_write__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE); } drwav* drwav_open_file(const char* filename) { - FILE* pFile; -#if defined(_MSC_VER) && _MSC_VER >= 1400 - if (fopen_s(&pFile, filename, "rb") != 0) { - return NULL; - } -#else - pFile = fopen(filename, "rb"); + FILE* pFile = drwav_fopen(filename, "rb"); if (pFile == NULL) { - return NULL; + return DRWAV_FALSE; } -#endif drwav* pWav = drwav_open(drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile); if (pWav == NULL) { @@ -1071,21 +1097,15 @@ drwav* drwav_open_file(const char* filename) return pWav; } -drwav* drwav_open_file_write(const char* filename, const drwav_data_format* pFormat) -{ - FILE* pFile; -#if defined(_MSC_VER) && _MSC_VER >= 1400 - if (fopen_s(&pFile, filename, "wb") != 0) { - return NULL; - } -#else - pFile = fopen(filename, "wb"); - if (pFile == NULL) { - return NULL; - } -#endif - drwav* pWav = drwav_open_write(pFormat, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile); +drwav* drwav_open_file_write__internal(const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential) +{ + FILE* pFile = drwav_fopen(filename, "wb"); + if (pFile == NULL) { + return DRWAV_FALSE; + } + + drwav* pWav = drwav_open_write__internal(pFormat, totalSampleCount, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile); if 
(pWav == NULL) { fclose(pFile); return NULL; @@ -1093,6 +1113,16 @@ drwav* drwav_open_file_write(const char* filename, const drwav_data_format* pFor return pWav; } + +drwav* drwav_open_file_write(const char* filename, const drwav_data_format* pFormat) +{ + return drwav_open_file_write__internal(filename, pFormat, 0, DRWAV_FALSE); +} + +drwav* drwav_open_file_write_sequential(const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount) +{ + return drwav_open_file_write__internal(filename, pFormat, totalSampleCount, DRWAV_TRUE); +} #endif //DR_WAV_NO_STDIO @@ -1232,7 +1262,8 @@ drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize) return DRWAV_TRUE; } -drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat) + +drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential) { if (ppData == NULL) { return DRWAV_FALSE; @@ -1249,7 +1280,7 @@ drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSi memoryStreamWrite.dataCapacity = 0; memoryStreamWrite.currentWritePos = 0; - if (!drwav_init_write(pWav, pFormat, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite)) { + if (!drwav_init_write__internal(pWav, pFormat, totalSampleCount, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite)) { return DRWAV_FALSE; } @@ -1258,6 +1289,17 @@ drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSi return DRWAV_TRUE; } +drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat) +{ + return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, 0, DRWAV_FALSE); +} + +drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const 
drwav_data_format* pFormat, drwav_uint64 totalSampleCount) +{ + return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE); +} + + drwav* drwav_open_memory(const void* data, size_t dataSize) { if (data == NULL || dataSize == 0) { @@ -1280,7 +1322,8 @@ drwav* drwav_open_memory(const void* data, size_t dataSize) return pWav; } -drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat) + +drwav* drwav_open_memory_write__internal(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential) { if (ppData == NULL) { return NULL; @@ -1297,7 +1340,7 @@ drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_dat memoryStreamWrite.dataCapacity = 0; memoryStreamWrite.currentWritePos = 0; - drwav* pWav = drwav_open_write(pFormat, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite); + drwav* pWav = drwav_open_write__internal(pFormat, totalSampleCount, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite); if (pWav == NULL) { return NULL; } @@ -1307,6 +1350,16 @@ drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_dat return pWav; } +drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat) +{ + return drwav_open_memory_write__internal(ppData, pDataSize, pFormat, 0, DRWAV_FALSE); +} + +drwav* drwav_open_memory_write_sequential(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount) +{ + return drwav_open_memory_write__internal(ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE); +} + drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData) { @@ -1392,7 +1445,7 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS } - // The next 24 bytes should be the "fmt 
" chunk. + // The next bytes should be the "fmt " chunk. drwav_fmt fmt; if (!drwav__read_fmt(onRead, onSeek, pUserData, pWav->container, &pWav->dataChunkDataPos, &fmt)) { return DRWAV_FALSE; // Failed to read the "fmt " chunk. @@ -1456,7 +1509,7 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS if (onRead(pUserData, &sampleCountFromFactChunk, 8) != 8) { return DRWAV_FALSE; } - pWav->dataChunkDataPos += 4; + pWav->dataChunkDataPos += 8; dataSize -= 8; } } @@ -1518,8 +1571,8 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS #ifdef DR_WAV_LIBSNDFILE_COMPAT // I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website), // it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count - // from the number of blocks, however this results in the inclusion of the extra silent samples at the end of the last block. The correct - // way to know the total sample count is to inspect the "fact" chunk which should always be present for compressed formats, and should + // from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct + // way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should // always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my // correctness tests against libsndfile, and is disabled by default. 
if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { @@ -1535,12 +1588,51 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS return DRWAV_TRUE; } -drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) + +drwav_uint32 drwav_riff_chunk_size_riff(drwav_uint64 dataChunkSize) { - if (onWrite == NULL || onSeek == NULL) { + if (dataChunkSize <= (0xFFFFFFFF - 36)) { + return 36 + (drwav_uint32)dataChunkSize; + } else { + return 0xFFFFFFFF; + } +} + +drwav_uint32 drwav_data_chunk_size_riff(drwav_uint64 dataChunkSize) +{ + if (dataChunkSize <= 0xFFFFFFFF) { + return (drwav_uint32)dataChunkSize; + } else { + return 0xFFFFFFFF; + } +} + +drwav_uint64 drwav_riff_chunk_size_w64(drwav_uint64 dataChunkSize) +{ + return 80 + 24 + dataChunkSize; // +24 because W64 includes the size of the GUID and size fields. +} + +drwav_uint64 drwav_data_chunk_size_w64(drwav_uint64 dataChunkSize) +{ + return 24 + dataChunkSize; // +24 because W64 includes the size of the GUID and size fields. +} + + +drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) +{ + if (pWav == NULL) { return DRWAV_FALSE; } + if (onWrite == NULL) { + return DRWAV_FALSE; + } + + if (!isSequential && onSeek == NULL) { + return DRWAV_FALSE; // <-- onSeek is required when in non-sequential mode. + } + + // Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this. 
if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) { return DRWAV_FALSE; @@ -1557,20 +1649,42 @@ drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drw pWav->fmt.formatTag = (drwav_uint16)pFormat->format; pWav->fmt.channels = (drwav_uint16)pFormat->channels; pWav->fmt.sampleRate = pFormat->sampleRate; - pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) >> 3); - pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) >> 3); + pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) / 8); + pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) / 8); pWav->fmt.bitsPerSample = (drwav_uint16)pFormat->bitsPerSample; pWav->fmt.extendedSize = 0; + pWav->isSequentialWrite = isSequential; + size_t runningPos = 0; + // The initial values for the "RIFF" and "data" chunks depends on whether or not we are initializing in sequential mode or not. In + // sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non- + // sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek. + drwav_uint64 initialDataChunkSize = 0; + if (isSequential) { + initialDataChunkSize = (totalSampleCount * pWav->fmt.bitsPerSample) / 8; + + // The RIFF container has a limit on the number of samples. drwav is not allowing this. There's no practical limits for Wave64 + // so for the sake of simplicity I'm not doing any validation for that. + if (pFormat->container == drwav_container_riff) { + if (initialDataChunkSize > (0xFFFFFFFF - 36)) { + return DRWAV_FALSE; // Not enough room to store every sample. + } + } + } + + pWav->dataChunkDataSizeTargetWrite = initialDataChunkSize; + + // "RIFF" chunk. 
- drwav_uint64 chunkSizeRIFF = 0; if (pFormat->container == drwav_container_riff) { + drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize; // +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk] runningPos += pWav->onWrite(pUserData, "RIFF", 4); runningPos += pWav->onWrite(pUserData, &chunkSizeRIFF, 4); runningPos += pWav->onWrite(pUserData, "WAVE", 4); } else { + drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize; // +24 because W64 includes the size of the GUID and size fields. runningPos += pWav->onWrite(pUserData, drwavGUID_W64_RIFF, 16); runningPos += pWav->onWrite(pUserData, &chunkSizeRIFF, 8); runningPos += pWav->onWrite(pUserData, drwavGUID_W64_WAVE, 16); @@ -1596,14 +1710,14 @@ drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drw runningPos += pWav->onWrite(pUserData, &pWav->fmt.bitsPerSample, 2); pWav->dataChunkDataPos = runningPos; - pWav->dataChunkDataSize = 0; // "data" chunk. - drwav_uint64 chunkSizeDATA = 0; if (pFormat->container == drwav_container_riff) { + drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize; runningPos += pWav->onWrite(pUserData, "data", 4); runningPos += pWav->onWrite(pUserData, &chunkSizeDATA, 4); } else { + drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; // +24 because W64 includes the size of the GUID and size fields. 
runningPos += pWav->onWrite(pUserData, drwavGUID_W64_DATA, 16); runningPos += pWav->onWrite(pUserData, &chunkSizeDATA, 8); } @@ -1633,16 +1747,32 @@ drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drw return DRWAV_TRUE; } + +drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) +{ + return drwav_init_write__internal(pWav, pFormat, 0, DRWAV_FALSE, onWrite, onSeek, pUserData); // DRWAV_FALSE = Not Sequential +} + +drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData) +{ + return drwav_init_write__internal(pWav, pFormat, totalSampleCount, DRWAV_TRUE, onWrite, NULL, pUserData); // DRWAV_TRUE = Sequential +} + void drwav_uninit(drwav* pWav) { if (pWav == NULL) { return; } - // If the drwav object was opened in write mode we'll need to finialize a few things: - // - Make sure the "data" chunk is aligned to 16-bits + // If the drwav object was opened in write mode we'll need to finalize a few things: + // - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers. // - Set the size of the "data" chunk. if (pWav->onWrite != NULL) { + // Validation for sequential mode. + if (pWav->isSequentialWrite) { + drwav_assert(pWav->dataChunkDataSize == pWav->dataChunkDataSizeTargetWrite); + } + // Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. drwav_uint32 paddingSize = 0; if (pWav->container == drwav_container_riff) { @@ -1657,42 +1787,31 @@ void drwav_uninit(drwav* pWav) } - // Chunk sizes. - if (pWav->onSeek) { + // Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need + // to do this when using non-sequential mode. 
+ if (pWav->onSeek && !pWav->isSequentialWrite) { if (pWav->container == drwav_container_riff) { // The "RIFF" chunk size. if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) { - drwav_uint32 riffChunkSize = 36; - if (pWav->dataChunkDataSize <= (0xFFFFFFFF - 36)) { - riffChunkSize = 36 + (drwav_uint32)pWav->dataChunkDataSize; - } else { - riffChunkSize = 0xFFFFFFFF; - } - + drwav_uint32 riffChunkSize = drwav_riff_chunk_size_riff(pWav->dataChunkDataSize); pWav->onWrite(pWav->pUserData, &riffChunkSize, 4); } // the "data" chunk size. if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 4, drwav_seek_origin_start)) { - drwav_uint32 dataChunkSize = 0; - if (pWav->dataChunkDataSize <= 0xFFFFFFFF) { - dataChunkSize = (drwav_uint32)pWav->dataChunkDataSize; - } else { - dataChunkSize = 0xFFFFFFFF; - } - + drwav_uint32 dataChunkSize = drwav_data_chunk_size_riff(pWav->dataChunkDataSize); pWav->onWrite(pWav->pUserData, &dataChunkSize, 4); } } else { // The "RIFF" chunk size. if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) { - drwav_uint64 riffChunkSize = 80 + 24 + pWav->dataChunkDataSize; + drwav_uint64 riffChunkSize = drwav_riff_chunk_size_w64(pWav->dataChunkDataSize); pWav->onWrite(pWav->pUserData, &riffChunkSize, 8); } // The "data" chunk size. if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 16, drwav_seek_origin_start)) { - drwav_uint64 dataChunkSize = 24 + pWav->dataChunkDataSize; // +24 because W64 includes the size of the GUID and size fields. 
+ drwav_uint64 dataChunkSize = drwav_data_chunk_size_w64(pWav->dataChunkDataSize); pWav->onWrite(pWav->pUserData, &dataChunkSize, 8); } } @@ -1724,14 +1843,15 @@ drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserDat return pWav; } -drwav* drwav_open_write(const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) + +drwav* drwav_open_write__internal(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) { drwav* pWav = (drwav*)DRWAV_MALLOC(sizeof(*pWav)); if (pWav == NULL) { return NULL; } - if (!drwav_init_write(pWav, pFormat, onWrite, onSeek, pUserData)) { + if (!drwav_init_write__internal(pWav, pFormat, totalSampleCount, isSequential, onWrite, onSeek, pUserData)) { DRWAV_FREE(pWav); return NULL; } @@ -1739,6 +1859,16 @@ drwav* drwav_open_write(const drwav_data_format* pFormat, drwav_write_proc onWri return pWav; } +drwav* drwav_open_write(const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData) +{ + return drwav_open_write__internal(pFormat, 0, DRWAV_FALSE, onWrite, onSeek, pUserData); +} + +drwav* drwav_open_write_sequential(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData) +{ + return drwav_open_write__internal(pFormat, totalSampleCount, DRWAV_TRUE, onWrite, NULL, pUserData); +} + void drwav_close(drwav* pWav) { drwav_uninit(pWav); @@ -1784,6 +1914,10 @@ drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOu drwav_bool32 drwav_seek_to_first_sample(drwav* pWav) { + if (pWav->onWrite != NULL) { + return DRWAV_FALSE; // No seeking in write mode. 
+ } + if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) { return DRWAV_FALSE; } @@ -1800,6 +1934,10 @@ drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample) { // Seeking should be compatible with wave files > 2GB. + if (pWav->onWrite != NULL) { + return DRWAV_FALSE; // No seeking in write mode. + } + if (pWav == NULL || pWav->onSeek == NULL) { return DRWAV_FALSE; } @@ -1911,8 +2049,25 @@ drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* p return 0; } - size_t bytesWritten = drwav_write_raw(pWav, (size_t)bytesToWrite, pData); - return ((drwav_uint64)bytesWritten * 8) / pWav->bitsPerSample; + drwav_uint64 bytesWritten = 0; + const drwav_uint8* pRunningData = (const drwav_uint8*)pData; + while (bytesToWrite > 0) { + drwav_uint64 bytesToWriteThisIteration = bytesToWrite; + if (bytesToWriteThisIteration > SIZE_MAX) { + bytesToWriteThisIteration = SIZE_MAX; + } + + size_t bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData); + if (bytesJustWritten == 0) { + break; + } + + bytesToWrite -= bytesJustWritten; + bytesWritten += bytesJustWritten; + pRunningData += bytesJustWritten; + } + + return (bytesWritten * 8) / pWav->bitsPerSample; } @@ -3439,6 +3594,16 @@ void drwav_free(void* pDataReturnedByOpenAndRead) // REVISION HISTORY // +// v0.8.1 - 2018-06-29 +// - Add support for sequential writing APIs. +// - Disable seeking in write mode. +// - Fix bugs with Wave64. +// - Fix typos. +// +// v0.8 - 2018-04-27 +// - Bug fix. +// - Start using major.minor.revision versioning. +// // v0.7f - 2018-02-05 // - Restrict ADPCM formats to a maximum of 2 channels. 
// diff --git a/src/external/mini_al.h b/src/external/mini_al.h index 4d05b218..c3004d76 100644 --- a/src/external/mini_al.h +++ b/src/external/mini_al.h @@ -16,6 +16,7 @@ // - WASAPI // - DirectSound // - WinMM +// - Core Audio (macOS, iOS) // - ALSA // - PulseAudio // - JACK @@ -24,8 +25,6 @@ // - OpenAL // - SDL // - Null (Silence) -// - ... and more in the future. -// - Core Audio (OSX, iOS) // // Supported Formats: // - Unsigned 8-bit PCM @@ -43,11 +42,9 @@ // // You can then #include this file in other parts of the program as you would with any other header file. // -// The implementation of this library will try #include-ing necessary headers for some backends. If you do not have -// the development packages for any particular backend you can disable it by #define-ing the appropriate MAL_NO_* -// option before the implementation. +// If you want to disable a specific backend, #define the appropriate MAL_NO_* option before the implementation. // -// Note that GCC and Clang requires "-msse2", "-mavx", etc. for SIMD optimizations. +// Note that GCC and Clang requires "-msse2", "-mavx2", etc. for SIMD optimizations. // // // Building for Windows @@ -55,6 +52,12 @@ // The Windows build should compile clean on all popular compilers without the need to configure any include paths // nor link to any libraries. // +// Building for macOS and iOS +// -------------------------- +// The macOS build should compile clean without the need to download any dependencies or link to any libraries or +// frameworks. The iOS build needs to be compiled as Objective-C (sorry) and will need to link the relevant frameworks +// but should Just Work with Xcode. +// // Building for Linux // ------------------ // The Linux build only requires linking to -ldl, -lpthread and -lm. You do not need any development packages for any @@ -174,6 +177,9 @@ // #define MAL_NO_JACK // Disables the JACK backend. // +// #define MAL_NO_COREAUDIO +// Disables the Core Audio backend. 
+// // #define MAL_NO_OSS // Disables the OSS backend. // @@ -207,8 +213,8 @@ // #define MAL_NO_SSE2 // Disables SSE2 optimizations. // -// #define MAL_NO_AVX -// Disables AVX optimizations. +// #define MAL_NO_AVX2 +// Disables AVX2 optimizations. // // #define MAL_NO_AVX512 // Disables AVX-512 optimizations. @@ -278,9 +284,6 @@ extern "C" { #define MAL_SUPPORT_PULSEAUDIO #define MAL_SUPPORT_JACK #endif - #if defined(MAL_APPLE) - #define MAL_SUPPORT_COREAUDIO - #endif #if defined(MAL_ANDROID) #define MAL_SUPPORT_OPENSL #endif @@ -288,6 +291,9 @@ extern "C" { #define MAL_SUPPORT_OSS #endif #endif +#if defined(MAL_APPLE) + #define MAL_SUPPORT_COREAUDIO +#endif #define MAL_SUPPORT_SDL // All platforms support SDL. @@ -351,6 +357,10 @@ extern "C" { #endif #endif +#if !defined(MAL_HAS_STDINT) && (defined(__GNUC__) || defined(__clang__)) // Assume support for stdint.h on GCC and Clang. + #define MAL_HAS_STDINT +#endif + #ifndef MAL_HAS_STDINT typedef signed char mal_int8; typedef unsigned char mal_uint8; @@ -399,7 +409,7 @@ typedef mal_uint32 mal_bool32; typedef void* mal_handle; typedef void* mal_ptr; -typedef void (* mal_proc)(); +typedef void (* mal_proc)(void); typedef struct mal_context mal_context; typedef struct mal_device mal_device; @@ -413,6 +423,13 @@ typedef mal_uint16 wchar_t; #define NULL 0 #endif +#if defined(SIZE_MAX) + #define MAL_SIZE_MAX SIZE_MAX +#else + #define MAL_SIZE_MAX 0xFFFFFFFF /* When SIZE_MAX is not defined by the standard library just default to the maximum 32-bit unsigned integer. 
*/ +#endif + + #ifdef _MSC_VER #define MAL_INLINE __forceinline #else @@ -625,27 +642,29 @@ typedef int mal_result; #define MAL_API_NOT_FOUND -8 #define MAL_DEVICE_BUSY -9 #define MAL_DEVICE_NOT_INITIALIZED -10 -#define MAL_DEVICE_ALREADY_STARTED -11 -#define MAL_DEVICE_ALREADY_STARTING -12 -#define MAL_DEVICE_ALREADY_STOPPED -13 -#define MAL_DEVICE_ALREADY_STOPPING -14 -#define MAL_FAILED_TO_MAP_DEVICE_BUFFER -15 -#define MAL_FAILED_TO_UNMAP_DEVICE_BUFFER -16 -#define MAL_FAILED_TO_INIT_BACKEND -17 -#define MAL_FAILED_TO_READ_DATA_FROM_CLIENT -18 -#define MAL_FAILED_TO_READ_DATA_FROM_DEVICE -19 -#define MAL_FAILED_TO_SEND_DATA_TO_CLIENT -20 -#define MAL_FAILED_TO_SEND_DATA_TO_DEVICE -21 -#define MAL_FAILED_TO_OPEN_BACKEND_DEVICE -22 -#define MAL_FAILED_TO_START_BACKEND_DEVICE -23 -#define MAL_FAILED_TO_STOP_BACKEND_DEVICE -24 -#define MAL_FAILED_TO_CONFIGURE_BACKEND_DEVICE -25 -#define MAL_FAILED_TO_CREATE_MUTEX -26 -#define MAL_FAILED_TO_CREATE_EVENT -27 -#define MAL_FAILED_TO_CREATE_THREAD -28 -#define MAL_INVALID_DEVICE_CONFIG -29 -#define MAL_ACCESS_DENIED -30 -#define MAL_TOO_LARGE -31 +#define MAL_DEVICE_NOT_STARTED -11 +#define MAL_DEVICE_NOT_STOPPED -12 +#define MAL_DEVICE_ALREADY_STARTED -13 +#define MAL_DEVICE_ALREADY_STARTING -14 +#define MAL_DEVICE_ALREADY_STOPPED -15 +#define MAL_DEVICE_ALREADY_STOPPING -16 +#define MAL_FAILED_TO_MAP_DEVICE_BUFFER -17 +#define MAL_FAILED_TO_UNMAP_DEVICE_BUFFER -18 +#define MAL_FAILED_TO_INIT_BACKEND -19 +#define MAL_FAILED_TO_READ_DATA_FROM_CLIENT -20 +#define MAL_FAILED_TO_READ_DATA_FROM_DEVICE -21 +#define MAL_FAILED_TO_SEND_DATA_TO_CLIENT -22 +#define MAL_FAILED_TO_SEND_DATA_TO_DEVICE -23 +#define MAL_FAILED_TO_OPEN_BACKEND_DEVICE -24 +#define MAL_FAILED_TO_START_BACKEND_DEVICE -25 +#define MAL_FAILED_TO_STOP_BACKEND_DEVICE -26 +#define MAL_FAILED_TO_CONFIGURE_BACKEND_DEVICE -27 +#define MAL_FAILED_TO_CREATE_MUTEX -28 +#define MAL_FAILED_TO_CREATE_EVENT -29 +#define MAL_FAILED_TO_CREATE_THREAD -30 +#define 
MAL_INVALID_DEVICE_CONFIG -31 +#define MAL_ACCESS_DENIED -32 +#define MAL_TOO_LARGE -33 typedef void (* mal_log_proc) (mal_context* pContext, mal_device* pDevice, const char* message); typedef void (* mal_recv_proc)(mal_device* pDevice, mal_uint32 frameCount, const void* pSamples); @@ -661,6 +680,7 @@ typedef enum mal_backend_alsa, mal_backend_pulseaudio, mal_backend_jack, + mal_backend_coreaudio, mal_backend_oss, mal_backend_opensl, mal_backend_openal, @@ -754,7 +774,7 @@ typedef union int jack; // JACK always uses default devices. #endif #ifdef MAL_SUPPORT_COREAUDIO - // TODO: Implement me. + char coreaudio[256]; // Core Audio uses a string for identification. #endif #ifdef MAL_SUPPORT_OSS char oss[64]; // "dev/dsp0", etc. "dev/dsp" for the default device. @@ -812,6 +832,10 @@ typedef struct mal_stream_format streamFormatIn; mal_stream_format streamFormatOut; mal_dither_mode ditherMode; + mal_bool32 noSSE2 : 1; + mal_bool32 noAVX2 : 1; + mal_bool32 noAVX512 : 1; + mal_bool32 noNEON : 1; mal_format_converter_read_proc onRead; mal_format_converter_read_deinterleaved_proc onReadDeinterleaved; void* pUserData; @@ -820,6 +844,10 @@ typedef struct struct mal_format_converter { mal_format_converter_config config; + mal_bool32 useSSE2 : 1; + mal_bool32 useAVX2 : 1; + mal_bool32 useAVX512 : 1; + mal_bool32 useNEON : 1; void (* onConvertPCM)(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode); void (* onInterleavePCM)(void* dst, const void** src, mal_uint64 frameCount, mal_uint32 channels); void (* onDeinterleavePCM)(void** dst, const void* src, mal_uint64 frameCount, mal_uint32 channels); @@ -838,7 +866,7 @@ typedef struct mal_channel channelMapOut[MAL_MAX_CHANNELS]; mal_channel_mix_mode mixingMode; mal_bool32 noSSE2 : 1; - mal_bool32 noAVX : 1; + mal_bool32 noAVX2 : 1; mal_bool32 noAVX512 : 1; mal_bool32 noNEON : 1; mal_channel_router_read_deinterleaved_proc onReadDeinterleaved; @@ -851,7 +879,7 @@ struct mal_channel_router mal_bool32 isPassthrough 
: 1; mal_bool32 isSimpleShuffle : 1; mal_bool32 useSSE2 : 1; - mal_bool32 useAVX : 1; + mal_bool32 useAVX2 : 1; mal_bool32 useAVX512 : 1; mal_bool32 useNEON : 1; mal_uint8 shuffleTable[MAL_MAX_CHANNELS]; @@ -885,6 +913,10 @@ typedef struct mal_uint32 sampleRateOut; mal_uint32 channels; mal_src_algorithm algorithm; + mal_bool32 noSSE2 : 1; + mal_bool32 noAVX2 : 1; + mal_bool32 noAVX512 : 1; + mal_bool32 noNEON : 1; mal_src_read_deinterleaved_proc onReadDeinterleaved; void* pUserData; union @@ -914,11 +946,15 @@ MAL_ALIGNED_STRUCT(MAL_SIMD_ALIGNMENT) mal_src float timeIn; mal_uint32 inputFrameCount; // The number of frames sitting in the input buffer, not including the first half of the window. mal_uint32 windowPosInSamples; // An offset of . - float table[MAL_SRC_SINC_MAX_WINDOW_WIDTH * MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION]; // Precomputed lookup table. + float table[MAL_SRC_SINC_MAX_WINDOW_WIDTH*1 * MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION]; // Precomputed lookup table. The +1 is used to avoid the need for an overflow check. } sinc; }; mal_src_config config; + mal_bool32 useSSE2 : 1; + mal_bool32 useAVX2 : 1; + mal_bool32 useAVX512 : 1; + mal_bool32 useNEON : 1; }; typedef struct mal_dsp mal_dsp; @@ -938,6 +974,10 @@ typedef struct mal_dither_mode ditherMode; mal_src_algorithm srcAlgorithm; mal_bool32 allowDynamicSampleRate; + mal_bool32 noSSE2 : 1; + mal_bool32 noAVX2 : 1; + mal_bool32 noAVX512 : 1; + mal_bool32 noNEON : 1; mal_dsp_read_proc onRead; void* pUserData; union @@ -1205,7 +1245,25 @@ struct mal_context #ifdef MAL_SUPPORT_COREAUDIO struct { - int _unused; + mal_handle hCoreFoundation; + mal_proc CFStringGetCString; + + mal_handle hCoreAudio; + mal_proc AudioObjectGetPropertyData; + mal_proc AudioObjectGetPropertyDataSize; + mal_proc AudioObjectSetPropertyData; + + mal_handle hAudioUnit; // Could possibly be set to AudioToolbox on later versions of macOS. 
+ mal_proc AudioComponentFindNext; + mal_proc AudioComponentInstanceDispose; + mal_proc AudioComponentInstanceNew; + mal_proc AudioOutputUnitStart; + mal_proc AudioOutputUnitStop; + mal_proc AudioUnitAddPropertyListener; + mal_proc AudioUnitGetProperty; + mal_proc AudioUnitSetProperty; + mal_proc AudioUnitInitialize; + mal_proc AudioUnitRender; } coreaudio; #endif #ifdef MAL_SUPPORT_OSS @@ -1494,7 +1552,10 @@ MAL_ALIGNED_STRUCT(MAL_SIMD_ALIGNMENT) mal_device #ifdef MAL_SUPPORT_COREAUDIO struct { - int _unused; + mal_uint32 deviceObjectID; + /*AudioComponent*/ mal_ptr component; // <-- Can this be per-context? + /*AudioUnit*/ mal_ptr audioUnit; + /*AudioBufferList**/ mal_ptr pAudioBufferList; // Only used for input devices. } coreaudio; #endif #ifdef MAL_SUPPORT_OSS @@ -1569,6 +1630,7 @@ MAL_ALIGNED_STRUCT(MAL_SIMD_ALIGNMENT) mal_device // - WASAPI // - DirectSound // - WinMM +// - Core Audio (macOS, iOS) // - OSS // - PulseAudio // - ALSA @@ -1868,7 +1930,7 @@ mal_context_config mal_context_config_init(mal_log_proc onLog); // // mal_device_config_init(), mal_device_config_init_playback(), etc. will allow you to explicitly set the sample format, // channel count, etc. -mal_device_config mal_device_config_init_default(); +mal_device_config mal_device_config_init_default(void); mal_device_config mal_device_config_init_default_capture(mal_recv_proc onRecvCallback); mal_device_config mal_device_config_init_default_playback(mal_send_proc onSendCallback); @@ -2028,7 +2090,7 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver // Helper for initializing a format converter config. 
-mal_format_converter_config mal_format_converter_config_init_new(); +mal_format_converter_config mal_format_converter_config_init_new(void); mal_format_converter_config mal_format_converter_config_init(mal_format formatIn, mal_format formatOut, mal_uint32 channels, mal_format_converter_read_proc onRead, void* pUserData); mal_format_converter_config mal_format_converter_config_init_deinterleaved(mal_format formatIn, mal_format formatOut, mal_uint32 channels, mal_format_converter_read_deinterleaved_proc onReadDeinterleaved, void* pUserData); @@ -2134,7 +2196,7 @@ mal_uint64 mal_src_read_deinterleaved(mal_src* pSRC, mal_uint64 frameCount, void // Helper for creating a sample rate conversion config. -mal_src_config mal_src_config_init_new(); +mal_src_config mal_src_config_init_new(void); mal_src_config mal_src_config_init(mal_uint32 sampleRateIn, mal_uint32 sampleRateOut, mal_uint32 channels, mal_src_read_deinterleaved_proc onReadDeinterleaved, void* pUserData); @@ -2164,7 +2226,7 @@ mal_result mal_dsp_set_output_sample_rate(mal_dsp* pDSP, mal_uint32 sampleRateOu mal_uint64 mal_dsp_read(mal_dsp* pDSP, mal_uint64 frameCount, void* pFramesOut, void* pUserData); // Helper for initializing a mal_dsp_config object. 
-mal_dsp_config mal_dsp_config_init_new(); +mal_dsp_config mal_dsp_config_init_new(void); mal_dsp_config mal_dsp_config_init(mal_format formatIn, mal_uint32 channelsIn, mal_uint32 sampleRateIn, mal_format formatOut, mal_uint32 channelsOut, mal_uint32 sampleRateOut, mal_dsp_read_proc onRead, void* pUserData); mal_dsp_config mal_dsp_config_init_ex(mal_format formatIn, mal_uint32 channelsIn, mal_uint32 sampleRateIn, mal_channel channelMapIn[MAL_MAX_CHANNELS], mal_format formatOut, mal_uint32 channelsOut, mal_uint32 sampleRateOut, mal_channel channelMapOut[MAL_MAX_CHANNELS], mal_dsp_read_proc onRead, void* pUserData); @@ -2237,7 +2299,7 @@ void mal_blend_f32(float* pOut, float* pInA, float* pInB, float factor, mal_uint // This could be useful for dynamically determining the size of a device's internal buffer based on the speed of the system. // // This is a slow API because it performs a profiling test. -float mal_calculate_cpu_speed_factor(); +float mal_calculate_cpu_speed_factor(void); // Adjust buffer size based on a scaling factor. // @@ -2465,8 +2527,11 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSignWave, mal_uint64 count, float* #if !defined(MAL_NO_SSE2) // Assume all MSVC compilers support SSE2 intrinsics. 
#define MAL_SUPPORT_SSE2 #endif - #if _MSC_VER >= 1600 && !defined(MAL_NO_AVX) // 2010 - #define MAL_SUPPORT_AVX + //#if _MSC_VER >= 1600 && !defined(MAL_NO_AVX) // 2010 + // #define MAL_SUPPORT_AVX + //#endif + #if _MSC_VER >= 1700 && !defined(MAL_NO_AVX2) // 2012 + #define MAL_SUPPORT_AVX2 #endif #if _MSC_VER >= 1910 && !defined(MAL_NO_AVX512) // 2017 #define MAL_SUPPORT_AVX512 @@ -2476,8 +2541,11 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSignWave, mal_uint64 count, float* #if defined(__SSE2__) && !defined(MAL_NO_SSE2) #define MAL_SUPPORT_SSE2 #endif - #if defined(__AVX__) && !defined(MAL_NO_AVX) - #define MAL_SUPPORT_AVX + //#if defined(__AVX__) && !defined(MAL_NO_AVX) + // #define MAL_SUPPORT_AVX + //#endif + #if defined(__AVX2__) && !defined(MAL_NO_AVX2) + #define MAL_SUPPORT_AVX2 #endif #if defined(__AVX512F__) && !defined(MAL_NO_AVX512) #define MAL_SUPPORT_AVX512 @@ -2489,8 +2557,11 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSignWave, mal_uint64 count, float* #if !defined(MAL_SUPPORT_SSE2) && !defined(MAL_NO_SSE2) && __has_include() #define MAL_SUPPORT_SSE2 #endif - #if !defined(MAL_SUPPORT_AVX) && !defined(MAL_NO_AVX) && __has_include() - #define MAL_SUPPORT_AVX + //#if !defined(MAL_SUPPORT_AVX) && !defined(MAL_NO_AVX) && __has_include() + // #define MAL_SUPPORT_AVX + //#endif + #if !defined(MAL_SUPPORT_AVX2) && !defined(MAL_NO_AVX2) && __has_include() + #define MAL_SUPPORT_AVX2 #endif #if !defined(MAL_SUPPORT_AVX512) && !defined(MAL_NO_AVX512) && __has_include() #define MAL_SUPPORT_AVX512 @@ -2499,7 +2570,7 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSignWave, mal_uint64 count, float* #if defined(MAL_SUPPORT_AVX512) #include // Not a mistake. Intentionally including instead of because otherwise the compiler will complain. 
- #elif defined(MAL_SUPPORT_AVX) + #elif defined(MAL_SUPPORT_AVX2) || defined(MAL_SUPPORT_AVX) #include #elif defined(MAL_SUPPORT_SSE2) #include @@ -2597,6 +2668,7 @@ static MAL_INLINE mal_bool32 mal_has_sse2() #endif } +#if 0 static MAL_INLINE mal_bool32 mal_has_avx() { #if defined(MAL_SUPPORT_AVX) @@ -2629,6 +2701,42 @@ static MAL_INLINE mal_bool32 mal_has_avx() return MAL_FALSE; // No compiler support. #endif } +#endif + +static MAL_INLINE mal_bool32 mal_has_avx2() +{ +#if defined(MAL_SUPPORT_AVX2) + #if (defined(MAL_X64) || defined(MAL_X86)) && !defined(MAL_NO_AVX2) + #if defined(_AVX2_) || defined(__AVX2__) + return MAL_TRUE; // If the compiler is allowed to freely generate AVX2 code we can assume support. + #else + // AVX requires both CPU and OS support. + #if defined(MAL_NO_CPUID) || defined(MAL_NO_XGETBV) + return MAL_FALSE; + #else + int info1[4]; + int info7[4]; + mal_cpuid(info1, 1); + mal_cpuid(info7, 7); + if (((info1[2] & (1 << 27)) != 0) && ((info7[1] & (1 << 5)) != 0)) { + mal_uint64 xrc = mal_xgetbv(0); + if ((xrc & 0x06) == 0x06) { + return MAL_TRUE; + } else { + return MAL_FALSE; + } + } else { + return MAL_FALSE; + } + #endif + #endif + #else + return MAL_FALSE; // AVX is only supported on x86 and x64 architectures. + #endif +#else + return MAL_FALSE; // No compiler support. 
+#endif +} static MAL_INLINE mal_bool32 mal_has_avx512f() { @@ -2641,9 +2749,11 @@ static MAL_INLINE mal_bool32 mal_has_avx512f() #if defined(MAL_NO_CPUID) || defined(MAL_NO_XGETBV) return MAL_FALSE; #else - int info[4]; - mal_cpuid(info, 1); - if (((info[2] & (1 << 27)) != 0) && ((info[1] & (1 << 16)) != 0)) { + int info1[4]; + int info7[4]; + mal_cpuid(info1, 1); + mal_cpuid(info7, 7); + if (((info1[2] & (1 << 27)) != 0) && ((info7[1] & (1 << 16)) != 0)) { mal_uint64 xrc = mal_xgetbv(0); if ((xrc & 0xE6) == 0xE6) { return MAL_TRUE; @@ -2688,6 +2798,12 @@ static MAL_INLINE mal_bool32 mal_has_neon() #ifndef MAL_PI_D #define MAL_PI_D 3.14159265358979323846264 #endif +#ifndef MAL_TAU +#define MAL_TAU 6.28318530717958647693f +#endif +#ifndef MAL_TAU_D +#define MAL_TAU_D 6.28318530717958647693 +#endif // Unfortunately using runtime linking for pthreads causes problems. This has occurred for me when testing on FreeBSD. When // using runtime linking, deadlocks can occur (for me it happens when loading data from fread()). 
It turns out that doing @@ -2873,6 +2989,18 @@ mal_uint32 g_malStandardSampleRatePriorities[] = { MAL_SAMPLE_RATE_384000 }; +mal_format g_malFormatPriorities[] = { + mal_format_f32, // Most common + mal_format_s16, + + //mal_format_s24_32, // Clean alignment + mal_format_s32, + + mal_format_s24, // Unclean alignment + + mal_format_u8 // Low quality +}; + #define MAL_DEFAULT_PLAYBACK_DEVICE_NAME "Default Playback Device" #define MAL_DEFAULT_CAPTURE_DEVICE_NAME "Default Capture Device" @@ -3169,6 +3297,20 @@ static MAL_INLINE unsigned int mal_round_to_power_of_2(unsigned int x) } } +static MAL_INLINE unsigned int mal_count_set_bits(unsigned int x) +{ + unsigned int count = 0; + while (x != 0) { + if (x & 1) { + count += 1; + } + + x = x >> 1; + } + + return count; +} + // Clamps an f32 sample to -1..1 @@ -3185,9 +3327,38 @@ static MAL_INLINE float mal_mix_f32(float x, float y, float a) } static MAL_INLINE float mal_mix_f32_fast(float x, float y, float a) { - return x + (y - x)*a; + float r0 = (y - x); + float r1 = r0*a; + return x + r1; + //return x + (y - x)*a; } +#if defined(MAL_SUPPORT_SSE2) +static MAL_INLINE __m128 mal_mix_f32_fast__sse2(__m128 x, __m128 y, __m128 a) +{ + return _mm_add_ps(x, _mm_mul_ps(_mm_sub_ps(y, x), a)); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +static MAL_INLINE __m256 mal_mix_f32_fast__avx2(__m256 x, __m256 y, __m256 a) +{ + return _mm256_add_ps(x, _mm256_mul_ps(_mm256_sub_ps(y, x), a)); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +static MAL_INLINE __m512 mal_mix_f32_fast__avx512(__m512 x, __m512 y, __m512 a) +{ + return _mm512_add_ps(x, _mm512_mul_ps(_mm512_sub_ps(y, x), a)); +} +#endif +#if defined(MAL_SUPPORT_NEON) +static MAL_INLINE float32x4_t mal_mix_f32_fast__neon(float32x4_t x, float32x4_t y, float32x4_t a) +{ + return vaddq_f32(x, vmulq_f32(vsubq_f32(y, x), a)); +} +#endif + + static MAL_INLINE double mal_mix_f64(double x, double y, double a) { return x*(1-a) + y*a; @@ -3251,16 +3422,25 @@ static MAL_INLINE mal_int32 
mal_rand_range_s32(mal_int32 lo, mal_int32 hi) } +static MAL_INLINE float mal_dither_f32_rectangle(float ditherMin, float ditherMax) +{ + return mal_rand_range_f32(ditherMin, ditherMax); +} + +static MAL_INLINE float mal_dither_f32_triangle(float ditherMin, float ditherMax) +{ + float a = mal_rand_range_f32(ditherMin, 0); + float b = mal_rand_range_f32(0, ditherMax); + return a + b; +} + static MAL_INLINE float mal_dither_f32(mal_dither_mode ditherMode, float ditherMin, float ditherMax) { if (ditherMode == mal_dither_mode_rectangle) { - float a = mal_rand_range_f32(ditherMin, ditherMax); - return a; + return mal_dither_f32_rectangle(ditherMin, ditherMax); } if (ditherMode == mal_dither_mode_triangle) { - float a = mal_rand_range_f32(ditherMin, 0); - float b = mal_rand_range_f32(0, ditherMax); - return a + b; + return mal_dither_f32_triangle(ditherMin, ditherMax); } return 0; @@ -3286,6 +3466,10 @@ static MAL_INLINE mal_int32 mal_dither_s32(mal_dither_mode ditherMode, mal_int32 // multiple of the alignment. The alignment must be a power of 2. 
void mal_split_buffer(void* pBuffer, size_t bufferSize, size_t splitCount, size_t alignment, void** ppBuffersOut, size_t* pSplitSizeOut) { + if (pSplitSizeOut) { + *pSplitSizeOut = 0; + } + if (pBuffer == NULL || bufferSize == 0 || splitCount == 0) { return; } @@ -3358,7 +3542,7 @@ void mal_timer_init(mal_timer* pTimer) LARGE_INTEGER counter; QueryPerformanceCounter(&counter); - pTimer->counter = (mal_uint64)counter.QuadPart; + pTimer->counter = counter.QuadPart; } double mal_timer_get_time_in_seconds(mal_timer* pTimer) @@ -3368,10 +3552,10 @@ double mal_timer_get_time_in_seconds(mal_timer* pTimer) return 0; } - return (counter.QuadPart - pTimer->counter) / (double)g_mal_TimerFrequency.QuadPart; + return (double)(counter.QuadPart - pTimer->counter) / g_mal_TimerFrequency.QuadPart; } #elif defined(MAL_APPLE) && (__MAC_OS_X_VERSION_MIN_REQUIRED < 101200) -uint64_t g_mal_TimerFrequency = 0; +mal_uint64 g_mal_TimerFrequency = 0; void mal_timer_init(mal_timer* pTimer) { mach_timebase_info_data_t baseTime; @@ -3383,16 +3567,22 @@ void mal_timer_init(mal_timer* pTimer) double mal_timer_get_time_in_seconds(mal_timer* pTimer) { - uint64_t newTimeCounter = mach_absolute_time(); - uint64_t oldTimeCounter = pTimer->counter; + mal_uint64 newTimeCounter = mach_absolute_time(); + mal_uint64 oldTimeCounter = pTimer->counter; return (newTimeCounter - oldTimeCounter) / g_mal_TimerFrequency; } #else +#if defined(CLOCK_MONOTONIC) + #define MAL_CLOCK_ID CLOCK_MONOTONIC +#else + #define MAL_CLOCK_ID CLOCK_REALTIME +#endif + void mal_timer_init(mal_timer* pTimer) { struct timespec newTime; - clock_gettime(CLOCK_MONOTONIC, &newTime); + clock_gettime(MAL_CLOCK_ID, &newTime); pTimer->counter = (newTime.tv_sec * 1000000000) + newTime.tv_nsec; } @@ -3400,10 +3590,10 @@ void mal_timer_init(mal_timer* pTimer) double mal_timer_get_time_in_seconds(mal_timer* pTimer) { struct timespec newTime; - clock_gettime(CLOCK_MONOTONIC, &newTime); + clock_gettime(MAL_CLOCK_ID, &newTime); - uint64_t 
newTimeCounter = (newTime.tv_sec * 1000000000) + newTime.tv_nsec; - uint64_t oldTimeCounter = pTimer->counter; + mal_uint64 newTimeCounter = (newTime.tv_sec * 1000000000) + newTime.tv_nsec; + mal_uint64 oldTimeCounter = pTimer->counter; return (newTimeCounter - oldTimeCounter) / 1000000000.0; } @@ -3593,8 +3783,10 @@ mal_bool32 mal_thread_create__posix(mal_context* pContext, mal_thread* pThread, scheduler = SCHED_FIFO; } #endif +#ifdef MAL_LINUX } else { scheduler = sched_getscheduler(0); +#endif } if (scheduler != -1) { @@ -4135,6 +4327,19 @@ mal_result mal_context__try_get_device_name_by_id(mal_context* pContext, mal_dev } +mal_uint32 mal_get_format_priority_index(mal_format format) // Lower = better. +{ + for (mal_uint32 i = 0; i < mal_countof(g_malFormatPriorities); ++i) { + if (g_malFormatPriorities[i] == format) { + return i; + } + } + + // Getting here means the format could not be found or is equal to mal_format_unknown. + return (mal_uint32)-1; +} + + /////////////////////////////////////////////////////////////////////////////// // // Null Backend @@ -4591,7 +4796,10 @@ void mal_channel_mask_to_channel_map__win32(DWORD dwChannelMask, mal_uint32 chan } #ifdef __cplusplus -#define mal_is_guid_equal(a, b) IsEqualGUID(*((const GUID*)a), *((const GUID*)b)) +mal_bool32 mal_is_guid_equal(const void* a, const void* b) +{ + return IsEqualGUID(*(const GUID*)a, *(const GUID*)b); +} #else #define mal_is_guid_equal(a, b) IsEqualGUID((const GUID*)a, (const GUID*)b) #endif @@ -8040,7 +8248,7 @@ mal_result mal_device_init__winmm(mal_context* pContext, mal_device_type type, m } // Backend tax. Need to fiddle with this. 
- float fBackend = 12.0; + float fBackend = 10.0; pDevice->bufferSizeInFrames = mal_calculate_default_buffer_size_in_frames(pConfig->performanceProfile, pConfig->sampleRate, fCPUSpeed*fType*fBackend); } @@ -8578,8 +8786,8 @@ static struct const char* name; float scale; } g_malDefaultBufferSizeScalesALSA[] = { - {"bcm2835 IEC958/HDMI", 8.0f}, - {"bcm2835 ALSA", 8.0f} + {"bcm2835 IEC958/HDMI", 6.0f}, + {"bcm2835 ALSA", 6.0f} }; float mal_find_default_buffer_size_scale__alsa(const char* deviceName) @@ -9771,6 +9979,8 @@ mal_result mal_device_init__alsa(mal_context* pContext, mal_device_type type, co } + + // Hardware parameters. mal_snd_pcm_hw_params_t* pHWParams = (mal_snd_pcm_hw_params_t*)alloca(((mal_snd_pcm_hw_params_sizeof_proc)pContext->alsa.snd_pcm_hw_params_sizeof)()); mal_zero_memory(pHWParams, ((mal_snd_pcm_hw_params_sizeof_proc)pContext->alsa.snd_pcm_hw_params_sizeof)()); @@ -12343,11 +12553,1794 @@ mal_result mal_device__stop_backend__jack(mal_device* pDevice) if (((mal_jack_deactivate_proc)pContext->jack.jack_deactivate)((mal_jack_client_t*)pDevice->jack.pClient) != 0) { return mal_post_error(pDevice, "[JACK] An error occurred when deactivating the JACK client.", MAL_ERROR); } + + mal_device__set_state(pDevice, MAL_STATE_STOPPED); + mal_stop_proc onStop = pDevice->onStop; + if (onStop) { + onStop(pDevice); + } return MAL_SUCCESS; } +#endif // JACK + + + +/////////////////////////////////////////////////////////////////////////////// +// +// Core Audio Backend +// +/////////////////////////////////////////////////////////////////////////////// +#ifdef MAL_HAS_COREAUDIO +#include + +#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE == 1 + #define MAL_APPLE_MOBILE +#else + #define MAL_APPLE_DESKTOP #endif +#if defined(MAL_APPLE_DESKTOP) +#include +#else +#include +#endif + +#include + +// CoreFoundation +typedef Boolean (* mal_CFStringGetCString_proc)(CFStringRef theString, char* buffer, CFIndex bufferSize, CFStringEncoding encoding); + +// CoreAudio +#if 
defined(MAL_APPLE_DESKTOP) +typedef OSStatus (* mal_AudioObjectGetPropertyData_proc)(AudioObjectID inObjectID, const AudioObjectPropertyAddress* inAddress, UInt32 inQualifierDataSize, const void* inQualifierData, UInt32* ioDataSize, void* outData); +typedef OSStatus (* mal_AudioObjectGetPropertyDataSize_proc)(AudioObjectID inObjectID, const AudioObjectPropertyAddress* inAddress, UInt32 inQualifierDataSize, const void* inQualifierData, UInt32* outDataSize); +typedef OSStatus (* mal_AudioObjectSetPropertyData_proc)(AudioObjectID inObjectID, const AudioObjectPropertyAddress* inAddress, UInt32 inQualifierDataSize, const void* inQualifierData, UInt32 inDataSize, const void* inData); +#endif + +// AudioToolbox +typedef AudioComponent (* mal_AudioComponentFindNext_proc)(AudioComponent inComponent, const AudioComponentDescription* inDesc); +typedef OSStatus (* mal_AudioComponentInstanceDispose_proc)(AudioComponentInstance inInstance); +typedef OSStatus (* mal_AudioComponentInstanceNew_proc)(AudioComponent inComponent, AudioComponentInstance* outInstance); +typedef OSStatus (* mal_AudioOutputUnitStart_proc)(AudioUnit inUnit); +typedef OSStatus (* mal_AudioOutputUnitStop_proc)(AudioUnit inUnit); +typedef OSStatus (* mal_AudioUnitAddPropertyListener_proc)(AudioUnit inUnit, AudioUnitPropertyID inID, AudioUnitPropertyListenerProc inProc, void* inProcUserData); +typedef OSStatus (* mal_AudioUnitGetProperty_proc)(AudioUnit inUnit, AudioUnitPropertyID inID, AudioUnitScope inScope, AudioUnitElement inElement, void* outData, UInt32* ioDataSize); +typedef OSStatus (* mal_AudioUnitSetProperty_proc)(AudioUnit inUnit, AudioUnitPropertyID inID, AudioUnitScope inScope, AudioUnitElement inElement, const void* inData, UInt32 inDataSize); +typedef OSStatus (* mal_AudioUnitInitialize_proc)(AudioUnit inUnit); +typedef OSStatus (* mal_AudioUnitRender_proc)(AudioUnit inUnit, AudioUnitRenderActionFlags* ioActionFlags, const AudioTimeStamp* inTimeStamp, UInt32 inOutputBusNumber, UInt32 
inNumberFrames, AudioBufferList* ioData); + + +#define MAL_COREAUDIO_OUTPUT_BUS 0 +#define MAL_COREAUDIO_INPUT_BUS 1 + + +// Core Audio +// +// So far, Core Audio has been the worst backend to work with due to being both unintuitive and having almost no documentation +// apart from comments in the headers (which admittedly are quite good). For my own purposes, and for anybody out there who +// needs to figure out how this darn thing works, I'm going to outline a few things here. +// +// Since mini_al is a fairly low-level API, one of the things it needs is control over specific devices, and it needs to be +// able to identify whether or not it can be used as playback and/or capture. The AudioObject API is the only one I've seen +// that supports this level of detail. There was some public domain sample code I stumbled across that used the AudioComponent +// and AudioUnit APIs, but I couldn't see anything that gave low-level control over device selection and capabilities (the +// distinction between playback and capture in particular). Therefore, mini_al is using the AudioObject API. +// +// Most (all?) functions in the AudioObject API take an AudioObjectID as their input. This is the device identifier. When +// retrieving global information, such as the device list, you use kAudioObjectSystemObject. When retrieving device-specific +// data, you pass in the ID for that device. In order to retrieve device-specific IDs you need to enumerate over each of the +// devices. This is done using the AudioObjectGetPropertyDataSize() and AudioObjectGetPropertyData() APIs which seem to be +// the central APIs for retrieving information about the system and specific devices. +// +// To use the AudioObjectGetPropertyData() API you need to use the notion of a property address. A property address is a +// structure with three variables and is used to identify which property you are getting or setting.
The first is the "selector" +// which is basically the specific property that you're wanting to retrieve or set. The second is the "scope", which is +// typically set to kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyScopeInput for input-specific properties and +// kAudioObjectPropertyScopeOutput for output-specific properties. The last is the "element" which is always set to +// kAudioObjectPropertyElementMaster in mini_al's case. I don't know of any cases where this would be set to anything different. +// +// Back to the earlier issue of device retrieval, you first use the AudioObjectGetPropertyDataSize() API to retrieve the size +// of the raw data which is just a list of AudioDeviceID's. You use the kAudioObjectSystemObject AudioObjectID, and a property +// address with the kAudioHardwarePropertyDevices selector and the kAudioObjectPropertyScopeGlobal scope. Once you have the +// size, allocate a block of memory of that size and then call AudioObjectGetPropertyData(). The data is just a list of +// AudioDeviceID's so just do "dataSize/sizeof(AudioDeviceID)" to know the device count. 
+ +mal_result mal_result_from_OSStatus(OSStatus status) +{ + switch (status) + { + case noErr: return MAL_SUCCESS; + #if defined(MAL_APPLE_DESKTOP) + case kAudioHardwareNotRunningError: return MAL_DEVICE_NOT_STARTED; + case kAudioHardwareUnspecifiedError: return MAL_ERROR; + case kAudioHardwareUnknownPropertyError: return MAL_INVALID_ARGS; + case kAudioHardwareBadPropertySizeError: return MAL_INVALID_OPERATION; + case kAudioHardwareIllegalOperationError: return MAL_INVALID_OPERATION; + case kAudioHardwareBadObjectError: return MAL_INVALID_ARGS; + case kAudioHardwareBadDeviceError: return MAL_INVALID_ARGS; + case kAudioHardwareBadStreamError: return MAL_INVALID_ARGS; + case kAudioHardwareUnsupportedOperationError: return MAL_INVALID_OPERATION; + case kAudioDeviceUnsupportedFormatError: return MAL_FORMAT_NOT_SUPPORTED; + case kAudioDevicePermissionsError: return MAL_ACCESS_DENIED; + #endif + default: return MAL_ERROR; + } +} + +#if 0 +mal_channel mal_channel_from_AudioChannelBitmap(AudioChannelBitmap bit) +{ + switch (bit) + { + case kAudioChannelBit_Left: return MAL_CHANNEL_LEFT; + case kAudioChannelBit_Right: return MAL_CHANNEL_RIGHT; + case kAudioChannelBit_Center: return MAL_CHANNEL_FRONT_CENTER; + case kAudioChannelBit_LFEScreen: return MAL_CHANNEL_LFE; + case kAudioChannelBit_LeftSurround: return MAL_CHANNEL_BACK_LEFT; + case kAudioChannelBit_RightSurround: return MAL_CHANNEL_BACK_RIGHT; + case kAudioChannelBit_LeftCenter: return MAL_CHANNEL_FRONT_LEFT_CENTER; + case kAudioChannelBit_RightCenter: return MAL_CHANNEL_FRONT_RIGHT_CENTER; + case kAudioChannelBit_CenterSurround: return MAL_CHANNEL_BACK_CENTER; + case kAudioChannelBit_LeftSurroundDirect: return MAL_CHANNEL_SIDE_LEFT; + case kAudioChannelBit_RightSurroundDirect: return MAL_CHANNEL_SIDE_RIGHT; + case kAudioChannelBit_TopCenterSurround: return MAL_CHANNEL_TOP_CENTER; + case kAudioChannelBit_VerticalHeightLeft: return MAL_CHANNEL_TOP_FRONT_LEFT; + case kAudioChannelBit_VerticalHeightCenter: return 
MAL_CHANNEL_TOP_FRONT_CENTER; + case kAudioChannelBit_VerticalHeightRight: return MAL_CHANNEL_TOP_FRONT_RIGHT; + case kAudioChannelBit_TopBackLeft: return MAL_CHANNEL_TOP_BACK_LEFT; + case kAudioChannelBit_TopBackCenter: return MAL_CHANNEL_TOP_BACK_CENTER; + case kAudioChannelBit_TopBackRight: return MAL_CHANNEL_TOP_BACK_RIGHT; + default: return MAL_CHANNEL_NONE; + } +} +#endif + +mal_channel mal_channel_from_AudioChannelLabel(AudioChannelLabel label) +{ + switch (label) + { + case kAudioChannelLabel_Unknown: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_Unused: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_UseCoordinates: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_Left: return MAL_CHANNEL_LEFT; + case kAudioChannelLabel_Right: return MAL_CHANNEL_RIGHT; + case kAudioChannelLabel_Center: return MAL_CHANNEL_FRONT_CENTER; + case kAudioChannelLabel_LFEScreen: return MAL_CHANNEL_LFE; + case kAudioChannelLabel_LeftSurround: return MAL_CHANNEL_BACK_LEFT; + case kAudioChannelLabel_RightSurround: return MAL_CHANNEL_BACK_RIGHT; + case kAudioChannelLabel_LeftCenter: return MAL_CHANNEL_FRONT_LEFT_CENTER; + case kAudioChannelLabel_RightCenter: return MAL_CHANNEL_FRONT_RIGHT_CENTER; + case kAudioChannelLabel_CenterSurround: return MAL_CHANNEL_BACK_CENTER; + case kAudioChannelLabel_LeftSurroundDirect: return MAL_CHANNEL_SIDE_LEFT; + case kAudioChannelLabel_RightSurroundDirect: return MAL_CHANNEL_SIDE_RIGHT; + case kAudioChannelLabel_TopCenterSurround: return MAL_CHANNEL_TOP_CENTER; + case kAudioChannelLabel_VerticalHeightLeft: return MAL_CHANNEL_TOP_FRONT_LEFT; + case kAudioChannelLabel_VerticalHeightCenter: return MAL_CHANNEL_TOP_FRONT_CENTER; + case kAudioChannelLabel_VerticalHeightRight: return MAL_CHANNEL_TOP_FRONT_RIGHT; + case kAudioChannelLabel_TopBackLeft: return MAL_CHANNEL_TOP_BACK_LEFT; + case kAudioChannelLabel_TopBackCenter: return MAL_CHANNEL_TOP_BACK_CENTER; + case kAudioChannelLabel_TopBackRight: return MAL_CHANNEL_TOP_BACK_RIGHT; + case 
kAudioChannelLabel_RearSurroundLeft: return MAL_CHANNEL_BACK_LEFT; + case kAudioChannelLabel_RearSurroundRight: return MAL_CHANNEL_BACK_RIGHT; + case kAudioChannelLabel_LeftWide: return MAL_CHANNEL_SIDE_LEFT; + case kAudioChannelLabel_RightWide: return MAL_CHANNEL_SIDE_RIGHT; + case kAudioChannelLabel_LFE2: return MAL_CHANNEL_LFE; + case kAudioChannelLabel_LeftTotal: return MAL_CHANNEL_LEFT; + case kAudioChannelLabel_RightTotal: return MAL_CHANNEL_RIGHT; + case kAudioChannelLabel_HearingImpaired: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_Narration: return MAL_CHANNEL_MONO; + case kAudioChannelLabel_Mono: return MAL_CHANNEL_MONO; + case kAudioChannelLabel_DialogCentricMix: return MAL_CHANNEL_MONO; + case kAudioChannelLabel_CenterSurroundDirect: return MAL_CHANNEL_BACK_CENTER; + case kAudioChannelLabel_Haptic: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_Ambisonic_W: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_Ambisonic_X: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_Ambisonic_Y: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_Ambisonic_Z: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_MS_Mid: return MAL_CHANNEL_LEFT; + case kAudioChannelLabel_MS_Side: return MAL_CHANNEL_RIGHT; + case kAudioChannelLabel_XY_X: return MAL_CHANNEL_LEFT; + case kAudioChannelLabel_XY_Y: return MAL_CHANNEL_RIGHT; + case kAudioChannelLabel_HeadphonesLeft: return MAL_CHANNEL_LEFT; + case kAudioChannelLabel_HeadphonesRight: return MAL_CHANNEL_RIGHT; + case kAudioChannelLabel_ClickTrack: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_ForeignLanguage: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_Discrete: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_Discrete_0: return MAL_CHANNEL_AUX_0; + case kAudioChannelLabel_Discrete_1: return MAL_CHANNEL_AUX_1; + case kAudioChannelLabel_Discrete_2: return MAL_CHANNEL_AUX_2; + case kAudioChannelLabel_Discrete_3: return MAL_CHANNEL_AUX_3; + case kAudioChannelLabel_Discrete_4: return MAL_CHANNEL_AUX_4; + case 
kAudioChannelLabel_Discrete_5: return MAL_CHANNEL_AUX_5; + case kAudioChannelLabel_Discrete_6: return MAL_CHANNEL_AUX_6; + case kAudioChannelLabel_Discrete_7: return MAL_CHANNEL_AUX_7; + case kAudioChannelLabel_Discrete_8: return MAL_CHANNEL_AUX_8; + case kAudioChannelLabel_Discrete_9: return MAL_CHANNEL_AUX_9; + case kAudioChannelLabel_Discrete_10: return MAL_CHANNEL_AUX_10; + case kAudioChannelLabel_Discrete_11: return MAL_CHANNEL_AUX_11; + case kAudioChannelLabel_Discrete_12: return MAL_CHANNEL_AUX_12; + case kAudioChannelLabel_Discrete_13: return MAL_CHANNEL_AUX_13; + case kAudioChannelLabel_Discrete_14: return MAL_CHANNEL_AUX_14; + case kAudioChannelLabel_Discrete_15: return MAL_CHANNEL_AUX_15; + case kAudioChannelLabel_Discrete_65535: return MAL_CHANNEL_NONE; + + #if 0 // Introduced in a later version of macOS. + case kAudioChannelLabel_HOA_ACN: return MAL_CHANNEL_NONE; + case kAudioChannelLabel_HOA_ACN_0: return MAL_CHANNEL_AUX_0; + case kAudioChannelLabel_HOA_ACN_1: return MAL_CHANNEL_AUX_1; + case kAudioChannelLabel_HOA_ACN_2: return MAL_CHANNEL_AUX_2; + case kAudioChannelLabel_HOA_ACN_3: return MAL_CHANNEL_AUX_3; + case kAudioChannelLabel_HOA_ACN_4: return MAL_CHANNEL_AUX_4; + case kAudioChannelLabel_HOA_ACN_5: return MAL_CHANNEL_AUX_5; + case kAudioChannelLabel_HOA_ACN_6: return MAL_CHANNEL_AUX_6; + case kAudioChannelLabel_HOA_ACN_7: return MAL_CHANNEL_AUX_7; + case kAudioChannelLabel_HOA_ACN_8: return MAL_CHANNEL_AUX_8; + case kAudioChannelLabel_HOA_ACN_9: return MAL_CHANNEL_AUX_9; + case kAudioChannelLabel_HOA_ACN_10: return MAL_CHANNEL_AUX_10; + case kAudioChannelLabel_HOA_ACN_11: return MAL_CHANNEL_AUX_11; + case kAudioChannelLabel_HOA_ACN_12: return MAL_CHANNEL_AUX_12; + case kAudioChannelLabel_HOA_ACN_13: return MAL_CHANNEL_AUX_13; + case kAudioChannelLabel_HOA_ACN_14: return MAL_CHANNEL_AUX_14; + case kAudioChannelLabel_HOA_ACN_15: return MAL_CHANNEL_AUX_15; + case kAudioChannelLabel_HOA_ACN_65024: return MAL_CHANNEL_NONE; + #endif + + default: 
return MAL_CHANNEL_NONE;
+    }
+}
+
+// Maps a Core Audio stream description onto the equivalent mini_al sample format.
+//
+// pDescription - The stream description to inspect. Must not be NULL.
+// pFormatOut   - Receives the matching format, or mal_format_unknown when no match was found. Must not be NULL.
+//
+// Returns MAL_SUCCESS when a match was found and MAL_FORMAT_NOT_SUPPORTED otherwise. Only linear PCM that is low-aligned,
+// little-endian and interleaved is accepted: 32-bit float, signed 16/24(packed)/32-bit integer and unsigned 8-bit integer.
+mal_result mal_format_from_AudioStreamBasicDescription(const AudioStreamBasicDescription* pDescription, mal_format* pFormatOut)
+{
+    mal_assert(pDescription != NULL);
+    mal_assert(pFormatOut != NULL);
+
+    *pFormatOut = mal_format_unknown; // Safety.
+
+    // There's a few things mini_al doesn't support.
+    if (pDescription->mFormatID != kAudioFormatLinearPCM) {
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+
+    // We don't support any non-packed formats that are aligned high.
+    if ((pDescription->mFormatFlags & kLinearPCMFormatFlagIsAlignedHigh) != 0) {
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+
+    // Big-endian formats are not currently supported, but will be added in a future version of mini_al. This must test the
+    // big-endian flag; testing the aligned-high flag a second time would let big-endian data through as if it were native.
+    if ((pDescription->mFormatFlags & kLinearPCMFormatFlagIsBigEndian) != 0) {
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+
+    // We are not currently supporting non-interleaved formats (this will be added in a future version of mini_al).
+    if ((pDescription->mFormatFlags & kAudioFormatFlagIsNonInterleaved) != 0) {
+        return MAL_FORMAT_NOT_SUPPORTED;
+    }
+
+    if ((pDescription->mFormatFlags & kLinearPCMFormatFlagIsFloat) != 0) {
+        if (pDescription->mBitsPerChannel == 32) {
+            *pFormatOut = mal_format_f32;
+            return MAL_SUCCESS;
+        }
+    } else {
+        if ((pDescription->mFormatFlags & kLinearPCMFormatFlagIsSignedInteger) != 0) {
+            if (pDescription->mBitsPerChannel == 16) {
+                *pFormatOut = mal_format_s16;
+                return MAL_SUCCESS;
+            } else if (pDescription->mBitsPerChannel == 24) {
+                // 24-bit samples are only usable when tightly packed (3 bytes per sample).
+                if (pDescription->mBytesPerFrame == (pDescription->mBitsPerChannel/8 * pDescription->mChannelsPerFrame)) {
+                    *pFormatOut = mal_format_s24;
+                    return MAL_SUCCESS;
+                } else {
+                    if (pDescription->mBytesPerFrame/pDescription->mChannelsPerFrame == sizeof(mal_int32)) {
+                        // TODO: Implement mal_format_s24_32.
+                        //*pFormatOut = mal_format_s24_32;
+                        //return MAL_SUCCESS;
+                        return MAL_FORMAT_NOT_SUPPORTED;
+                    }
+                }
+            } else if (pDescription->mBitsPerChannel == 32) {
+                *pFormatOut = mal_format_s32;
+                return MAL_SUCCESS;
+            }
+        } else {
+            if (pDescription->mBitsPerChannel == 8) {
+                *pFormatOut = mal_format_u8;
+                return MAL_SUCCESS;
+            }
+        }
+    }
+
+    // Getting here means the format is not supported.
+    return MAL_FORMAT_NOT_SUPPORTED;
+}
+
+#if defined(MAL_APPLE_DESKTOP)
+mal_result mal_get_device_object_ids__coreaudio(mal_context* pContext, UInt32* pDeviceCount, AudioObjectID** ppDeviceObjectIDs) // NOTE: Free the returned buffer with mal_free().
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pDeviceCount != NULL);
+    mal_assert(ppDeviceObjectIDs != NULL);
+    (void)pContext;
+
+    // Safety.
+    *pDeviceCount = 0;
+    *ppDeviceObjectIDs = NULL;
+
+    AudioObjectPropertyAddress propAddressDevices;
+    propAddressDevices.mSelector = kAudioHardwarePropertyDevices;
+    propAddressDevices.mScope = kAudioObjectPropertyScopeGlobal;
+    propAddressDevices.mElement = kAudioObjectPropertyElementMaster;
+
+    UInt32 deviceObjectsDataSize;
+    OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(kAudioObjectSystemObject, &propAddressDevices, 0, NULL, &deviceObjectsDataSize);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+
+    AudioObjectID* pDeviceObjectIDs = (AudioObjectID*)mal_malloc(deviceObjectsDataSize);
+    if (pDeviceObjectIDs == NULL) {
+        return MAL_OUT_OF_MEMORY;
+    }
+
+    status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(kAudioObjectSystemObject, &propAddressDevices, 0, NULL, &deviceObjectsDataSize, pDeviceObjectIDs);
+    if (status != noErr) {
+        mal_free(pDeviceObjectIDs);
+        return mal_result_from_OSStatus(status);
+    }
+
+    *pDeviceCount = deviceObjectsDataSize / sizeof(AudioObjectID);
+    *ppDeviceObjectIDs = pDeviceObjectIDs;
+    return MAL_SUCCESS;
+}
+
+mal_result
mal_get_AudioObject_uid_as_CFStringRef(mal_context* pContext, AudioObjectID objectID, CFStringRef* pUID)
+{
+    // Queries kAudioDevicePropertyDeviceUID on the given object and stores the resulting string reference in *pUID. On failure the
+    // OSStatus is translated with mal_result_from_OSStatus() and *pUID is left untouched by this function.
+    mal_assert(pContext != NULL);
+
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyDeviceUID;
+    propAddress.mScope = kAudioObjectPropertyScopeGlobal;
+    propAddress.mElement = kAudioObjectPropertyElementMaster;
+
+    UInt32 dataSize = sizeof(*pUID);
+    OSStatus status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(objectID, &propAddress, 0, NULL, &dataSize, pUID);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Retrieves the UID of the given audio object and converts it to a UTF-8 C string in bufferOut (capacity bufferSize).
+//
+// Returns MAL_SUCCESS on success, the translated OSStatus when the UID cannot be queried, or MAL_ERROR when the string
+// conversion fails.
+//
+// NOTE(review): The CFStringRef obtained here is never released. Presumably the caller owns the string returned for
+// kAudioDevicePropertyDeviceUID, in which case this leaks one CFString per call - confirm against the Core Audio headers.
+mal_result mal_get_AudioObject_uid(mal_context* pContext, AudioObjectID objectID, size_t bufferSize, char* bufferOut)
+{
+    mal_assert(pContext != NULL);
+
+    CFStringRef uid;
+    mal_result result = mal_get_AudioObject_uid_as_CFStringRef(pContext, objectID, &uid);
+    if (result != MAL_SUCCESS) {
+        return result;
+    }
+
+    if (!((mal_CFStringGetCString_proc)pContext->coreaudio.CFStringGetCString)(uid, bufferOut, bufferSize, kCFStringEncodingUTF8)) {
+        return MAL_ERROR;
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Retrieves the name of the given audio object (kAudioDevicePropertyDeviceNameCFString) and converts it to a UTF-8 C string
+// in bufferOut (capacity bufferSize).
+//
+// Returns MAL_SUCCESS on success, the translated OSStatus when the name cannot be queried, or MAL_ERROR when the string
+// conversion fails.
+//
+// NOTE(review): As with the UID, deviceName is never released after the conversion - confirm whether it needs to be.
+mal_result mal_get_AudioObject_name(mal_context* pContext, AudioObjectID objectID, size_t bufferSize, char* bufferOut)
+{
+    mal_assert(pContext != NULL);
+
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyDeviceNameCFString;
+    propAddress.mScope = kAudioObjectPropertyScopeGlobal;
+    propAddress.mElement = kAudioObjectPropertyElementMaster;
+
+    CFStringRef deviceName = NULL;
+    UInt32 dataSize = sizeof(deviceName);
+    OSStatus status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(objectID, &propAddress, 0, NULL, &dataSize, &deviceName);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+
+    if (!((mal_CFStringGetCString_proc)pContext->coreaudio.CFStringGetCString)(deviceName, bufferOut, bufferSize, kCFStringEncodingUTF8)) {
+        return MAL_ERROR;
+    }
+
+    return MAL_SUCCESS;
+}
+
+// Checks whether the given device has any stream buffers in the given scope (input or output).
+//
+// Returns MAL_TRUE when the stream configuration for that scope contains at least one buffer. Returns MAL_FALSE otherwise,
+// including when the property query or the temporary allocation fails.
+mal_bool32 mal_does_AudioObject_support_scope(mal_context* pContext, AudioObjectID deviceObjectID, AudioObjectPropertyScope scope)
+{
+    mal_assert(pContext != NULL);
+
+    // To know whether or not a device supports a scope we need to look at the stream configuration for that scope. If it has an
+    // output buffer it's a playback device, and if it has an input buffer it's a capture device.
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyStreamConfiguration;
+    propAddress.mScope = scope;
+    propAddress.mElement = kAudioObjectPropertyElementMaster;
+
+    UInt32 dataSize;
+    OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(deviceObjectID, &propAddress, 0, NULL, &dataSize);
+    if (status != noErr) {
+        return MAL_FALSE;
+    }
+
+    AudioBufferList* pBufferList = (AudioBufferList*)mal_malloc(dataSize);
+    if (pBufferList == NULL) {
+        return MAL_FALSE; // Out of memory.
+    }
+
+    status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pBufferList);
+    if (status != noErr) {
+        mal_free(pBufferList);
+        return MAL_FALSE;
+    }
+
+    mal_bool32 isSupported = MAL_FALSE;
+    if (pBufferList->mNumberBuffers > 0) {
+        isSupported = MAL_TRUE;
+    }
+
+    mal_free(pBufferList);
+    return isSupported;
+}
+
+// Returns MAL_TRUE if the device has at least one buffer in the output scope.
+mal_bool32 mal_does_AudioObject_support_playback(mal_context* pContext, AudioObjectID deviceObjectID)
+{
+    return mal_does_AudioObject_support_scope(pContext, deviceObjectID, kAudioObjectPropertyScopeOutput);
+}
+
+// Returns MAL_TRUE if the device has at least one buffer in the input scope.
+mal_bool32 mal_does_AudioObject_support_capture(mal_context* pContext, AudioObjectID deviceObjectID)
+{
+    return mal_does_AudioObject_support_scope(pContext, deviceObjectID, kAudioObjectPropertyScopeInput);
+}
+
+
+mal_result mal_get_AudioObject_stream_descriptions(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, UInt32* pDescriptionCount, AudioStreamRangedDescription** ppDescriptions) // NOTE:
Free the returned pointer with mal_free().
+{
+    // Retrieves the list of available virtual stream formats for the device in the scope selected by deviceType (output for
+    // playback, input otherwise). On success *ppDescriptions points to a mal_malloc()'d array of *pDescriptionCount entries.
+    mal_assert(pContext != NULL);
+    mal_assert(pDescriptionCount != NULL);
+    mal_assert(ppDescriptions != NULL);
+
+    // Safety. Keeps the outputs well defined on every failure path, matching the other getters in this backend.
+    *pDescriptionCount = 0;
+    *ppDescriptions = NULL;
+
+    // TODO: Experiment with kAudioStreamPropertyAvailablePhysicalFormats instead of (or in addition to) kAudioStreamPropertyAvailableVirtualFormats. My
+    // MacBook Pro uses s24/32 format, however, which mini_al does not currently support.
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioStreamPropertyAvailableVirtualFormats; //kAudioStreamPropertyAvailablePhysicalFormats;
+    propAddress.mScope = (deviceType == mal_device_type_playback) ? kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput;
+    propAddress.mElement = kAudioObjectPropertyElementMaster;
+
+    UInt32 dataSize;
+    OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(deviceObjectID, &propAddress, 0, NULL, &dataSize);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+
+    AudioStreamRangedDescription* pDescriptions = (AudioStreamRangedDescription*)mal_malloc(dataSize);
+    if (pDescriptions == NULL) {
+        return MAL_OUT_OF_MEMORY;
+    }
+
+    status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pDescriptions);
+    if (status != noErr) {
+        mal_free(pDescriptions);
+        return mal_result_from_OSStatus(status);
+    }
+
+    *pDescriptionCount = dataSize / sizeof(*pDescriptions);
+    *ppDescriptions = pDescriptions;
+    return MAL_SUCCESS;
+}
+
+
+
+// Retrieves the preferred channel layout of the device for the scope selected by deviceType (output for playback, input
+// otherwise). On success *ppChannelLayout points to a mal_malloc()'d buffer; on failure it is set to NULL.
+mal_result mal_get_AudioObject_channel_layout(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, AudioChannelLayout** ppChannelLayout) // NOTE: Free the returned pointer with mal_free().
+{
+    mal_assert(pContext != NULL);
+    mal_assert(ppChannelLayout != NULL);
+
+    *ppChannelLayout = NULL; // Safety.
+
+    AudioObjectPropertyAddress propAddress;
+    propAddress.mSelector = kAudioDevicePropertyPreferredChannelLayout;
+    propAddress.mScope = (deviceType == mal_device_type_playback) ? kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput;
+    propAddress.mElement = kAudioObjectPropertyElementMaster;
+
+    UInt32 dataSize;
+    OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(deviceObjectID, &propAddress, 0, NULL, &dataSize);
+    if (status != noErr) {
+        return mal_result_from_OSStatus(status);
+    }
+
+    AudioChannelLayout* pChannelLayout = (AudioChannelLayout*)mal_malloc(dataSize);
+    if (pChannelLayout == NULL) {
+        return MAL_OUT_OF_MEMORY;
+    }
+
+    status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pChannelLayout);
+    if (status != noErr) {
+        mal_free(pChannelLayout);
+        return mal_result_from_OSStatus(status);
+    }
+
+    *ppChannelLayout = pChannelLayout;
+    return MAL_SUCCESS;
+}
+
+mal_result mal_get_AudioObject_channel_count(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_uint32* pChannelCount)
+{
+    mal_assert(pContext != NULL);
+    mal_assert(pChannelCount != NULL);
+
+    *pChannelCount = 0; // Safety.
+ + AudioChannelLayout* pChannelLayout; + mal_result result = mal_get_AudioObject_channel_layout(pContext, deviceObjectID, deviceType, &pChannelLayout); + if (result != MAL_SUCCESS) { + return result; + } + + if (pChannelLayout->mChannelLayoutTag == kAudioChannelLayoutTag_UseChannelDescriptions) { + *pChannelCount = pChannelLayout->mNumberChannelDescriptions; + } else if (pChannelLayout->mChannelLayoutTag == kAudioChannelLayoutTag_UseChannelBitmap) { + *pChannelCount = mal_count_set_bits(pChannelLayout->mChannelBitmap); + } else { + *pChannelCount = AudioChannelLayoutTag_GetNumberOfChannels(pChannelLayout->mChannelLayoutTag); + } + + mal_free(pChannelLayout); + return MAL_SUCCESS; +} + +mal_result mal_get_channel_map_from_AudioChannelLayout(AudioChannelLayout* pChannelLayout, mal_channel channelMap[MAL_MAX_CHANNELS]) +{ + mal_assert(pChannelLayout != NULL); + + if (pChannelLayout->mChannelLayoutTag == kAudioChannelLayoutTag_UseChannelDescriptions) { + for (UInt32 iChannel = 0; iChannel < pChannelLayout->mNumberChannelDescriptions; ++iChannel) { + channelMap[iChannel] = mal_channel_from_AudioChannelLabel(pChannelLayout->mChannelDescriptions[iChannel].mChannelLabel); + } + } else +#if 0 + if (pChannelLayout->mChannelLayoutTag == kAudioChannelLayoutTag_UseChannelBitmap) { + // This is the same kind of system that's used by Windows audio APIs. + UInt32 iChannel = 0; + AudioChannelBitmap bitmap = pChannelLayout->mChannelBitmap; + for (UInt32 iBit = 0; iBit < 32; ++iBit) { + AudioChannelBitmap bit = bitmap & (1 << iBit); + if (bit != 0) { + channelMap[iChannel++] = mal_channel_from_AudioChannelBit(bit); + } + } + } else +#endif + { + // Need to use the tag to determine the channel map. For now I'm just assuming a default channel map, but later on this should + // be updated to determine the mapping based on the tag. 
+ UInt32 channelCount = AudioChannelLayoutTag_GetNumberOfChannels(pChannelLayout->mChannelLayoutTag); + switch (pChannelLayout->mChannelLayoutTag) + { + case kAudioChannelLayoutTag_Mono: + case kAudioChannelLayoutTag_Stereo: + case kAudioChannelLayoutTag_StereoHeadphones: + case kAudioChannelLayoutTag_MatrixStereo: + case kAudioChannelLayoutTag_MidSide: + case kAudioChannelLayoutTag_XY: + case kAudioChannelLayoutTag_Binaural: + case kAudioChannelLayoutTag_Ambisonic_B_Format: + { + mal_get_standard_channel_map(mal_standard_channel_map_default, channelCount, channelMap); + } break; + + case kAudioChannelLayoutTag_Octagonal: + { + channelMap[7] = MAL_CHANNEL_SIDE_RIGHT; + channelMap[6] = MAL_CHANNEL_SIDE_LEFT; + } // Intentional fallthrough. + case kAudioChannelLayoutTag_Hexagonal: + { + channelMap[5] = MAL_CHANNEL_BACK_CENTER; + } // Intentional fallthrough. + case kAudioChannelLayoutTag_Pentagonal: + { + channelMap[4] = MAL_CHANNEL_FRONT_CENTER; + } // Intentional fallghrough. + case kAudioChannelLayoutTag_Quadraphonic: + { + channelMap[3] = MAL_CHANNEL_BACK_RIGHT; + channelMap[2] = MAL_CHANNEL_BACK_LEFT; + channelMap[1] = MAL_CHANNEL_RIGHT; + channelMap[0] = MAL_CHANNEL_LEFT; + } break; + + // TODO: Add support for more tags here. + + default: + { + mal_get_standard_channel_map(mal_standard_channel_map_default, channelCount, channelMap); + } break; + } + } + + return MAL_SUCCESS; +} + +mal_result mal_get_AudioObject_channel_map(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_channel channelMap[MAL_MAX_CHANNELS]) +{ + mal_assert(pContext != NULL); + + AudioChannelLayout* pChannelLayout; + mal_result result = mal_get_AudioObject_channel_layout(pContext, deviceObjectID, deviceType, &pChannelLayout); + if (result != MAL_SUCCESS) { + return result; // Rather than always failing here, would it be more robust to simply assume a default? 
+ } + + result = mal_get_channel_map_from_AudioChannelLayout(pChannelLayout, channelMap); + + // The layout was allocated on our behalf by mal_get_AudioObject_channel_layout(), so release it whether or not the conversion succeeded. + mal_free(pChannelLayout); + return result; +} + +mal_result mal_get_AudioObject_sample_rates(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, UInt32* pSampleRateRangesCount, AudioValueRange** ppSampleRateRanges) // NOTE: Free the returned pointer with mal_free(). +{ + mal_assert(pContext != NULL); + mal_assert(pSampleRateRangesCount != NULL); + mal_assert(ppSampleRateRanges != NULL); + + // Safety. + *pSampleRateRangesCount = 0; + *ppSampleRateRanges = NULL; + + AudioObjectPropertyAddress propAddress; + propAddress.mSelector = kAudioDevicePropertyAvailableNominalSampleRates; + propAddress.mScope = (deviceType == mal_device_type_playback) ? kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput; + propAddress.mElement = kAudioObjectPropertyElementMaster; + + UInt32 dataSize; + OSStatus status = ((mal_AudioObjectGetPropertyDataSize_proc)pContext->coreaudio.AudioObjectGetPropertyDataSize)(deviceObjectID, &propAddress, 0, NULL, &dataSize); + if (status != noErr) { + return mal_result_from_OSStatus(status); + } + + AudioValueRange* pSampleRateRanges = (AudioValueRange*)mal_malloc(dataSize); + if (pSampleRateRanges == NULL) { + return MAL_OUT_OF_MEMORY; + } + + status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pSampleRateRanges); + if (status != noErr) { + mal_free(pSampleRateRanges); + return mal_result_from_OSStatus(status); + } + + *pSampleRateRangesCount = dataSize / sizeof(*pSampleRateRanges); + *ppSampleRateRanges = pSampleRateRanges; + return MAL_SUCCESS; +} + +mal_result mal_get_AudioObject_get_closest_sample_rate(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_uint32 sampleRateIn, mal_uint32* pSampleRateOut) +{ + mal_assert(pContext != NULL); + mal_assert(pSampleRateOut != 
NULL); + + *pSampleRateOut = 0; // Safety. + + UInt32 sampleRateRangeCount; + AudioValueRange* pSampleRateRanges; + mal_result result = mal_get_AudioObject_sample_rates(pContext, deviceObjectID, deviceType, &sampleRateRangeCount, &pSampleRateRanges); + if (result != MAL_SUCCESS) { + return result; + } + + if (sampleRateRangeCount == 0) { + mal_free(pSampleRateRanges); + return MAL_ERROR; // Should never hit this case should we? + } + + if (sampleRateIn == 0) { + // Search in order of mini_al's preferred priority. + for (UInt32 iMALSampleRate = 0; iMALSampleRate < mal_countof(g_malStandardSampleRatePriorities); ++iMALSampleRate) { + mal_uint32 malSampleRate = g_malStandardSampleRatePriorities[iMALSampleRate]; + for (UInt32 iCASampleRate = 0; iCASampleRate < sampleRateRangeCount; ++iCASampleRate) { + AudioValueRange caSampleRate = pSampleRateRanges[iCASampleRate]; + if (caSampleRate.mMinimum <= malSampleRate && caSampleRate.mMaximum >= malSampleRate) { + *pSampleRateOut = malSampleRate; + mal_free(pSampleRateRanges); + return MAL_SUCCESS; + } + } + } + + // If we get here it means none of mini_al's standard sample rates matched any of the supported sample rates from the device. In this + // case we just fall back to the first one reported by Core Audio. + mal_assert(sampleRateRangeCount > 0); + + *pSampleRateOut = pSampleRateRanges[0].mMinimum; + mal_free(pSampleRateRanges); + return MAL_SUCCESS; + } else { + // Find the closest match to this sample rate. 
+ UInt32 currentAbsoluteDifference = INT32_MAX; + UInt32 iCurrentClosestRange = (UInt32)-1; + for (UInt32 iRange = 0; iRange < sampleRateRangeCount; ++iRange) { + if (pSampleRateRanges[iRange].mMinimum <= sampleRateIn && pSampleRateRanges[iRange].mMaximum >= sampleRateIn) { + *pSampleRateOut = sampleRateIn; + mal_free(pSampleRateRanges); + return MAL_SUCCESS; + } else { + UInt32 absoluteDifference; + if (pSampleRateRanges[iRange].mMinimum > sampleRateIn) { + absoluteDifference = pSampleRateRanges[iRange].mMinimum - sampleRateIn; + } else { + absoluteDifference = sampleRateIn - pSampleRateRanges[iRange].mMaximum; + } + + if (currentAbsoluteDifference > absoluteDifference) { + currentAbsoluteDifference = absoluteDifference; + iCurrentClosestRange = iRange; + } + } + } + + mal_assert(iCurrentClosestRange != (UInt32)-1); + + *pSampleRateOut = pSampleRateRanges[iCurrentClosestRange].mMinimum; + mal_free(pSampleRateRanges); + return MAL_SUCCESS; + } + + // Should never get here, but it would mean we weren't able to find any suitable sample rates. + //mal_free(pSampleRateRanges); + //return MAL_ERROR; +} + + +mal_result mal_get_AudioObject_closest_buffer_size_in_frames(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_uint32 bufferSizeInFramesIn, mal_uint32* pBufferSizeInFramesOut) +{ + mal_assert(pContext != NULL); + mal_assert(pBufferSizeInFramesOut != NULL); + + *pBufferSizeInFramesOut = 0; // Safety. + + AudioObjectPropertyAddress propAddress; + propAddress.mSelector = kAudioDevicePropertyBufferFrameSizeRange; + propAddress.mScope = (deviceType == mal_device_type_playback) ? 
kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput; + propAddress.mElement = kAudioObjectPropertyElementMaster; + + AudioValueRange bufferSizeRange; + UInt32 dataSize = sizeof(bufferSizeRange); + OSStatus status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, &bufferSizeRange); + if (status != noErr) { + return mal_result_from_OSStatus(status); + } + + // This is just a clamp. + if (bufferSizeInFramesIn < bufferSizeRange.mMinimum) { + *pBufferSizeInFramesOut = (mal_uint32)bufferSizeRange.mMinimum; + } else if (bufferSizeInFramesIn > bufferSizeRange.mMaximum) { + *pBufferSizeInFramesOut = (mal_uint32)bufferSizeRange.mMaximum; + } else { + *pBufferSizeInFramesOut = bufferSizeInFramesIn; + } + + return MAL_SUCCESS; +} + +mal_result mal_set_AudioObject_buffer_size_in_frames(mal_context* pContext, AudioObjectID deviceObjectID, mal_device_type deviceType, mal_uint32* pBufferSizeInOut) +{ + mal_assert(pContext != NULL); + + mal_uint32 chosenBufferSizeInFrames; + mal_result result = mal_get_AudioObject_closest_buffer_size_in_frames(pContext, deviceObjectID, deviceType, *pBufferSizeInOut, &chosenBufferSizeInFrames); + if (result != MAL_SUCCESS) { + return result; + } + + // Try setting the size of the buffer... If this fails we just use whatever is currently set. + AudioObjectPropertyAddress propAddress; + propAddress.mSelector = kAudioDevicePropertyBufferFrameSize; + propAddress.mScope = (deviceType == mal_device_type_playback) ? kAudioObjectPropertyScopeOutput : kAudioObjectPropertyScopeInput; + propAddress.mElement = kAudioObjectPropertyElementMaster; + + OSStatus status = ((mal_AudioObjectSetPropertyData_proc)pContext->coreaudio.AudioObjectSetPropertyData)(deviceObjectID, &propAddress, 0, NULL, sizeof(chosenBufferSizeInFrames), &chosenBufferSizeInFrames); + if (status != noErr) { + // Getting here means we were unable to set the buffer size. 
In this case just use whatever is currently selected. + UInt32 dataSize = sizeof(*pBufferSizeInOut); + status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(deviceObjectID, &propAddress, 0, NULL, &dataSize, pBufferSizeInOut); + if (status != noErr) { + return mal_result_from_OSStatus(status); + } + } else { + // The device accepted the clamped size, so report that value back instead of leaving the caller's unclamped request in place. + *pBufferSizeInOut = chosenBufferSizeInFrames; + } + + return MAL_SUCCESS; +} + + +mal_result mal_find_AudioObjectID(mal_context* pContext, mal_device_type type, const mal_device_id* pDeviceID, AudioObjectID* pDeviceObjectID) +{ + mal_assert(pContext != NULL); + mal_assert(pDeviceObjectID != NULL); + + // Safety. + *pDeviceObjectID = 0; + + if (pDeviceID == NULL) { + // Default device. + AudioObjectPropertyAddress propAddressDefaultDevice; + propAddressDefaultDevice.mScope = kAudioObjectPropertyScopeGlobal; + propAddressDefaultDevice.mElement = kAudioObjectPropertyElementMaster; + if (type == mal_device_type_playback) { + propAddressDefaultDevice.mSelector = kAudioHardwarePropertyDefaultOutputDevice; + } else { + propAddressDefaultDevice.mSelector = kAudioHardwarePropertyDefaultInputDevice; + } + + UInt32 defaultDeviceObjectIDSize = sizeof(AudioObjectID); + AudioObjectID defaultDeviceObjectID; + OSStatus status = ((mal_AudioObjectGetPropertyData_proc)pContext->coreaudio.AudioObjectGetPropertyData)(kAudioObjectSystemObject, &propAddressDefaultDevice, 0, NULL, &defaultDeviceObjectIDSize, &defaultDeviceObjectID); + if (status == noErr) { + *pDeviceObjectID = defaultDeviceObjectID; + return MAL_SUCCESS; + } + } else { + // Explicit device. 
+ UInt32 deviceCount; + AudioObjectID* pDeviceObjectIDs; + mal_result result = mal_get_device_object_ids__coreaudio(pContext, &deviceCount, &pDeviceObjectIDs); + if (result != MAL_SUCCESS) { + return result; + } + + // The ID list is ours to release with mal_free() (as mal_context_enumerate_devices__coreaudio() does), so free it on every path out of this branch. + for (UInt32 iDevice = 0; iDevice < deviceCount; ++iDevice) { + AudioObjectID deviceObjectID = pDeviceObjectIDs[iDevice]; + + char uid[256]; + if (mal_get_AudioObject_uid(pContext, deviceObjectID, sizeof(uid), uid) != MAL_SUCCESS) { + continue; + } + + if (type == mal_device_type_playback) { + if (mal_does_AudioObject_support_playback(pContext, deviceObjectID)) { + if (strcmp(uid, pDeviceID->coreaudio) == 0) { + *pDeviceObjectID = deviceObjectID; + mal_free(pDeviceObjectIDs); + return MAL_SUCCESS; + } + } + } else { + if (mal_does_AudioObject_support_capture(pContext, deviceObjectID)) { + if (strcmp(uid, pDeviceID->coreaudio) == 0) { + *pDeviceObjectID = deviceObjectID; + mal_free(pDeviceObjectIDs); + return MAL_SUCCESS; + } + } + } + } + + mal_free(pDeviceObjectIDs); + } + + // If we get here it means we couldn't find the device. + return MAL_NO_DEVICE; +} + + +mal_result mal_device_find_best_format__coreaudio(const mal_device* pDevice, AudioStreamBasicDescription* pFormat) +{ + mal_assert(pDevice != NULL); + + AudioObjectID deviceObjectID = (AudioObjectID)pDevice->coreaudio.deviceObjectID; + + UInt32 deviceFormatDescriptionCount; + AudioStreamRangedDescription* pDeviceFormatDescriptions; + mal_result result = mal_get_AudioObject_stream_descriptions(pDevice->pContext, deviceObjectID, pDevice->type, &deviceFormatDescriptionCount, &pDeviceFormatDescriptions); + if (result != MAL_SUCCESS) { + return result; + } + + mal_uint32 desiredSampleRate = pDevice->sampleRate; + if (pDevice->usingDefaultSampleRate) { + // When using the device's default sample rate, we get the highest priority standard rate supported by the device. Otherwise + // we just use the pre-set rate. 
+ for (mal_uint32 iStandardRate = 0; iStandardRate < mal_countof(g_malStandardSampleRatePriorities); ++iStandardRate) { + mal_uint32 standardRate = g_malStandardSampleRatePriorities[iStandardRate]; + + mal_bool32 foundRate = MAL_FALSE; + for (UInt32 iDeviceRate = 0; iDeviceRate < deviceFormatDescriptionCount; ++iDeviceRate) { + mal_uint32 deviceRate = (mal_uint32)pDeviceFormatDescriptions[iDeviceRate].mFormat.mSampleRate; + + if (deviceRate == standardRate) { + desiredSampleRate = standardRate; + foundRate = MAL_TRUE; + break; + } + } + + if (foundRate) { + break; + } + } + } + + mal_uint32 desiredChannelCount = pDevice->channels; + if (pDevice->usingDefaultChannels) { + mal_get_AudioObject_channel_count(pDevice->pContext, deviceObjectID, pDevice->type, &desiredChannelCount); // <-- Not critical if this fails. + } + + mal_format desiredFormat = pDevice->format; + if (pDevice->usingDefaultFormat) { + desiredFormat = g_malFormatPriorities[0]; + } + + // If we get here it means we don't have an exact match to what the client is asking for. We'll need to find the closest one. The next + // loop will check for formats that have the same sample rate to what we're asking for. If there is, we prefer that one in all cases. 
+ AudioStreamBasicDescription bestDeviceFormatSoFar; + mal_zero_object(&bestDeviceFormatSoFar); + + mal_bool32 hasSupportedFormat = MAL_FALSE; + for (UInt32 iFormat = 0; iFormat < deviceFormatDescriptionCount; ++iFormat) { + mal_format format; + mal_result formatResult = mal_format_from_AudioStreamBasicDescription(&pDeviceFormatDescriptions[iFormat].mFormat, &format); + if (formatResult == MAL_SUCCESS && format != mal_format_unknown) { + hasSupportedFormat = MAL_TRUE; + bestDeviceFormatSoFar = pDeviceFormatDescriptions[iFormat].mFormat; + break; + } + } + + if (!hasSupportedFormat) { + return MAL_FORMAT_NOT_SUPPORTED; + } + + + for (UInt32 iFormat = 0; iFormat < deviceFormatDescriptionCount; ++iFormat) { + AudioStreamBasicDescription thisDeviceFormat = pDeviceFormatDescriptions[iFormat].mFormat; + + // If the format is not supported by mini_al we need to skip this one entirely. + mal_format thisSampleFormat; + mal_result formatResult = mal_format_from_AudioStreamBasicDescription(&pDeviceFormatDescriptions[iFormat].mFormat, &thisSampleFormat); + if (formatResult != MAL_SUCCESS || thisSampleFormat == mal_format_unknown) { + continue; // The format is not supported by mini_al. Skip. + } + + mal_format bestSampleFormatSoFar; + mal_format_from_AudioStreamBasicDescription(&bestDeviceFormatSoFar, &bestSampleFormatSoFar); + + + // Getting here means the format is supported by mini_al which makes this format a candidate. + if (thisDeviceFormat.mSampleRate != desiredSampleRate) { + // The sample rate does not match, but this format could still be usable, although it's a very low priority. If the best format + // so far has an equal sample rate we can just ignore this one. + if (bestDeviceFormatSoFar.mSampleRate == desiredSampleRate) { + continue; // The best sample rate so far has the same sample rate as what we requested which means it's still the best so far. Skip this format. 
+ } else { + // In this case, neither the best format so far nor this one have the same sample rate. Check the channel count next. + if (thisDeviceFormat.mChannelsPerFrame != desiredChannelCount) { + // This format has a different sample rate _and_ a different channel count. + if (bestDeviceFormatSoFar.mChannelsPerFrame == desiredChannelCount) { + continue; // No change to the best format. + } else { + // Both this format and the best so far have different sample rates and different channel counts. Whichever has the + // best format is the new best. + if (mal_get_format_priority_index(thisSampleFormat) < mal_get_format_priority_index(bestSampleFormatSoFar)) { + bestDeviceFormatSoFar = thisDeviceFormat; + continue; + } else { + continue; // No change to the best format. + } + } + } else { + // This format has a different sample rate but the desired channel count. + if (bestDeviceFormatSoFar.mChannelsPerFrame == desiredChannelCount) { + // Both this format and the best so far have the desired channel count. Whichever has the best format is the new best. + if (mal_get_format_priority_index(thisSampleFormat) < mal_get_format_priority_index(bestSampleFormatSoFar)) { + bestDeviceFormatSoFar = thisDeviceFormat; + continue; + } else { + continue; // No change to the best format for now. + } + } else { + // This format has the desired channel count, but the best so far does not. We have a new best. + bestDeviceFormatSoFar = thisDeviceFormat; + continue; + } + } + } + } else { + // The sample rates match which makes this format a very high priority contender. If the best format so far has a different + // sample rate it needs to be replaced with this one. + if (bestDeviceFormatSoFar.mSampleRate != desiredSampleRate) { + bestDeviceFormatSoFar = thisDeviceFormat; + continue; + } else { + // In this case both this format and the best format so far have the same sample rate. Check the channel count next. 
+ if (thisDeviceFormat.mChannelsPerFrame == desiredChannelCount) { + // In this case this format has the same channel count as what the client is requesting. If the best format so far has + // a different count, this one becomes the new best. + if (bestDeviceFormatSoFar.mChannelsPerFrame != desiredChannelCount) { + bestDeviceFormatSoFar = thisDeviceFormat; + continue; + } else { + // In this case both this format and the best so far have the ideal sample rate and channel count. Check the format. + if (thisSampleFormat == desiredFormat) { + bestDeviceFormatSoFar = thisDeviceFormat; + break; // Found the exact match. + } else { + // The formats are different. The new best format is the one with the highest priority format according to mini_al. + if (mal_get_format_priority_index(thisSampleFormat) < mal_get_format_priority_index(bestSampleFormatSoFar)) { + bestDeviceFormatSoFar = thisDeviceFormat; + continue; + } else { + continue; // No change to the best format for now. + } + } + } + } else { + // In this case the channel count is different to what the client has requested. If the best so far has the same channel + // count as the requested count then it remains the best. + if (bestDeviceFormatSoFar.mChannelsPerFrame == desiredChannelCount) { + continue; + } else { + // This is the case where both have the same sample rate (good) but different channel counts. Right now both have about + // the same priority, but we need to compare the format now. + if (thisSampleFormat == bestSampleFormatSoFar) { + if (mal_get_format_priority_index(thisSampleFormat) < mal_get_format_priority_index(bestSampleFormatSoFar)) { + bestDeviceFormatSoFar = thisDeviceFormat; + continue; + } else { + continue; // No change to the best format for now. 
+ } + } + } + } + } + } + } + + *pFormat = bestDeviceFormatSoFar; + return MAL_SUCCESS; +} +#endif + + + +mal_bool32 mal_context_is_device_id_equal__coreaudio(mal_context* pContext, const mal_device_id* pID0, const mal_device_id* pID1) +{ + mal_assert(pContext != NULL); + mal_assert(pID0 != NULL); + mal_assert(pID1 != NULL); + (void)pContext; + + return strcmp(pID0->coreaudio, pID1->coreaudio) == 0; +} + +mal_result mal_context_enumerate_devices__coreaudio(mal_context* pContext, mal_enum_devices_callback_proc callback, void* pUserData) +{ + mal_assert(pContext != NULL); + mal_assert(callback != NULL); + +#if defined(MAL_APPLE_DESKTOP) + UInt32 deviceCount; + AudioObjectID* pDeviceObjectIDs; + mal_result result = mal_get_device_object_ids__coreaudio(pContext, &deviceCount, &pDeviceObjectIDs); + if (result != MAL_SUCCESS) { + return result; + } + + for (UInt32 iDevice = 0; iDevice < deviceCount; ++iDevice) { + AudioObjectID deviceObjectID = pDeviceObjectIDs[iDevice]; + + mal_device_info info; + mal_zero_object(&info); + if (mal_get_AudioObject_uid(pContext, deviceObjectID, sizeof(info.id.coreaudio), info.id.coreaudio) != MAL_SUCCESS) { + continue; + } + if (mal_get_AudioObject_name(pContext, deviceObjectID, sizeof(info.name), info.name) != MAL_SUCCESS) { + continue; + } + + if (mal_does_AudioObject_support_playback(pContext, deviceObjectID)) { + if (!callback(pContext, mal_device_type_playback, &info, pUserData)) { + break; + } + } + if (mal_does_AudioObject_support_capture(pContext, deviceObjectID)) { + if (!callback(pContext, mal_device_type_capture, &info, pUserData)) { + break; + } + } + } + + mal_free(pDeviceObjectIDs); +#else + // Only supporting default devices on non-Desktop platforms. 
+ mal_device_info info; + + mal_zero_object(&info); + mal_strncpy_s(info.name, sizeof(info.name), MAL_DEFAULT_PLAYBACK_DEVICE_NAME, (size_t)-1); + if (!callback(pContext, mal_device_type_playback, &info, pUserData)) { + return MAL_SUCCESS; + } + + mal_zero_object(&info); + mal_strncpy_s(info.name, sizeof(info.name), MAL_DEFAULT_CAPTURE_DEVICE_NAME, (size_t)-1); + if (!callback(pContext, mal_device_type_capture, &info, pUserData)) { + return MAL_SUCCESS; + } +#endif + + return MAL_SUCCESS; +} + +mal_result mal_context_get_device_info__coreaudio(mal_context* pContext, mal_device_type deviceType, const mal_device_id* pDeviceID, mal_share_mode shareMode, mal_device_info* pDeviceInfo) +{ + mal_assert(pContext != NULL); + (void)shareMode; + (void)pDeviceInfo; + +#if defined(MAL_APPLE_DESKTOP) + // Desktop + // ======= + AudioObjectID deviceObjectID; + mal_result result = mal_find_AudioObjectID(pContext, deviceType, pDeviceID, &deviceObjectID); + if (result != MAL_SUCCESS) { + return result; + } + + result = mal_get_AudioObject_uid(pContext, deviceObjectID, sizeof(pDeviceInfo->id.coreaudio), pDeviceInfo->id.coreaudio); + if (result != MAL_SUCCESS) { + return result; + } + + result = mal_get_AudioObject_name(pContext, deviceObjectID, sizeof(pDeviceInfo->name), pDeviceInfo->name); + if (result != MAL_SUCCESS) { + return result; + } + + // Formats. 
+ UInt32 streamDescriptionCount; + AudioStreamRangedDescription* pStreamDescriptions; + result = mal_get_AudioObject_stream_descriptions(pContext, deviceObjectID, deviceType, &streamDescriptionCount, &pStreamDescriptions); + if (result != MAL_SUCCESS) { + return result; + } + + for (UInt32 iStreamDescription = 0; iStreamDescription < streamDescriptionCount; ++iStreamDescription) { + mal_format format; + result = mal_format_from_AudioStreamBasicDescription(&pStreamDescriptions[iStreamDescription].mFormat, &format); + if (result != MAL_SUCCESS) { + continue; + } + + mal_assert(format != mal_format_unknown); + + // Make sure the format isn't already in the output list. + mal_bool32 exists = MAL_FALSE; + for (mal_uint32 iOutputFormat = 0; iOutputFormat < pDeviceInfo->formatCount; ++iOutputFormat) { + if (pDeviceInfo->formats[iOutputFormat] == format) { + exists = MAL_TRUE; + break; + } + } + + if (!exists) { + pDeviceInfo->formats[pDeviceInfo->formatCount++] = format; + } + } + + mal_free(pStreamDescriptions); + + + // Channels. + result = mal_get_AudioObject_channel_count(pContext, deviceObjectID, deviceType, &pDeviceInfo->minChannels); + if (result != MAL_SUCCESS) { + return result; + } + pDeviceInfo->maxChannels = pDeviceInfo->minChannels; + + + // Sample rates. 
+ UInt32 sampleRateRangeCount; + AudioValueRange* pSampleRateRanges; + result = mal_get_AudioObject_sample_rates(pContext, deviceObjectID, deviceType, &sampleRateRangeCount, &pSampleRateRanges); + if (result != MAL_SUCCESS) { + return result; + } + + if (sampleRateRangeCount > 0) { + pDeviceInfo->minSampleRate = UINT32_MAX; + pDeviceInfo->maxSampleRate = 0; + for (UInt32 iSampleRate = 0; iSampleRate < sampleRateRangeCount; ++iSampleRate) { + if (pDeviceInfo->minSampleRate > pSampleRateRanges[iSampleRate].mMinimum) { + pDeviceInfo->minSampleRate = pSampleRateRanges[iSampleRate].mMinimum; + } + if (pDeviceInfo->maxSampleRate < pSampleRateRanges[iSampleRate].mMaximum) { + pDeviceInfo->maxSampleRate = pSampleRateRanges[iSampleRate].mMaximum; + } + } + } + + mal_free(pSampleRateRanges); // mal_get_AudioObject_sample_rates() hands ownership of the range list to the caller. +#else + // Mobile + // ====== + if (deviceType == mal_device_type_playback) { + mal_strncpy_s(pDeviceInfo->name, sizeof(pDeviceInfo->name), MAL_DEFAULT_PLAYBACK_DEVICE_NAME, (size_t)-1); + } else { + mal_strncpy_s(pDeviceInfo->name, sizeof(pDeviceInfo->name), MAL_DEFAULT_CAPTURE_DEVICE_NAME, (size_t)-1); + } + + // Retrieving device information is more annoying on mobile than desktop. For simplicity I'm locking this down to whatever format is + // reported on a temporary I/O unit. The problem, however, is that this doesn't return a value for the sample rate which we need to + // retrieve from the AVAudioSession shared instance. 
+ AudioComponentDescription desc; + desc.componentType = kAudioUnitType_Output; + desc.componentSubType = kAudioUnitSubType_RemoteIO; + desc.componentManufacturer = kAudioUnitManufacturer_Apple; + desc.componentFlags = 0; + desc.componentFlagsMask = 0; + + AudioComponent component = ((mal_AudioComponentFindNext_proc)pContext->coreaudio.AudioComponentFindNext)(NULL, &desc); + if (component == NULL) { + return MAL_FAILED_TO_INIT_BACKEND; + } + + AudioUnit audioUnit; + OSStatus status = ((mal_AudioComponentInstanceNew_proc)pContext->coreaudio.AudioComponentInstanceNew)(component, &audioUnit); + if (status != noErr) { + return mal_result_from_OSStatus(status); + } + + AudioUnitScope formatScope = (deviceType == mal_device_type_playback) ? kAudioUnitScope_Input : kAudioUnitScope_Output; + AudioUnitElement formatElement = (deviceType == mal_device_type_playback) ? MAL_COREAUDIO_OUTPUT_BUS : MAL_COREAUDIO_INPUT_BUS; + + AudioStreamBasicDescription bestFormat; + UInt32 propSize = sizeof(bestFormat); + status = ((mal_AudioUnitGetProperty_proc)pContext->coreaudio.AudioUnitGetProperty)(audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, &propSize); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)(audioUnit); + return mal_result_from_OSStatus(status); + } + + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)(audioUnit); + audioUnit = NULL; + + + pDeviceInfo->minChannels = bestFormat.mChannelsPerFrame; + pDeviceInfo->maxChannels = bestFormat.mChannelsPerFrame; + + pDeviceInfo->formatCount = 1; + mal_result result = mal_format_from_AudioStreamBasicDescription(&bestFormat, &pDeviceInfo->formats[0]); + if (result != MAL_SUCCESS) { + return result; + } + + // It looks like Apple are wanting to push the whole AVAudioSession thing. Thus, we need to use that to determine device settings. 
To do + // this we just get the shared instance and inspect. + @autoreleasepool { + AVAudioSession* pAudioSession = [AVAudioSession sharedInstance]; + mal_assert(pAudioSession != NULL); + + pDeviceInfo->minSampleRate = (mal_uint32)pAudioSession.sampleRate; + pDeviceInfo->maxSampleRate = pDeviceInfo->minSampleRate; + } +#endif + + return MAL_SUCCESS; +} + +mal_result mal_context_init__coreaudio(mal_context* pContext) +{ + mal_assert(pContext != NULL); + +#if !defined(MAL_NO_RUNTIME_LINKING) && !defined(MAL_APPLE_MOBILE) + pContext->coreaudio.hCoreFoundation = mal_dlopen("CoreFoundation.framework/CoreFoundation"); + if (pContext->coreaudio.hCoreFoundation == NULL) { + return MAL_API_NOT_FOUND; + } + + pContext->coreaudio.CFStringGetCString = mal_dlsym(pContext->coreaudio.hCoreFoundation, "CFStringGetCString"); + + + pContext->coreaudio.hCoreAudio = mal_dlopen("CoreAudio.framework/CoreAudio"); + if (pContext->coreaudio.hCoreAudio == NULL) { + mal_dlclose(pContext->coreaudio.hCoreFoundation); + return MAL_API_NOT_FOUND; + } + + pContext->coreaudio.AudioObjectGetPropertyData = mal_dlsym(pContext->coreaudio.hCoreAudio, "AudioObjectGetPropertyData"); + pContext->coreaudio.AudioObjectGetPropertyDataSize = mal_dlsym(pContext->coreaudio.hCoreAudio, "AudioObjectGetPropertyDataSize"); + pContext->coreaudio.AudioObjectSetPropertyData = mal_dlsym(pContext->coreaudio.hCoreAudio, "AudioObjectSetPropertyData"); + + + // It looks like Apple has moved some APIs from AudioUnit into AudioToolbox on more recent versions of macOS. They are still + // defined in AudioUnit, but just in case they decided to remove them from there entirely I'm going to implement a fallback. + // The way it'll work is that it'll first try AudioUnit, and if the required symbols are not present there we'll fall back to + // AudioToolbox. 
+ pContext->coreaudio.hAudioUnit = mal_dlopen("AudioUnit.framework/AudioUnit"); + if (pContext->coreaudio.hAudioUnit == NULL) { + mal_dlclose(pContext->coreaudio.hCoreAudio); + mal_dlclose(pContext->coreaudio.hCoreFoundation); + return MAL_API_NOT_FOUND; + } + + if (mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioComponentFindNext") == NULL) { + // Couldn't find the required symbols in AudioUnit, so fall back to AudioToolbox. + mal_dlclose(pContext->coreaudio.hAudioUnit); + pContext->coreaudio.hAudioUnit = mal_dlopen("AudioToolbox.framework/AudioToolbox"); + if (pContext->coreaudio.hAudioUnit == NULL) { + mal_dlclose(pContext->coreaudio.hCoreAudio); + mal_dlclose(pContext->coreaudio.hCoreFoundation); + return MAL_API_NOT_FOUND; + } + } + + pContext->coreaudio.AudioComponentFindNext = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioComponentFindNext"); + pContext->coreaudio.AudioComponentInstanceDispose = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioComponentInstanceDispose"); + pContext->coreaudio.AudioComponentInstanceNew = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioComponentInstanceNew"); + pContext->coreaudio.AudioOutputUnitStart = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioOutputUnitStart"); + pContext->coreaudio.AudioOutputUnitStop = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioOutputUnitStop"); + pContext->coreaudio.AudioUnitAddPropertyListener = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioUnitAddPropertyListener"); + pContext->coreaudio.AudioUnitGetProperty = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioUnitGetProperty"); + pContext->coreaudio.AudioUnitSetProperty = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioUnitSetProperty"); + pContext->coreaudio.AudioUnitInitialize = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioUnitInitialize"); + pContext->coreaudio.AudioUnitRender = mal_dlsym(pContext->coreaudio.hAudioUnit, "AudioUnitRender"); +#else + pContext->coreaudio.CFStringGetCString = (mal_proc)CFStringGetCString; + + #if 
defined(MAL_APPLE_DESKTOP) + pContext->coreaudio.AudioObjectGetPropertyData = (mal_proc)AudioObjectGetPropertyData; + pContext->coreaudio.AudioObjectGetPropertyDataSize = (mal_proc)AudioObjectGetPropertyDataSize; + pContext->coreaudio.AudioObjectSetPropertyData = (mal_proc)AudioObjectSetPropertyData; + #endif + + pContext->coreaudio.AudioComponentFindNext = (mal_proc)AudioComponentFindNext; + pContext->coreaudio.AudioComponentInstanceDispose = (mal_proc)AudioComponentInstanceDispose; + pContext->coreaudio.AudioComponentInstanceNew = (mal_proc)AudioComponentInstanceNew; + pContext->coreaudio.AudioOutputUnitStart = (mal_proc)AudioOutputUnitStart; + pContext->coreaudio.AudioOutputUnitStop = (mal_proc)AudioOutputUnitStop; + pContext->coreaudio.AudioUnitAddPropertyListener = (mal_proc)AudioUnitAddPropertyListener; + pContext->coreaudio.AudioUnitGetProperty = (mal_proc)AudioUnitGetProperty; + pContext->coreaudio.AudioUnitSetProperty = (mal_proc)AudioUnitSetProperty; + pContext->coreaudio.AudioUnitInitialize = (mal_proc)AudioUnitInitialize; + pContext->coreaudio.AudioUnitRender = (mal_proc)AudioUnitRender; +#endif + + pContext->onDeviceIDEqual = mal_context_is_device_id_equal__coreaudio; + pContext->onEnumDevices = mal_context_enumerate_devices__coreaudio; + pContext->onGetDeviceInfo = mal_context_get_device_info__coreaudio; + + return MAL_SUCCESS; +} + +mal_result mal_context_uninit__coreaudio(mal_context* pContext) +{ + mal_assert(pContext != NULL); + mal_assert(pContext->backend == mal_backend_coreaudio); + +#if !defined(MAL_NO_RUNTIME_LINKING) && !defined(MAL_APPLE_MOBILE) + mal_dlclose(pContext->coreaudio.hAudioUnit); + mal_dlclose(pContext->coreaudio.hCoreAudio); + mal_dlclose(pContext->coreaudio.hCoreFoundation); +#endif + + (void)pContext; + return MAL_SUCCESS; +} + +void mal_device_uninit__coreaudio(mal_device* pDevice) +{ + mal_assert(pDevice != NULL); + mal_assert(mal_device__get_state(pDevice) == MAL_STATE_UNINITIALIZED); + + 
((mal_AudioComponentInstanceDispose_proc)pDevice->pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + + if (pDevice->coreaudio.pAudioBufferList) { + mal_free(pDevice->coreaudio.pAudioBufferList); + } +} + + +OSStatus mal_on_output__coreaudio(void* pUserData, AudioUnitRenderActionFlags* pActionFlags, const AudioTimeStamp* pTimeStamp, UInt32 busNumber, UInt32 frameCount, AudioBufferList* pBufferList) +{ + (void)pActionFlags; + (void)pTimeStamp; + (void)busNumber; + + mal_device* pDevice = (mal_device*)pUserData; + mal_assert(pDevice != NULL); + + // For now we can assume everything is interleaved. + for (UInt32 iBuffer = 0; iBuffer < pBufferList->mNumberBuffers; ++iBuffer) { + if (pBufferList->mBuffers[iBuffer].mNumberChannels == pDevice->internalChannels) { + mal_uint32 frameCountForThisBuffer = pBufferList->mBuffers[iBuffer].mDataByteSize / mal_get_bytes_per_frame(pDevice->internalFormat, pDevice->internalChannels); + if (frameCountForThisBuffer > 0) { + mal_device__read_frames_from_client(pDevice, frameCountForThisBuffer, pBufferList->mBuffers[iBuffer].mData); + } + } else { + // This case is where the number of channels in the output buffer do not match our internal channels. It could mean that it's + // not interleaved, in which case we can't handle right now since mini_al does not yet support non-interleaved streams. We just + // output silence here. + mal_zero_memory(pBufferList->mBuffers[iBuffer].mData, pBufferList->mBuffers[iBuffer].mDataByteSize); + } + } + + return noErr; +} + +OSStatus mal_on_input__coreaudio(void* pUserData, AudioUnitRenderActionFlags* pActionFlags, const AudioTimeStamp* pTimeStamp, UInt32 busNumber, UInt32 frameCount, AudioBufferList* pUnusedBufferList) +{ + (void)pActionFlags; + (void)pTimeStamp; + (void)busNumber; + (void)frameCount; + (void)pUnusedBufferList; + + mal_device* pDevice = (mal_device*)pUserData; + mal_assert(pDevice != NULL); + + // I'm not going to trust the input frame count. 
I'm instead going to base this off the size of the first buffer. + UInt32 actualFrameCount = ((AudioBufferList*)pDevice->coreaudio.pAudioBufferList)->mBuffers[0].mDataByteSize / mal_get_bytes_per_sample(pDevice->internalFormat) / ((AudioBufferList*)pDevice->coreaudio.pAudioBufferList)->mBuffers[0].mNumberChannels; + if (actualFrameCount == 0) { + return noErr; + } + + OSStatus status = ((mal_AudioUnitRender_proc)pDevice->pContext->coreaudio.AudioUnitRender)((AudioUnit)pDevice->coreaudio.audioUnit, pActionFlags, pTimeStamp, busNumber, actualFrameCount, (AudioBufferList*)pDevice->coreaudio.pAudioBufferList); + if (status != noErr) { + return status; + } + + AudioBufferList* pRenderedBufferList = (AudioBufferList*)pDevice->coreaudio.pAudioBufferList; + mal_assert(pRenderedBufferList); + + // For now we can assume everything is interleaved. + for (UInt32 iBuffer = 0; iBuffer < pRenderedBufferList->mNumberBuffers; ++iBuffer) { + if (pRenderedBufferList->mBuffers[iBuffer].mNumberChannels == pDevice->internalChannels) { + mal_uint32 frameCountForThisBuffer = pRenderedBufferList->mBuffers[iBuffer].mDataByteSize / mal_get_bytes_per_frame(pDevice->internalFormat, pDevice->internalChannels); + if (frameCountForThisBuffer > 0) { + mal_device__send_frames_to_client(pDevice, frameCountForThisBuffer, pRenderedBufferList->mBuffers[iBuffer].mData); + } + } else { + // This case is where the number of channels in the output buffer do not match our internal channels. It could mean that it's + // not interleaved, in which case we can't handle right now since mini_al does not yet support non-interleaved streams. 
+ } + } + + return noErr; +} + +void on_start_stop__coreaudio(void* pUserData, AudioUnit audioUnit, AudioUnitPropertyID propertyID, AudioUnitScope scope, AudioUnitElement element) +{ + (void)propertyID; + + mal_device* pDevice = (mal_device*)pUserData; + mal_assert(pDevice != NULL); + + UInt32 isRunning; + UInt32 isRunningSize = sizeof(isRunning); + OSStatus status = ((mal_AudioUnitGetProperty_proc)pDevice->pContext->coreaudio.AudioUnitGetProperty)(audioUnit, kAudioOutputUnitProperty_IsRunning, scope, element, &isRunning, &isRunningSize); + if (status != noErr) { + return; // Don't really know what to do in this case... just ignore it, I suppose... + } + + if (!isRunning) { + mal_stop_proc onStop = pDevice->onStop; + if (onStop) { + onStop(pDevice); + } + } +} + + +mal_result mal_device_init__coreaudio(mal_context* pContext, mal_device_type deviceType, const mal_device_id* pDeviceID, const mal_device_config* pConfig, mal_device* pDevice) +{ + mal_assert(pContext != NULL); + mal_assert(pConfig != NULL); + mal_assert(pDevice != NULL); + mal_assert(deviceType == mal_device_type_playback || deviceType == mal_device_type_capture); + + mal_result result; + +#if defined(MAL_APPLE_DESKTOP) + AudioObjectID deviceObjectID; + result = mal_find_AudioObjectID(pContext, deviceType, pDeviceID, &deviceObjectID); + if (result != MAL_SUCCESS) { + return result; + } + + pDevice->coreaudio.deviceObjectID = deviceObjectID; +#endif + + // Core audio doesn't really use the notion of a period so we can leave this unmodified, but not too over the top. + if (pDevice->periods < 1) { + pDevice->periods = 1; + } + if (pDevice->periods > 16) { + pDevice->periods = 16; + } + + + // Audio component. 
+ AudioComponentDescription desc; + desc.componentType = kAudioUnitType_Output; +#if defined(MAL_APPLE_DESKTOP) + desc.componentSubType = kAudioUnitSubType_HALOutput; +#else + desc.componentSubType = kAudioUnitSubType_RemoteIO; +#endif + desc.componentManufacturer = kAudioUnitManufacturer_Apple; + desc.componentFlags = 0; + desc.componentFlagsMask = 0; + + pDevice->coreaudio.component = ((mal_AudioComponentFindNext_proc)pContext->coreaudio.AudioComponentFindNext)(NULL, &desc); + if (pDevice->coreaudio.component == NULL) { + return MAL_FAILED_TO_INIT_BACKEND; + } + + + // Audio unit. + OSStatus status = ((mal_AudioComponentInstanceNew_proc)pContext->coreaudio.AudioComponentInstanceNew)(pDevice->coreaudio.component, (AudioUnit*)&pDevice->coreaudio.audioUnit); + if (status != noErr) { + return mal_result_from_OSStatus(status); + } + + + // The input/output buses need to be explicitly enabled and disabled. We set the flag based on the output unit first, then we just swap it for input. + UInt32 enableIOFlag = 1; + if (deviceType == mal_device_type_capture) { + enableIOFlag = 0; + } + + status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_EnableIO, kAudioUnitScope_Output, MAL_COREAUDIO_OUTPUT_BUS, &enableIOFlag, sizeof(enableIOFlag)); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); + } + + enableIOFlag = (enableIOFlag == 0) ? 
1 : 0; + status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_EnableIO, kAudioUnitScope_Input, MAL_COREAUDIO_INPUT_BUS, &enableIOFlag, sizeof(enableIOFlag)); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); + } + + + // Set the device to use with this audio unit. This is only used on desktop since we are using defaults on mobile. +#if defined(MAL_APPLE_DESKTOP) + status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_CurrentDevice, kAudioUnitScope_Global, (deviceType == mal_device_type_playback) ? MAL_COREAUDIO_OUTPUT_BUS : MAL_COREAUDIO_INPUT_BUS, &deviceObjectID, sizeof(AudioDeviceID)); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); // Convert the failing OSStatus. "result" is a mal_result left over from mal_find_AudioObjectID() and must not be passed here. + } +#endif + + // Format. This is the hardest part of initialization because there's a few variables to take into account. + // 1) The format must be supported by the device. + // 2) The format must be supported by mini_al. + // 3) There's a priority that mini_al prefers. + // + // Ideally we would like to use a format that's as close to the hardware as possible so we can get as close to a passthrough as possible. The + // most important property is the sample rate. mini_al can do format conversion for any sample rate and channel count, but cannot do the same + // for the sample data format. If the sample data format is not supported by mini_al it must be ignored completely. + // + // On mobile platforms this is a bit different. We just force the use of whatever the audio unit's current format is set to.
+ { + AudioUnitScope formatScope = (deviceType == mal_device_type_playback) ? kAudioUnitScope_Input : kAudioUnitScope_Output; + AudioUnitElement formatElement = (deviceType == mal_device_type_playback) ? MAL_COREAUDIO_OUTPUT_BUS : MAL_COREAUDIO_INPUT_BUS; + + AudioStreamBasicDescription bestFormat; + #if defined(MAL_APPLE_DESKTOP) + result = mal_device_find_best_format__coreaudio(pDevice, &bestFormat); + if (result != MAL_SUCCESS) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return result; + } + + status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, sizeof(bestFormat)); + if (status != noErr) { + // We failed to set the format, so fall back to the current format of the audio unit. + UInt32 propSize = sizeof(bestFormat); + status = ((mal_AudioUnitGetProperty_proc)pContext->coreaudio.AudioUnitGetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, &propSize); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); + } + } + #else + UInt32 propSize = sizeof(bestFormat); + status = ((mal_AudioUnitGetProperty_proc)pContext->coreaudio.AudioUnitGetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, &propSize); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); + } + + // Sample rate is a little different here because for some reason kAudioUnitProperty_StreamFormat returns 0... Oh well. 
We need to instead try + // setting the sample rate to what the user has requested and then just see the results of it. Need to use some Objective-C here for this since + // it depends on Apple's AVAudioSession API. To do this we just get the shared AVAudioSession instance and then set it. Note that from what I + // can tell, it looks like the sample rate is shared between playback and capture for everything. + @autoreleasepool { + AVAudioSession* pAudioSession = [AVAudioSession sharedInstance]; + mal_assert(pAudioSession != NULL); + + [pAudioSession setPreferredSampleRate:(double)pDevice->sampleRate error:nil]; + bestFormat.mSampleRate = pAudioSession.sampleRate; + } + + status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, sizeof(bestFormat)); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); + } + #endif + + result = mal_format_from_AudioStreamBasicDescription(&bestFormat, &pDevice->internalFormat); + if (result != MAL_SUCCESS || pDevice->internalFormat == mal_format_unknown) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return result; + } + + pDevice->internalChannels = bestFormat.mChannelsPerFrame; + pDevice->internalSampleRate = bestFormat.mSampleRate; + } + + + // Internal channel map. +#if defined(MAL_APPLE_DESKTOP) + result = mal_get_AudioObject_channel_map(pContext, deviceObjectID, deviceType, pDevice->internalChannelMap); + if (result != MAL_SUCCESS) { + return result; + } +#else + // TODO: Figure out how to get the channel map using AVAudioSession. 
+ mal_get_standard_channel_map(mal_standard_channel_map_default, pDevice->internalChannels, pDevice->internalChannelMap); +#endif + + + // Buffer size. Not allowing this to be configurable on iOS. + mal_uint32 actualBufferSizeInFrames = pDevice->bufferSizeInFrames; + if (actualBufferSizeInFrames < pDevice->periods) { + actualBufferSizeInFrames = pDevice->periods; + } + +#if defined(MAL_APPLE_DESKTOP) + if (pDevice->usingDefaultBufferSize) { + // CPU speed is a factor to consider when determining how large of a buffer we need. + float fCPUSpeed = mal_calculate_cpu_speed_factor(); + + // In my admittedly limited testing, capture latency seems to be about the same as playback with Core Audio, at least on my MacBook Pro. On other + // backends, however, this is often different. I am therefore leaving the logic below in place just in case I need to do some capture/playback + // specific tweaking. + float fDeviceType; + if (deviceType == mal_device_type_playback) { + fDeviceType = 1.0f; + } else { + fDeviceType = 1.0f; + } + + // Backend tax. Need to fiddle with this. + float fBackend = 1.0f; + + actualBufferSizeInFrames = mal_calculate_default_buffer_size_in_frames(pConfig->performanceProfile, pConfig->sampleRate, fCPUSpeed*fDeviceType*fBackend); + if (actualBufferSizeInFrames < pDevice->periods) { + actualBufferSizeInFrames = pDevice->periods; + } + } + + actualBufferSizeInFrames = actualBufferSizeInFrames / pDevice->periods; + result = mal_set_AudioObject_buffer_size_in_frames(pContext, deviceObjectID, deviceType, &actualBufferSizeInFrames); + if (result != MAL_SUCCESS) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); // Release the audio unit so this failure path does not leak it. + return result; + } +#else + actualBufferSizeInFrames = 4096; +#endif + + pDevice->bufferSizeInFrames = actualBufferSizeInFrames * pDevice->periods; + + + // Callbacks.
+ AURenderCallbackStruct callbackInfo; + callbackInfo.inputProcRefCon = pDevice; + if (deviceType == mal_device_type_playback) { + callbackInfo.inputProc = mal_on_output__coreaudio; + status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Global, MAL_COREAUDIO_OUTPUT_BUS, &callbackInfo, sizeof(callbackInfo)); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); + } + } else { + callbackInfo.inputProc = mal_on_input__coreaudio; + status = ((mal_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_SetInputCallback, kAudioUnitScope_Global, MAL_COREAUDIO_INPUT_BUS, &callbackInfo, sizeof(callbackInfo)); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); + } + } + + // We need to listen for stop events. + status = ((mal_AudioUnitAddPropertyListener_proc)pContext->coreaudio.AudioUnitAddPropertyListener)((AudioUnit)pDevice->coreaudio.audioUnit, kAudioOutputUnitProperty_IsRunning, on_start_stop__coreaudio, pDevice); + if (status != noErr) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); + } + + + // We need a buffer list if this is an input device. We render into this in the input callback. + if (deviceType == mal_device_type_capture) { + mal_bool32 isInterleaved = MAL_TRUE; // TODO: Add support for non-interleaved streams. 
+ + size_t allocationSize = sizeof(AudioBufferList) - sizeof(AudioBuffer); // Subtract sizeof(AudioBuffer) because that part is dynamically sized. + if (isInterleaved) { + // Interleaved case. This is the simple case because we just have one buffer. + allocationSize += sizeof(AudioBuffer) * 1; + allocationSize += actualBufferSizeInFrames * mal_get_bytes_per_frame(pDevice->internalFormat, pDevice->internalChannels); + } else { + // Non-interleaved case. This is the more complex case because there's more than one buffer. + allocationSize += sizeof(AudioBuffer) * pDevice->internalChannels; + allocationSize += actualBufferSizeInFrames * mal_get_bytes_per_sample(pDevice->internalFormat) * pDevice->internalChannels; + } + + AudioBufferList* pBufferList = (AudioBufferList*)mal_malloc(allocationSize); + if (pBufferList == NULL) { + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return MAL_OUT_OF_MEMORY; + } + + if (isInterleaved) { + pBufferList->mNumberBuffers = 1; + pBufferList->mBuffers[0].mNumberChannels = pDevice->internalChannels; + pBufferList->mBuffers[0].mDataByteSize = actualBufferSizeInFrames * mal_get_bytes_per_frame(pDevice->internalFormat, pDevice->internalChannels); + pBufferList->mBuffers[0].mData = (mal_uint8*)pBufferList + sizeof(AudioBufferList); + } else { + pBufferList->mNumberBuffers = pDevice->internalChannels; + for (mal_uint32 iBuffer = 0; iBuffer < pBufferList->mNumberBuffers; ++iBuffer) { + pBufferList->mBuffers[iBuffer].mNumberChannels = 1; + pBufferList->mBuffers[iBuffer].mDataByteSize = actualBufferSizeInFrames * mal_get_bytes_per_sample(pDevice->internalFormat); + pBufferList->mBuffers[iBuffer].mData = (mal_uint8*)pBufferList + ((sizeof(AudioBufferList) - sizeof(AudioBuffer)) + (sizeof(AudioBuffer) * pDevice->internalChannels)) + (actualBufferSizeInFrames * mal_get_bytes_per_sample(pDevice->internalFormat) * iBuffer); + } + } + + 
pDevice->coreaudio.pAudioBufferList = pBufferList; + } + + + // Initialize the audio unit. + status = ((mal_AudioUnitInitialize_proc)pContext->coreaudio.AudioUnitInitialize)((AudioUnit)pDevice->coreaudio.audioUnit); + if (status != noErr) { + mal_free(pDevice->coreaudio.pAudioBufferList); + ((mal_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)((AudioUnit)pDevice->coreaudio.audioUnit); + return mal_result_from_OSStatus(status); + } + + + return MAL_SUCCESS; +} + +mal_result mal_device__start_backend__coreaudio(mal_device* pDevice) +{ + mal_assert(pDevice != NULL); + + OSStatus status = ((mal_AudioOutputUnitStart_proc)pDevice->pContext->coreaudio.AudioOutputUnitStart)((AudioUnit)pDevice->coreaudio.audioUnit); + if (status != noErr) { + return mal_result_from_OSStatus(status); + } + + return MAL_SUCCESS; +} + +mal_result mal_device__stop_backend__coreaudio(mal_device* pDevice) +{ + mal_assert(pDevice != NULL); + + OSStatus status = ((mal_AudioOutputUnitStop_proc)pDevice->pContext->coreaudio.AudioOutputUnitStop)((AudioUnit)pDevice->coreaudio.audioUnit); + if (status != noErr) { + return mal_result_from_OSStatus(status); + } + + return MAL_SUCCESS; +} +#endif // Core Audio + /////////////////////////////////////////////////////////////////////////////// @@ -13632,8 +15625,9 @@ mal_result mal_device__stop_backend__opensl(mal_device* pDevice) // Make sure the client is aware that the device has stopped. There may be an OpenSL|ES callback for this, but I haven't found it. mal_device__set_state(pDevice, MAL_STATE_STOPPED); - if (pDevice->onStop) { - pDevice->onStop(pDevice); + mal_stop_proc onStop = pDevice->onStop; + if (onStop) { + onStop(pDevice); } return MAL_SUCCESS; @@ -14912,8 +16906,8 @@ mal_result mal_context_init__sdl(mal_context* pContext) "SDL2.dll", "SDL.dll" #elif defined(MAL_APPLE) - "libSDL2-2.0.0.dylib", // Can any Mac users out there comfirm these library names? 
- "libSDL-1.2.0.dylib" + "SDL2.framework/SDL2", + "SDL.framework/SDL" #else "libSDL2-2.0.so.0", "libSDL-1.2.so.0" @@ -15042,11 +17036,12 @@ mal_result mal_device_init__sdl(mal_context* pContext, mal_device_type type, mal fType = 2.0f; } - // Backend tax. Need to fiddle with this. Special case for Emscripten. + // Backend tax. Need to fiddle with this. Keep in mind that SDL always rounds the buffer size up to the next + // power of two which should cover the natural API overhead. Special case for Emscripten. #if defined(__EMSCRIPTEN__) - float fBackend = 4.0f; + float fBackend = 1.0f; #else - float fBackend = 2.0f; + float fBackend = 1.0f; #endif bufferSize = mal_calculate_default_buffer_size_in_frames(pConfig->performanceProfile, pConfig->sampleRate, fCPUSpeed*fType*fBackend); @@ -15156,6 +17151,12 @@ mal_result mal_device__stop_backend__sdl(mal_device* pDevice) { ((MAL_PFN_SDL_PauseAudio)pDevice->pContext->sdl.SDL_PauseAudio)(1); } + + mal_device__set_state(pDevice, MAL_STATE_STOPPED); + mal_stop_proc onStop = pDevice->onStop; + if (onStop) { + onStop(pDevice); + } return MAL_SUCCESS; } @@ -15614,6 +17615,7 @@ const mal_backend g_malDefaultBackends[] = { mal_backend_wasapi, mal_backend_dsound, mal_backend_winmm, + mal_backend_coreaudio, mal_backend_oss, mal_backend_pulseaudio, mal_backend_alsa, @@ -15627,8 +17629,9 @@ const mal_backend g_malDefaultBackends[] = { mal_bool32 mal_is_backend_asynchronous(mal_backend backend) { return - backend == mal_backend_jack || - backend == mal_backend_opensl || + backend == mal_backend_jack || + backend == mal_backend_coreaudio || + backend == mal_backend_opensl || backend == mal_backend_sdl; } @@ -15703,6 +17706,12 @@ mal_result mal_context_init(const mal_backend backends[], mal_uint32 backendCoun result = mal_context_init__jack(pContext); } break; #endif + #ifdef MAL_HAS_COREAUDIO + case mal_backend_coreaudio: + { + result = mal_context_init__coreaudio(pContext); + } break; + #endif #ifdef MAL_HAS_OSS case mal_backend_oss: { 
@@ -15801,6 +17810,12 @@ mal_result mal_context_uninit(mal_context* pContext) mal_context_uninit__jack(pContext); } break; #endif + #ifdef MAL_HAS_COREAUDIO + case mal_backend_coreaudio: + { + mal_context_uninit__coreaudio(pContext); + } break; + #endif #ifdef MAL_HAS_OSS case mal_backend_oss: { @@ -16141,6 +18156,12 @@ mal_result mal_device_init(mal_context* pContext, mal_device_type type, mal_devi result = mal_device_init__jack(pContext, type, pDeviceID, &config, pDevice); } break; #endif + #ifdef MAL_HAS_COREAUDIO + case mal_backend_coreaudio: + { + result = mal_device_init__coreaudio(pContext, type, pDeviceID, &config, pDevice); + } break; + #endif #ifdef MAL_HAS_OSS case mal_backend_oss: { @@ -16321,11 +18342,6 @@ void mal_device_uninit(mal_device* pDevice) mal_thread_wait(&pDevice->thread); } - mal_event_uninit(&pDevice->stopEvent); - mal_event_uninit(&pDevice->startEvent); - mal_event_uninit(&pDevice->wakeupEvent); - mal_mutex_uninit(&pDevice->lock); - #ifdef MAL_HAS_WASAPI if (pDevice->pContext->backend == mal_backend_wasapi) { mal_device_uninit__wasapi(pDevice); @@ -16356,6 +18372,11 @@ void mal_device_uninit(mal_device* pDevice) mal_device_uninit__jack(pDevice); } #endif +#ifdef MAL_HAS_COREAUDIO + if (pDevice->pContext->backend == mal_backend_coreaudio) { + mal_device_uninit__coreaudio(pDevice); + } +#endif #ifdef MAL_HAS_OSS if (pDevice->pContext->backend == mal_backend_oss) { mal_device_uninit__oss(pDevice); @@ -16382,6 +18403,10 @@ void mal_device_uninit(mal_device* pDevice) } #endif + mal_event_uninit(&pDevice->stopEvent); + mal_event_uninit(&pDevice->startEvent); + mal_event_uninit(&pDevice->wakeupEvent); + mal_mutex_uninit(&pDevice->lock); if (pDevice->isOwnerOfContext) { mal_context_uninit(pDevice->pContext); @@ -16445,6 +18470,14 @@ mal_result mal_device_start(mal_device* pDevice) } } else #endif +#ifdef MAL_HAS_COREAUDIO + if (pDevice->pContext->backend == mal_backend_coreaudio) { + result = mal_device__start_backend__coreaudio(pDevice); + if 
(result == MAL_SUCCESS) { + mal_device__set_state(pDevice, MAL_STATE_STARTED); + } + } else +#endif #ifdef MAL_HAS_OPENSL if (pDevice->pContext->backend == mal_backend_opensl) { result = mal_device__start_backend__opensl(pDevice); @@ -16511,6 +18544,11 @@ mal_result mal_device_stop(mal_device* pDevice) mal_device__stop_backend__jack(pDevice); } else #endif +#ifdef MAL_HAS_COREAUDIO + if (pDevice->pContext->backend == mal_backend_coreaudio) { + mal_device__stop_backend__coreaudio(pDevice); + } else +#endif #ifdef MAL_HAS_OPENSL if (pDevice->pContext->backend == mal_backend_opensl) { mal_device__stop_backend__opensl(pDevice); @@ -17143,13 +19181,13 @@ mal_bool32 mal_channel_map_contains_channel_position(mal_uint32 channels, const void mal_copy_memory_64(void* dst, const void* src, mal_uint64 sizeInBytes) { -#if 0xFFFFFFFFFFFFFFFF <= SIZE_MAX +#if 0xFFFFFFFFFFFFFFFF <= MAL_SIZE_MAX mal_copy_memory(dst, src, (size_t)sizeInBytes); #else while (sizeInBytes > 0) { mal_uint64 bytesToCopyNow = sizeInBytes; - if (bytesToCopyNow > SIZE_MAX) { - bytesToCopyNow = SIZE_MAX; + if (bytesToCopyNow > MAL_SIZE_MAX) { + bytesToCopyNow = MAL_SIZE_MAX; } mal_copy_memory(dst, src, (size_t)bytesToCopyNow); // Safe cast to size_t. @@ -17163,13 +19201,13 @@ void mal_copy_memory_64(void* dst, const void* src, mal_uint64 sizeInBytes) void mal_zero_memory_64(void* dst, mal_uint64 sizeInBytes) { -#if 0xFFFFFFFFFFFFFFFF <= SIZE_MAX +#if 0xFFFFFFFFFFFFFFFF <= MAL_SIZE_MAX mal_zero_memory(dst, (size_t)sizeInBytes); #else while (sizeInBytes > 0) { mal_uint64 bytesToZeroNow = sizeInBytes; - if (bytesToZeroNow > SIZE_MAX) { - bytesToZeroNow = SIZE_MAX; + if (bytesToZeroNow > MAL_SIZE_MAX) { + bytesToZeroNow = MAL_SIZE_MAX; } mal_zero_memory(dst, (size_t)bytesToZeroNow); // Safe cast to size_t. 
@@ -17210,8 +19248,26 @@ void mal_pcm_u8_to_s16__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_u8_to_s16__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_u8_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_u8_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_u8_to_s16__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_u8_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_s16__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_u8_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode); } @@ -17221,13 +19277,9 @@ void mal_pcm_u8_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither_ { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_u8_to_s16__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_u8_to_s16__sse(dst, src, count, ditherMode); #else mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17254,8 +19306,26 @@ void mal_pcm_u8_to_s24__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_u8_to_s24__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_u8_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_u8_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode); +} +#endif +#if 
defined(MAL_SUPPORT_AVX2) +void mal_pcm_u8_to_s24__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_u8_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_s24__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_u8_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode); } @@ -17265,13 +19335,9 @@ void mal_pcm_u8_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither_ { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_u8_to_s24__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_u8_to_s24__sse(dst, src, count, ditherMode); #else mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17296,8 +19362,26 @@ void mal_pcm_u8_to_s32__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_u8_to_s32__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_u8_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_u8_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_u8_to_s32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_u8_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_s32__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_u8_to_s32__neon(void* dst, const void* src, 
mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode); } @@ -17307,13 +19391,9 @@ void mal_pcm_u8_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither_ { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_u8_to_s32__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_u8_to_s32__sse(dst, src, count, ditherMode); #else mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17339,8 +19419,26 @@ void mal_pcm_u8_to_f32__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_u8_to_f32__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_u8_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_u8_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_u8_to_f32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_u8_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_u8_to_f32__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_u8_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode); } @@ -17350,13 +19448,9 @@ void mal_pcm_u8_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither_ { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_u8_to_f32__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_u8_to_f32__sse(dst, src, count, ditherMode); #else mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17477,8 +19571,26 @@ void 
mal_pcm_s16_to_u8__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s16_to_u8__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s16_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s16_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_s16_to_u8__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s16_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_u8__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s16_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode); } @@ -17488,13 +19600,9 @@ void mal_pcm_s16_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_ { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s16_to_u8__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s16_to_u8__sse(dst, src, count, ditherMode); #else mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17525,8 +19633,26 @@ void mal_pcm_s16_to_s24__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s16_to_s24__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s16_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s16_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void 
mal_pcm_s16_to_s24__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s16_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_s24__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s16_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode); } @@ -17536,13 +19662,9 @@ void mal_pcm_s16_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s16_to_s24__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s16_to_s24__sse(dst, src, count, ditherMode); #else mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17564,8 +19686,26 @@ void mal_pcm_s16_to_s32__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s16_to_s32__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s16_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s16_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_s16_to_s32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s16_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_s32__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s16_to_s32__neon(void* dst, const void* src, mal_uint64 count, 
mal_dither_mode ditherMode) { mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode); } @@ -17575,13 +19715,9 @@ void mal_pcm_s16_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s16_to_s32__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s16_to_s32__sse(dst, src, count, ditherMode); #else mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17615,8 +19751,26 @@ void mal_pcm_s16_to_f32__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s16_to_f32__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s16_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s16_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_s16_to_f32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s16_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s16_to_f32__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s16_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode); } @@ -17626,13 +19780,9 @@ void mal_pcm_s16_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s16_to_f32__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s16_to_f32__sse(dst, src, count, ditherMode); #else mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17731,8 +19881,26 @@ void 
mal_pcm_s24_to_u8__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s24_to_u8__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s24_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s24_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_s24_to_u8__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s24_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_u8__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s24_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode); } @@ -17742,13 +19910,9 @@ void mal_pcm_s24_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_ { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s24_to_u8__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s24_to_u8__sse(dst, src, count, ditherMode); #else mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17788,8 +19952,26 @@ void mal_pcm_s24_to_s16__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s24_to_s16__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s24_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s24_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void 
mal_pcm_s24_to_s16__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s24_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_s16__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s24_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode); } @@ -17799,13 +19981,9 @@ void mal_pcm_s24_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s24_to_s16__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s24_to_s16__sse(dst, src, count, ditherMode); #else mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17835,8 +20013,26 @@ void mal_pcm_s24_to_s32__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s24_to_s32__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s24_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s24_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_s24_to_s32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s24_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_s32__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s24_to_s32__neon(void* dst, const void* src, mal_uint64 count, 
mal_dither_mode ditherMode) { mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode); } @@ -17846,13 +20042,9 @@ void mal_pcm_s24_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s24_to_s32__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s24_to_s32__sse(dst, src, count, ditherMode); #else mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -17886,8 +20078,26 @@ void mal_pcm_s24_to_f32__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s24_to_f32__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s24_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s24_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_s24_to_f32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s24_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s24_to_f32__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s24_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode); } @@ -17897,13 +20107,9 @@ void mal_pcm_s24_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s24_to_f32__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s24_to_f32__sse(dst, src, count, ditherMode); #else mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -18009,8 +20215,26 @@ void 
mal_pcm_s32_to_u8__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s32_to_u8__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s32_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s32_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_s32_to_u8__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s32_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_u8__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s32_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode); } @@ -18020,13 +20244,9 @@ void mal_pcm_s32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_ { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s32_to_u8__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s32_to_u8__sse(dst, src, count, ditherMode); #else mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -18066,8 +20286,26 @@ void mal_pcm_s32_to_s16__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s32_to_s16__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s32_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void 
mal_pcm_s32_to_s16__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s32_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_s16__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s32_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode); } @@ -18077,13 +20315,9 @@ void mal_pcm_s32_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s32_to_s16__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s32_to_s16__sse(dst, src, count, ditherMode); #else mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -18108,8 +20342,26 @@ void mal_pcm_s32_to_s24__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s32_to_s24__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s32_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s32_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_s32_to_s24__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s32_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_s24__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s32_to_s24__neon(void* dst, const void* src, mal_uint64 count, 
mal_dither_mode ditherMode) { mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode); } @@ -18119,13 +20371,9 @@ void mal_pcm_s32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s32_to_s24__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s32_to_s24__sse(dst, src, count, ditherMode); #else mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -18165,8 +20413,26 @@ void mal_pcm_s32_to_f32__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_s32_to_f32__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_s32_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_s32_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_s32_to_f32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_s32_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_s32_to_f32__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_s32_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode); } @@ -18176,13 +20442,9 @@ void mal_pcm_s32_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_s32_to_f32__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_s32_to_f32__sse(dst, src, count, ditherMode); #else mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -18274,8 +20536,26 @@ void 
mal_pcm_f32_to_u8__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_f32_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_f32_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_f32_to_u8__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_f32_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_f32_to_u8__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_f32_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode); } @@ -18285,13 +20565,9 @@ void mal_pcm_f32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_ { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_f32_to_u8__sse(dst, src, count, ditherMode); #else mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -18329,13 +20605,346 @@ void mal_pcm_f32_to_s16__reference(void* dst, const void* src, mal_uint64 count, void mal_pcm_f32_to_s16__optimized(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { - mal_pcm_f32_to_s16__reference(dst, src, count, ditherMode); + mal_int16* dst_s16 = (mal_int16*)dst; + const float* src_f32 = (const float*)src; + + float ditherMin = 0; + float ditherMax = 0; + if (ditherMode != mal_dither_mode_none) { + ditherMin = 1.0f / -32768; + ditherMax = 1.0f / 32767; + } + + mal_uint64 i = 0; + 
+ // Unrolled. + mal_uint64 count4 = count >> 2; + for (mal_uint64 i4 = 0; i4 < count4; i4 += 1) { + float d0 = mal_dither_f32(ditherMode, ditherMin, ditherMax); + float d1 = mal_dither_f32(ditherMode, ditherMin, ditherMax); + float d2 = mal_dither_f32(ditherMode, ditherMin, ditherMax); + float d3 = mal_dither_f32(ditherMode, ditherMin, ditherMax); + + float x0 = src_f32[i+0]; + float x1 = src_f32[i+1]; + float x2 = src_f32[i+2]; + float x3 = src_f32[i+3]; + + x0 = x0 + d0; + x1 = x1 + d1; + x2 = x2 + d2; + x3 = x3 + d3; + + x0 = ((x0 < -1) ? -1 : ((x0 > 1) ? 1 : x0)); + x1 = ((x1 < -1) ? -1 : ((x1 > 1) ? 1 : x1)); + x2 = ((x2 < -1) ? -1 : ((x2 > 1) ? 1 : x2)); + x3 = ((x3 < -1) ? -1 : ((x3 > 1) ? 1 : x3)); + + x0 = x0 * 32767.0f; + x1 = x1 * 32767.0f; + x2 = x2 * 32767.0f; + x3 = x3 * 32767.0f; + + dst_s16[i+0] = (mal_int16)x0; + dst_s16[i+1] = (mal_int16)x1; + dst_s16[i+2] = (mal_int16)x2; + dst_s16[i+3] = (mal_int16)x3; + + i += 4; + } + + // Leftover. + for (; i < count; i += 1) { + float x = src_f32[i]; + x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax); + x = ((x < -1) ? -1 : ((x > 1) ? 1 : x)); // clip + x = x * 32767.0f; // -1..1 to -32767..32767 + + dst_s16[i] = (mal_int16)x; + } } -#ifdef MAL_USE_SSE -void mal_pcm_f32_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_f32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { - mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode); + // Both the input and output buffers need to be aligned to 16 bytes. 
+ if ((((mal_uintptr)dst & 15) != 0) || (((mal_uintptr)src & 15) != 0)) { + mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode); + return; + } + + mal_int16* dst_s16 = (mal_int16*)dst; + const float* src_f32 = (const float*)src; + + float ditherMin = 0; + float ditherMax = 0; + if (ditherMode != mal_dither_mode_none) { + ditherMin = 1.0f / -32768; + ditherMax = 1.0f / 32767; + } + + mal_uint64 i = 0; + + // SSE2. SSE allows us to output 8 s16's at a time which means our loop is unrolled 8 times. + mal_uint64 count8 = count >> 3; + for (mal_uint64 i8 = 0; i8 < count8; i8 += 1) { + __m128 d0; + __m128 d1; + if (ditherMode == mal_dither_mode_none) { + d0 = _mm_set1_ps(0); + d1 = _mm_set1_ps(0); + } else if (ditherMode == mal_dither_mode_rectangle) { + d0 = _mm_set_ps( + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax) + ); + d1 = _mm_set_ps( + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax) + ); + } else { + d0 = _mm_set_ps( + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax) + ); + d1 = _mm_set_ps( + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax) + ); + } + + __m128 x0 = *((__m128*)(src_f32 + i) + 0); + __m128 x1 = *((__m128*)(src_f32 + i) + 1); + + x0 = _mm_add_ps(x0, d0); + x1 = _mm_add_ps(x1, d1); + + x0 = _mm_mul_ps(x0, _mm_set1_ps(32767.0f)); + x1 = _mm_mul_ps(x1, _mm_set1_ps(32767.0f)); + + _mm_stream_si128(((__m128i*)(dst_s16 + i)), _mm_packs_epi32(_mm_cvttps_epi32(x0), 
_mm_cvttps_epi32(x1))); + + i += 8; + } + + + // Leftover. + for (; i < count; i += 1) { + float x = src_f32[i]; + x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax); + x = ((x < -1) ? -1 : ((x > 1) ? 1 : x)); // clip + x = x * 32767.0f; // -1..1 to -32767..32767 + + dst_s16[i] = (mal_int16)x; + } +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_f32_to_s16__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + // Both the input and output buffers need to be aligned to 32 bytes. + if ((((mal_uintptr)dst & 31) != 0) || (((mal_uintptr)src & 31) != 0)) { + mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode); + return; + } + + mal_int16* dst_s16 = (mal_int16*)dst; + const float* src_f32 = (const float*)src; + + float ditherMin = 0; + float ditherMax = 0; + if (ditherMode != mal_dither_mode_none) { + ditherMin = 1.0f / -32768; + ditherMax = 1.0f / 32767; + } + + mal_uint64 i = 0; + + // AVX2. AVX2 allows us to output 16 s16's at a time which means our loop is unrolled 16 times. 
+ mal_uint64 count16 = count >> 4; + for (mal_uint64 i16 = 0; i16 < count16; i16 += 1) { + __m256 d0; + __m256 d1; + if (ditherMode == mal_dither_mode_none) { + d0 = _mm256_set1_ps(0); + d1 = _mm256_set1_ps(0); + } else if (ditherMode == mal_dither_mode_rectangle) { + d0 = _mm256_set_ps( + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax) + ); + d1 = _mm256_set_ps( + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax), + mal_dither_f32_rectangle(ditherMin, ditherMax) + ); + } else { + d0 = _mm256_set_ps( + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax) + ); + d1 = _mm256_set_ps( + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax), + mal_dither_f32_triangle(ditherMin, ditherMax) + ); + } + + __m256 x0 = 
*((__m256*)(src_f32 + i) + 0); + __m256 x1 = *((__m256*)(src_f32 + i) + 1); + + x0 = _mm256_add_ps(x0, d0); + x1 = _mm256_add_ps(x1, d1); + + x0 = _mm256_mul_ps(x0, _mm256_set1_ps(32767.0f)); + x1 = _mm256_mul_ps(x1, _mm256_set1_ps(32767.0f)); + + // Computing the final result is a little more complicated for AVX2 than SSE2. + __m256i i0 = _mm256_cvttps_epi32(x0); + __m256i i1 = _mm256_cvttps_epi32(x1); + __m256i p0 = _mm256_permute2x128_si256(i0, i1, 0 | 32); + __m256i p1 = _mm256_permute2x128_si256(i0, i1, 1 | 48); + __m256i r = _mm256_packs_epi32(p0, p1); + + _mm256_stream_si256(((__m256i*)(dst_s16 + i)), r); + + i += 16; + } + + + // Leftover. + for (; i < count; i += 1) { + float x = src_f32[i]; + x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax); + x = ((x < -1) ? -1 : ((x > 1) ? 1 : x)); // clip + x = x * 32767.0f; // -1..1 to -32767..32767 + + dst_s16[i] = (mal_int16)x; + } +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_f32_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + // TODO: Convert this from AVX to AVX-512. + mal_pcm_f32_to_s16__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_f32_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + // Both the input and output buffers need to be aligned to 16 bytes. + if ((((mal_uintptr)dst & 15) != 0) || (((mal_uintptr)src & 15) != 0)) { + mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode); + return; + } + + mal_int16* dst_s16 = (mal_int16*)dst; + const float* src_f32 = (const float*)src; + + float ditherMin = 0; + float ditherMax = 0; + if (ditherMode != mal_dither_mode_none) { + ditherMin = 1.0f / -32768; + ditherMax = 1.0f / 32767; + } + + mal_uint64 i = 0; + + // NEON. NEON allows us to output 8 s16's at a time which means our loop is unrolled 8 times. 
+ mal_uint64 count8 = count >> 3; + for (mal_uint64 i8 = 0; i8 < count8; i8 += 1) { + float32x4_t d0; + float32x4_t d1; + if (ditherMode == mal_dither_mode_none) { + d0 = vmovq_n_f32(0); + d1 = vmovq_n_f32(0); + } else if (ditherMode == mal_dither_mode_rectangle) { + float d0v[4]; + d0v[0] = mal_dither_f32_rectangle(ditherMin, ditherMax); + d0v[1] = mal_dither_f32_rectangle(ditherMin, ditherMax); + d0v[2] = mal_dither_f32_rectangle(ditherMin, ditherMax); + d0v[3] = mal_dither_f32_rectangle(ditherMin, ditherMax); + d0 = vld1q_f32(d0v); + + float d1v[4]; + d1v[0] = mal_dither_f32_rectangle(ditherMin, ditherMax); + d1v[1] = mal_dither_f32_rectangle(ditherMin, ditherMax); + d1v[2] = mal_dither_f32_rectangle(ditherMin, ditherMax); + d1v[3] = mal_dither_f32_rectangle(ditherMin, ditherMax); + d1 = vld1q_f32(d1v); + } else { + float d0v[4]; + d0v[0] = mal_dither_f32_triangle(ditherMin, ditherMax); + d0v[1] = mal_dither_f32_triangle(ditherMin, ditherMax); + d0v[2] = mal_dither_f32_triangle(ditherMin, ditherMax); + d0v[3] = mal_dither_f32_triangle(ditherMin, ditherMax); + d0 = vld1q_f32(d0v); + + float d1v[4]; + d1v[0] = mal_dither_f32_triangle(ditherMin, ditherMax); + d1v[1] = mal_dither_f32_triangle(ditherMin, ditherMax); + d1v[2] = mal_dither_f32_triangle(ditherMin, ditherMax); + d1v[3] = mal_dither_f32_triangle(ditherMin, ditherMax); + d1 = vld1q_f32(d1v); + } + + float32x4_t x0 = *((float32x4_t*)(src_f32 + i) + 0); + float32x4_t x1 = *((float32x4_t*)(src_f32 + i) + 1); + + x0 = vaddq_f32(x0, d0); + x1 = vaddq_f32(x1, d1); + + x0 = vmulq_n_f32(x0, 32767.0f); + x1 = vmulq_n_f32(x1, 32767.0f); + + int32x4_t i0 = vcvtq_s32_f32(x0); + int32x4_t i1 = vcvtq_s32_f32(x1); + *((int16x8_t*)(dst_s16 + i)) = vcombine_s16(vqmovn_s32(i0), vqmovn_s32(i1)); + + i += 8; + } + + + // Leftover. + for (; i < count; i += 1) { + float x = src_f32[i]; + x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax); + x = ((x < -1) ? -1 : ((x > 1) ? 
1 : x)); // clip + x = x * 32767.0f; // -1..1 to -32767..32767 + + dst_s16[i] = (mal_int16)x; + } } #endif @@ -18343,13 +20952,9 @@ void mal_pcm_f32_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_f32_to_s16__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_f32_to_s16__sse(dst, src, count, ditherMode); #else mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -18387,8 +20992,26 @@ void mal_pcm_f32_to_s24__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_f32_to_s24__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_f32_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_f32_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_f32_to_s24__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_f32_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_f32_to_s24__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_f32_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode); } @@ -18398,13 +21021,9 @@ void mal_pcm_f32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_f32_to_s24__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_f32_to_s24__sse(dst, src, count, ditherMode); #else mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -18439,8 
+21058,26 @@ void mal_pcm_f32_to_s32__optimized(void* dst, const void* src, mal_uint64 count, mal_pcm_f32_to_s32__reference(dst, src, count, ditherMode); } -#ifdef MAL_USE_SSE -void mal_pcm_f32_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +#if defined(MAL_SUPPORT_SSE2) +void mal_pcm_f32_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX2) +void mal_pcm_f32_to_s32__avx2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_AVX512) +void mal_pcm_f32_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) +{ + mal_pcm_f32_to_s32__avx2(dst, src, count, ditherMode); +} +#endif +#if defined(MAL_SUPPORT_NEON) +void mal_pcm_f32_to_s32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode) { mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode); } @@ -18450,13 +21087,9 @@ void mal_pcm_f32_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither { #ifdef MAL_USE_REFERENCE_CONVERSION_APIS mal_pcm_f32_to_s32__reference(dst, src, count, ditherMode); -#else -#ifdef MAL_USE_SSE - mal_pcm_f32_to_s32__sse(dst, src, count, ditherMode); #else mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode); #endif -#endif } @@ -18526,6 +21159,433 @@ void mal_pcm_deinterleave_f32(void** dst, const void* src, mal_uint64 frameCount } +void mal_format_converter_init_callbacks__default(mal_format_converter* pConverter) +{ + mal_assert(pConverter != NULL); + + switch (pConverter->config.formatIn) + { + case mal_format_u8: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_u8_to_u8; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = 
mal_pcm_u8_to_s16; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_u8_to_s24; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_u8_to_s32; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_u8_to_f32; + } + } break; + + case mal_format_s16: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s16_to_u8; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s16_to_s16; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s16_to_s24; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s16_to_s32; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s16_to_f32; + } + } break; + + case mal_format_s24: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s24_to_u8; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s24_to_s16; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s24_to_s24; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s24_to_s32; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s24_to_f32; + } + } break; + + case mal_format_s32: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s32_to_u8; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s32_to_s16; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s32_to_s24; + } else if (pConverter->config.formatOut == mal_format_s32) { + 
pConverter->onConvertPCM = mal_pcm_s32_to_s32; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s32_to_f32; + } + } break; + + case mal_format_f32: + default: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_f32_to_u8; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_f32_to_s16; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_f32_to_s24; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_f32_to_s32; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_f32_to_f32; + } + } break; + } +} + +#if defined(MAL_SUPPORT_SSE2) +void mal_format_converter_init_callbacks__sse2(mal_format_converter* pConverter) +{ + mal_assert(pConverter != NULL); + + switch (pConverter->config.formatIn) + { + case mal_format_u8: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_u8_to_u8; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_u8_to_s16__sse2; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_u8_to_s24__sse2; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_u8_to_s32__sse2; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_u8_to_f32__sse2; + } + } break; + + case mal_format_s16: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s16_to_u8__sse2; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s16_to_s16; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s16_to_s24__sse2; + } else if 
(pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s16_to_s32__sse2; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s16_to_f32__sse2; + } + } break; + + case mal_format_s24: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s24_to_u8__sse2; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s24_to_s16__sse2; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s24_to_s24; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s24_to_s32__sse2; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s24_to_f32__sse2; + } + } break; + + case mal_format_s32: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s32_to_u8__sse2; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s32_to_s16__sse2; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s32_to_s24__sse2; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s32_to_s32; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s32_to_f32__sse2; + } + } break; + + case mal_format_f32: + default: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_f32_to_u8__sse2; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_f32_to_s16__sse2; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_f32_to_s24__sse2; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_f32_to_s32__sse2; + } else if 
(pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_f32_to_f32; + } + } break; + } +} +#endif + +#if defined(MAL_SUPPORT_AVX2) +void mal_format_converter_init_callbacks__avx2(mal_format_converter* pConverter) +{ + mal_assert(pConverter != NULL); + + switch (pConverter->config.formatIn) + { + case mal_format_u8: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_u8_to_u8; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_u8_to_s16__avx2; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_u8_to_s24__avx2; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_u8_to_s32__avx2; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_u8_to_f32__avx2; + } + } break; + + case mal_format_s16: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s16_to_u8__avx2; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s16_to_s16; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s16_to_s24__avx2; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s16_to_s32__avx2; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s16_to_f32__avx2; + } + } break; + + case mal_format_s24: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s24_to_u8__avx2; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s24_to_s16__avx2; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s24_to_s24; + } else if (pConverter->config.formatOut == mal_format_s32) { + 
pConverter->onConvertPCM = mal_pcm_s24_to_s32__avx2; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s24_to_f32__avx2; + } + } break; + + case mal_format_s32: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s32_to_u8__avx2; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s32_to_s16__avx2; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s32_to_s24__avx2; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s32_to_s32; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s32_to_f32__avx2; + } + } break; + + case mal_format_f32: + default: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_f32_to_u8__avx2; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_f32_to_s16__avx2; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_f32_to_s24__avx2; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_f32_to_s32__avx2; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_f32_to_f32; + } + } break; + } +} +#endif + +#if defined(MAL_SUPPORT_AVX512) +void mal_format_converter_init_callbacks__avx512(mal_format_converter* pConverter) +{ + mal_assert(pConverter != NULL); + + switch (pConverter->config.formatIn) + { + case mal_format_u8: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_u8_to_u8; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_u8_to_s16__avx512; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = 
mal_pcm_u8_to_s24__avx512; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_u8_to_s32__avx512; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_u8_to_f32__avx512; + } + } break; + + case mal_format_s16: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s16_to_u8__avx512; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s16_to_s16; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s16_to_s24__avx512; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s16_to_s32__avx512; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s16_to_f32__avx512; + } + } break; + + case mal_format_s24: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s24_to_u8__avx512; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s24_to_s16__avx512; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s24_to_s24; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s24_to_s32__avx512; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s24_to_f32__avx512; + } + } break; + + case mal_format_s32: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s32_to_u8__avx512; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s32_to_s16__avx512; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s32_to_s24__avx512; + } else if (pConverter->config.formatOut == mal_format_s32) { + 
pConverter->onConvertPCM = mal_pcm_s32_to_s32; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s32_to_f32__avx512; + } + } break; + + case mal_format_f32: + default: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_f32_to_u8__avx512; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_f32_to_s16__avx512; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_f32_to_s24__avx512; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_f32_to_s32__avx512; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_f32_to_f32; + } + } break; + } +} +#endif + +#if defined(MAL_SUPPORT_NEON) +void mal_format_converter_init_callbacks__neon(mal_format_converter* pConverter) +{ + mal_assert(pConverter != NULL); + + switch (pConverter->config.formatIn) + { + case mal_format_u8: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_u8_to_u8; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_u8_to_s16__neon; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_u8_to_s24__neon; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_u8_to_s32__neon; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_u8_to_f32__neon; + } + } break; + + case mal_format_s16: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s16_to_u8__neon; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s16_to_s16; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = 
mal_pcm_s16_to_s24__neon; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s16_to_s32__neon; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s16_to_f32__neon; + } + } break; + + case mal_format_s24: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s24_to_u8__neon; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s24_to_s16__neon; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s24_to_s24; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s24_to_s32__neon; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s24_to_f32__neon; + } + } break; + + case mal_format_s32: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_s32_to_u8__neon; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_s32_to_s16__neon; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_s32_to_s24__neon; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = mal_pcm_s32_to_s32; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_s32_to_f32__neon; + } + } break; + + case mal_format_f32: + default: + { + if (pConverter->config.formatOut == mal_format_u8) { + pConverter->onConvertPCM = mal_pcm_f32_to_u8__neon; + } else if (pConverter->config.formatOut == mal_format_s16) { + pConverter->onConvertPCM = mal_pcm_f32_to_s16__neon; + } else if (pConverter->config.formatOut == mal_format_s24) { + pConverter->onConvertPCM = mal_pcm_f32_to_s24__neon; + } else if (pConverter->config.formatOut == mal_format_s32) { + pConverter->onConvertPCM = 
mal_pcm_f32_to_s32__neon; + } else if (pConverter->config.formatOut == mal_format_f32) { + pConverter->onConvertPCM = mal_pcm_f32_to_f32; + } + } break; + } +} +#endif mal_result mal_format_converter_init(const mal_format_converter_config* pConfig, mal_format_converter* pConverter) { @@ -18540,86 +21600,36 @@ mal_result mal_format_converter_init(const mal_format_converter_config* pConfig, pConverter->config = *pConfig; - switch (pConfig->formatIn) + // SIMD + pConverter->useSSE2 = mal_has_sse2() && !pConfig->noSSE2; + pConverter->useAVX2 = mal_has_avx2() && !pConfig->noAVX2; + pConverter->useAVX512 = mal_has_avx512f() && !pConfig->noAVX512; + pConverter->useNEON = mal_has_neon() && !pConfig->noNEON; + +#if defined(MAL_SUPPORT_AVX512) + if (pConverter->useAVX512) { + mal_format_converter_init_callbacks__avx512(pConverter); + } else +#endif +#if defined(MAL_SUPPORT_AVX2) + if (pConverter->useAVX2) { + mal_format_converter_init_callbacks__avx2(pConverter); + } else +#endif +#if defined(MAL_SUPPORT_SSE2) + if (pConverter->useSSE2) { + mal_format_converter_init_callbacks__sse2(pConverter); + } else +#endif +#if defined(MAL_SUPPORT_NEON) + if (pConverter->useNEON) { + mal_format_converter_init_callbacks__neon(pConverter); + } else +#endif { - case mal_format_u8: - { - if (pConfig->formatOut == mal_format_u8) { - pConverter->onConvertPCM = mal_pcm_u8_to_u8; - } else if (pConfig->formatOut == mal_format_s16) { - pConverter->onConvertPCM = mal_pcm_u8_to_s16; - } else if (pConfig->formatOut == mal_format_s24) { - pConverter->onConvertPCM = mal_pcm_u8_to_s24; - } else if (pConfig->formatOut == mal_format_s32) { - pConverter->onConvertPCM = mal_pcm_u8_to_s32; - } else if (pConfig->formatOut == mal_format_f32) { - pConverter->onConvertPCM = mal_pcm_u8_to_f32; - } - } break; - - case mal_format_s16: - { - if (pConfig->formatOut == mal_format_u8) { - pConverter->onConvertPCM = mal_pcm_s16_to_u8; - } else if (pConfig->formatOut == mal_format_s16) { - pConverter->onConvertPCM = 
mal_pcm_s16_to_s16; - } else if (pConfig->formatOut == mal_format_s24) { - pConverter->onConvertPCM = mal_pcm_s16_to_s24; - } else if (pConfig->formatOut == mal_format_s32) { - pConverter->onConvertPCM = mal_pcm_s16_to_s32; - } else if (pConfig->formatOut == mal_format_f32) { - pConverter->onConvertPCM = mal_pcm_s16_to_f32; - } - } break; - - case mal_format_s24: - { - if (pConfig->formatOut == mal_format_u8) { - pConverter->onConvertPCM = mal_pcm_s24_to_u8; - } else if (pConfig->formatOut == mal_format_s16) { - pConverter->onConvertPCM = mal_pcm_s24_to_s16; - } else if (pConfig->formatOut == mal_format_s24) { - pConverter->onConvertPCM = mal_pcm_s24_to_s24; - } else if (pConfig->formatOut == mal_format_s32) { - pConverter->onConvertPCM = mal_pcm_s24_to_s32; - } else if (pConfig->formatOut == mal_format_f32) { - pConverter->onConvertPCM = mal_pcm_s24_to_f32; - } - } break; - - case mal_format_s32: - { - if (pConfig->formatOut == mal_format_u8) { - pConverter->onConvertPCM = mal_pcm_s32_to_u8; - } else if (pConfig->formatOut == mal_format_s16) { - pConverter->onConvertPCM = mal_pcm_s32_to_s16; - } else if (pConfig->formatOut == mal_format_s24) { - pConverter->onConvertPCM = mal_pcm_s32_to_s24; - } else if (pConfig->formatOut == mal_format_s32) { - pConverter->onConvertPCM = mal_pcm_s32_to_s32; - } else if (pConfig->formatOut == mal_format_f32) { - pConverter->onConvertPCM = mal_pcm_s32_to_f32; - } - } break; - - case mal_format_f32: - default: - { - if (pConfig->formatOut == mal_format_u8) { - pConverter->onConvertPCM = mal_pcm_f32_to_u8; - } else if (pConfig->formatOut == mal_format_s16) { - pConverter->onConvertPCM = mal_pcm_f32_to_s16; - } else if (pConfig->formatOut == mal_format_s24) { - pConverter->onConvertPCM = mal_pcm_f32_to_s24; - } else if (pConfig->formatOut == mal_format_s32) { - pConverter->onConvertPCM = mal_pcm_f32_to_s32; - } else if (pConfig->formatOut == mal_format_f32) { - pConverter->onConvertPCM = mal_pcm_f32_to_f32; - } - } break; + 
mal_format_converter_init_callbacks__default(pConverter); } - switch (pConfig->formatOut) { case mal_format_u8: @@ -19185,7 +22195,7 @@ mal_result mal_channel_router_init(const mal_channel_router_config* pConfig, mal // SIMD pRouter->useSSE2 = mal_has_sse2() && !pConfig->noSSE2; - pRouter->useAVX = mal_has_avx() && !pConfig->noAVX; + pRouter->useAVX2 = mal_has_avx2() && !pConfig->noAVX2; pRouter->useAVX512 = mal_has_avx512f() && !pConfig->noAVX512; pRouter->useNEON = mal_has_neon() && !pConfig->noNEON; @@ -19369,9 +22379,9 @@ static MAL_INLINE mal_bool32 mal_channel_router__can_use_sse2(mal_channel_router return pRouter->useSSE2 && (((mal_uintptr)pSamplesOut & 15) == 0) && (((mal_uintptr)pSamplesIn & 15) == 0); } -static MAL_INLINE mal_bool32 mal_channel_router__can_use_avx(mal_channel_router* pRouter, const float* pSamplesOut, const float* pSamplesIn) +static MAL_INLINE mal_bool32 mal_channel_router__can_use_avx2(mal_channel_router* pRouter, const float* pSamplesOut, const float* pSamplesIn) { - return pRouter->useAVX && (((mal_uintptr)pSamplesOut & 31) == 0) && (((mal_uintptr)pSamplesIn & 31) == 0); + return pRouter->useAVX2 && (((mal_uintptr)pSamplesOut & 31) == 0) && (((mal_uintptr)pSamplesIn & 31) == 0); } static MAL_INLINE mal_bool32 mal_channel_router__can_use_avx512(mal_channel_router* pRouter, const float* pSamplesOut, const float* pSamplesIn) @@ -19438,8 +22448,8 @@ void mal_channel_router__do_routing(mal_channel_router* pRouter, mal_uint64 fram } else #endif -#if defined(MAL_SUPPORT_AVX) - if (mal_channel_router__can_use_avx(pRouter, ppSamplesOut[iChannelOut], ppSamplesIn[iChannelIn])) { +#if defined(MAL_SUPPORT_AVX2) + if (mal_channel_router__can_use_avx2(pRouter, ppSamplesOut[iChannelOut], ppSamplesIn[iChannelIn])) { __m256 weight = _mm256_set1_ps(pRouter->weights[iChannelIn][iChannelOut]); mal_uint64 frameCount8 = frameCount/8; @@ -19644,7 +22654,7 @@ void mal_src__build_sinc_table__sinc(mal_src* pSRC) mal_assert(pSRC != NULL); pSRC->sinc.table[0] = 
1.0f; - for (int i = 1; i < mal_countof(pSRC->sinc.table); i += 1) { + for (mal_uint32 i = 1; i < mal_countof(pSRC->sinc.table); i += 1) { double x = i*MAL_PI_D / MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION; pSRC->sinc.table[i] = (float)(sin(x)/x); } @@ -19660,7 +22670,7 @@ void mal_src__build_sinc_table__hann(mal_src* pSRC) { mal_src__build_sinc_table__sinc(pSRC); - for (int i = 0; i < mal_countof(pSRC->sinc.table); i += 1) { + for (mal_uint32 i = 0; i < mal_countof(pSRC->sinc.table); i += 1) { double x = pSRC->sinc.table[i]; double N = MAL_SRC_SINC_MAX_WINDOW_WIDTH*2; double n = ((double)(i) / MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION) + MAL_SRC_SINC_MAX_WINDOW_WIDTH; @@ -19687,6 +22697,12 @@ mal_result mal_src_init(const mal_src_config* pConfig, mal_src* pSRC) pSRC->config = *pConfig; + // SIMD + pSRC->useSSE2 = mal_has_sse2() && !pConfig->noSSE2; + pSRC->useAVX2 = mal_has_avx2() && !pConfig->noAVX2; + pSRC->useAVX512 = mal_has_avx512f() && !pConfig->noAVX512; + pSRC->useNEON = mal_has_neon() && !pConfig->noNEON; + if (pSRC->config.algorithm == mal_src_algorithm_sinc) { // Make sure the window width within bounds. if (pSRC->config.sinc.windowWidth == 0) { @@ -19858,7 +22874,7 @@ mal_uint64 mal_src_read_deinterleaved__linear(mal_src* pSRC, mal_uint64 frameCou // At this point we have a bunch of frames that the client has given to us for processing. From this we can determine the maximum number of output frames // that can be processed from this input. We want to output as many samples as possible from our input data. - float tAvailable = framesReadFromClient - tBeg; + float tAvailable = framesReadFromClient - tBeg - 1; // Subtract 1 because the last input sample is needed for interpolation and cannot be included in the output sample count calculation. 
mal_uint32 maxOutputFramesToRead = (mal_uint32)(tAvailable / factor); if (maxOutputFramesToRead == 0) { @@ -19919,6 +22935,9 @@ mal_uint64 mal_src_read_deinterleaved__linear(mal_src* pSRC, mal_uint64 frameCou float iNextSample = iPrevSample + 1; float alpha = t - iPrevSample; + mal_assert(iPrevSample < mal_countof(pSRC->linear.input[iChannel])); + mal_assert(iNextSample < mal_countof(pSRC->linear.input[iChannel])); + float prevSample = ppSamplesFromClient[iChannel][(mal_uint32)iPrevSample]; float nextSample = ppSamplesFromClient[iChannel][(mal_uint32)iNextSample]; @@ -20010,6 +23029,9 @@ mal_src_config mal_src_config_init(mal_uint32 sampleRateIn, mal_uint32 sampleRat // /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Comment this to disable interpolation of table lookups. Less accurate, but faster. +#define MAL_USE_SINC_TABLE_INTERPOLATION + // Retrieves a sample from the input buffer's window. Values >= 0 retrieve future samples. Negative values return past samples. static MAL_INLINE float mal_src_sinc__get_input_sample_from_window(const mal_src* pSRC, mal_uint32 channel, mal_uint32 windowPosInSamples, mal_int32 sampleIndex) { @@ -20030,14 +23052,14 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC, mal_assert(pSRC != NULL); float xabs = (float)fabs(x); - if (xabs >= MAL_SRC_SINC_MAX_WINDOW_WIDTH /*pSRC->config.sinc.windowWidth*/) { - return 0; - } + //if (xabs >= MAL_SRC_SINC_MAX_WINDOW_WIDTH /*pSRC->config.sinc.windowWidth*/) { + // xabs = 1; // <-- A non-zero integer will always return 0. 
+ //} xabs = xabs * MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION; mal_int32 ixabs = (mal_int32)xabs; -#if 1 +#if defined(MAL_USE_SINC_TABLE_INTERPOLATION) float a = xabs - ixabs; return mal_mix_f32_fast(pSRC->sinc.table[ixabs], pSRC->sinc.table[ixabs+1], a); #else @@ -20045,6 +23067,146 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC, #endif } +#if defined(MAL_SUPPORT_SSE2) +static MAL_INLINE __m128 mal_fabsf_sse2(__m128 x) +{ + return _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)), x); +} + +static MAL_INLINE __m128 mal_truncf_sse2(__m128 x) +{ + return _mm_cvtepi32_ps(_mm_cvttps_epi32(x)); +} + +static MAL_INLINE __m128 mal_src_sinc__interpolation_factor__sse2(const mal_src* pSRC, __m128 x) +{ + //__m128 windowWidth128 = _mm_set1_ps(MAL_SRC_SINC_MAX_WINDOW_WIDTH); + __m128 resolution128 = _mm_set1_ps(MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION); + //__m128 one = _mm_set1_ps(1); + + __m128 xabs = mal_fabsf_sse2(x); + + // if (MAL_SRC_SINC_MAX_WINDOW_WIDTH <= xabs) xabs = 1 else xabs = xabs; + //__m128 xcmp = _mm_cmp_ps(windowWidth128, xabs, 2); // 2 = Less than or equal = _mm_cmple_ps. + //xabs = _mm_or_ps(_mm_and_ps(one, xcmp), _mm_andnot_ps(xcmp, xabs)); // xabs = (xcmp) ? 
1 : xabs; + + xabs = _mm_mul_ps(xabs, resolution128); + __m128i ixabs = _mm_cvttps_epi32(xabs); + + int* ixabsv = (int*)&ixabs; + + __m128 lo = _mm_set_ps( + pSRC->sinc.table[ixabsv[3]], + pSRC->sinc.table[ixabsv[2]], + pSRC->sinc.table[ixabsv[1]], + pSRC->sinc.table[ixabsv[0]] + ); + + __m128 hi = _mm_set_ps( + pSRC->sinc.table[ixabsv[3]+1], + pSRC->sinc.table[ixabsv[2]+1], + pSRC->sinc.table[ixabsv[1]+1], + pSRC->sinc.table[ixabsv[0]+1] + ); + + __m128 a = _mm_sub_ps(xabs, _mm_cvtepi32_ps(ixabs)); + __m128 r = mal_mix_f32_fast__sse2(lo, hi, a); + + return r; +} +#endif + +#if defined(MAL_SUPPORT_AVX2) +static MAL_INLINE __m256 mal_fabsf_avx2(__m256 x) +{ + return _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF)), x); +} + +#if 0 +static MAL_INLINE __m256 mal_src_sinc__interpolation_factor__avx2(const mal_src* pSRC, __m256 x) +{ + //__m256 windowWidth256 = _mm256_set1_ps(MAL_SRC_SINC_MAX_WINDOW_WIDTH); + __m256 resolution256 = _mm256_set1_ps(MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION); + //__m256 one = _mm256_set1_ps(1); + + __m256 xabs = mal_fabsf_avx2(x); + + // if (MAL_SRC_SINC_MAX_WINDOW_WIDTH <= xabs) xabs = 1 else xabs = xabs; + //__m256 xcmp = _mm256_cmp_ps(windowWidth256, xabs, 2); // 2 = Less than or equal = _mm_cmple_ps. + //xabs = _mm256_or_ps(_mm256_and_ps(one, xcmp), _mm256_andnot_ps(xcmp, xabs)); // xabs = (xcmp) ? 
1 : xabs; + + xabs = _mm256_mul_ps(xabs, resolution256); + + __m256i ixabs = _mm256_cvttps_epi32(xabs); + __m256 a = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs)); + + + int* ixabsv = (int*)&ixabs; + + __m256 lo = _mm256_set_ps( + pSRC->sinc.table[ixabsv[7]], + pSRC->sinc.table[ixabsv[6]], + pSRC->sinc.table[ixabsv[5]], + pSRC->sinc.table[ixabsv[4]], + pSRC->sinc.table[ixabsv[3]], + pSRC->sinc.table[ixabsv[2]], + pSRC->sinc.table[ixabsv[1]], + pSRC->sinc.table[ixabsv[0]] + ); + + __m256 hi = _mm256_set_ps( + pSRC->sinc.table[ixabsv[7]+1], + pSRC->sinc.table[ixabsv[6]+1], + pSRC->sinc.table[ixabsv[5]+1], + pSRC->sinc.table[ixabsv[4]+1], + pSRC->sinc.table[ixabsv[3]+1], + pSRC->sinc.table[ixabsv[2]+1], + pSRC->sinc.table[ixabsv[1]+1], + pSRC->sinc.table[ixabsv[0]+1] + ); + + __m256 r = mal_mix_f32_fast__avx2(lo, hi, a); + + return r; +} +#endif + +#endif + +#if defined(MAL_SUPPORT_NEON) +static MAL_INLINE float32x4_t mal_fabsf_neon(float32x4_t x) +{ + return vabdq_f32(vmovq_n_f32(0), x); +} + +static MAL_INLINE float32x4_t mal_src_sinc__interpolation_factor__neon(const mal_src* pSRC, float32x4_t x) +{ + float32x4_t xabs = mal_fabsf_neon(x); + xabs = vmulq_n_f32(xabs, MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION); + + int32x4_t ixabs = vcvtq_s32_f32(xabs); + + int* ixabsv = (int*)&ixabs; + + float lo[4]; + lo[0] = pSRC->sinc.table[ixabsv[0]]; + lo[1] = pSRC->sinc.table[ixabsv[1]]; + lo[2] = pSRC->sinc.table[ixabsv[2]]; + lo[3] = pSRC->sinc.table[ixabsv[3]]; + + float hi[4]; + hi[0] = pSRC->sinc.table[ixabsv[0]+1]; + hi[1] = pSRC->sinc.table[ixabsv[1]+1]; + hi[2] = pSRC->sinc.table[ixabsv[2]+1]; + hi[3] = pSRC->sinc.table[ixabsv[3]+1]; + + float32x4_t a = vsubq_f32(xabs, vcvtq_f32_s32(ixabs)); + float32x4_t r = mal_mix_f32_fast__neon(vld1q_f32(lo), vld1q_f32(hi), a); + + return r; +} +#endif + mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount, void** ppSamplesOut, void* pUserData) { mal_assert(pSRC != NULL); @@ -20057,9 +23219,48 @@ mal_uint64 
mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount mal_int32 windowWidth = (mal_int32)pSRC->config.sinc.windowWidth; mal_int32 windowWidth2 = windowWidth*2; + // There are cases where it's actually more efficient to increase the window width so that it's aligned with the respective + // SIMD pipeline being used. + mal_int32 windowWidthSIMD = windowWidth; +#if defined(MAL_SUPPORT_NEON) + if (pSRC->useNEON) { + windowWidthSIMD = (windowWidthSIMD + 1) & ~(1); + } +#endif +#if defined(MAL_SUPPORT_AVX512) + if (pSRC->useAVX512) { + windowWidthSIMD = (windowWidthSIMD + 7) & ~(7); + } + else +#endif +#if defined(MAL_SUPPORT_AVX2) + if (pSRC->useAVX2) { + windowWidthSIMD = (windowWidthSIMD + 3) & ~(3); + } + else +#endif +#if defined(MAL_SUPPORT_SSE2) + if (pSRC->useSSE2) { + windowWidthSIMD = (windowWidthSIMD + 1) & ~(1); + } +#endif + mal_int32 windowWidthSIMD2 = windowWidthSIMD*2; + (void)windowWidthSIMD2; // <-- Silence a warning when SIMD is disabled. + float* ppNextSamplesOut[MAL_MAX_CHANNELS]; mal_copy_memory(ppNextSamplesOut, ppSamplesOut, sizeof(void*) * pSRC->config.channels); + float _windowSamplesUnaligned[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 + MAL_SIMD_ALIGNMENT]; + float* windowSamples = (float*)(((mal_uintptr)_windowSamplesUnaligned + MAL_SIMD_ALIGNMENT-1) & ~(MAL_SIMD_ALIGNMENT-1)); + mal_zero_memory(windowSamples, MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 * sizeof(float)); + + float _iWindowFUnaligned[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 + MAL_SIMD_ALIGNMENT]; + float* iWindowF = (float*)(((mal_uintptr)_iWindowFUnaligned + MAL_SIMD_ALIGNMENT-1) & ~(MAL_SIMD_ALIGNMENT-1)); + mal_zero_memory(iWindowF, MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 * sizeof(float)); + for (mal_int32 i = 0; i < windowWidth2; ++i) { + iWindowF[i] = (float)(i - windowWidth); + } + mal_uint64 totalOutputFramesRead = 0; while (totalOutputFramesRead < frameCount) { // The maximum number of frames we can read this iteration depends on how many input samples we have available to us. 
This is the number @@ -20087,17 +23288,138 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount // Do SRC. float timeIn = timeInBeg; for (mal_uint32 iSample = 0; iSample < outputFramesToRead; iSample += 1) { - mal_int32 iTimeIn = (mal_int32)timeIn; + float sampleOut = 0; + float iTimeInF = mal_floorf(timeIn); + mal_uint32 iTimeIn = (mal_uint32)iTimeInF; - float sampleOut = 0; - for (mal_int32 iWindow = -windowWidth+1; iWindow < windowWidth; iWindow += 1) { - float t = (timeIn - iTimeIn); - float w = (float)(iWindow); + mal_int32 iWindow = 0; + + // Pre-load the window samples into an aligned buffer to begin with. Need to put these into an aligned buffer to make SIMD easier. + windowSamples[0] = 0; // <-- The first sample is always zero. + for (mal_int32 i = 1; i < windowWidth2; ++i) { + windowSamples[i] = pSRC->sinc.input[iChannel][iTimeIn + i]; + } + +#if defined(MAL_SUPPORT_AVX2) || defined(MAL_SUPPORT_AVX512) + if (pSRC->useAVX2 || pSRC->useAVX512) { + __m256i ixabs[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2/8]; + __m256 a[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2/8]; + __m256 resolution256 = _mm256_set1_ps(MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION); + + __m256 t = _mm256_set1_ps((timeIn - iTimeInF)); + __m256 r = _mm256_set1_ps(0); + + mal_int32 windowWidth8 = windowWidthSIMD2 >> 3; + for (mal_int32 iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) { + __m256 w = *((__m256*)iWindowF + iWindow8); + + __m256 xabs = _mm256_sub_ps(t, w); + xabs = mal_fabsf_avx2(xabs); + xabs = _mm256_mul_ps(xabs, resolution256); + + ixabs[iWindow8] = _mm256_cvttps_epi32(xabs); + a[iWindow8] = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs[iWindow8])); + } + + for (mal_int32 iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) { + int* ixabsv = (int*)&ixabs[iWindow8]; + + __m256 lo = _mm256_set_ps( + pSRC->sinc.table[ixabsv[7]], + pSRC->sinc.table[ixabsv[6]], + pSRC->sinc.table[ixabsv[5]], + pSRC->sinc.table[ixabsv[4]], + pSRC->sinc.table[ixabsv[3]], + 
pSRC->sinc.table[ixabsv[2]], + pSRC->sinc.table[ixabsv[1]], + pSRC->sinc.table[ixabsv[0]] + ); + + __m256 hi = _mm256_set_ps( + pSRC->sinc.table[ixabsv[7]+1], + pSRC->sinc.table[ixabsv[6]+1], + pSRC->sinc.table[ixabsv[5]+1], + pSRC->sinc.table[ixabsv[4]+1], + pSRC->sinc.table[ixabsv[3]+1], + pSRC->sinc.table[ixabsv[2]+1], + pSRC->sinc.table[ixabsv[1]+1], + pSRC->sinc.table[ixabsv[0]+1] + ); + + __m256 s = *((__m256*)windowSamples + iWindow8); + r = _mm256_add_ps(r, _mm256_mul_ps(s, mal_mix_f32_fast__avx2(lo, hi, a[iWindow8]))); + } + + // Horizontal add. + __m256 x = _mm256_hadd_ps(r, _mm256_permute2f128_ps(r, r, 1)); + x = _mm256_hadd_ps(x, x); + x = _mm256_hadd_ps(x, x); + sampleOut += _mm_cvtss_f32(_mm256_castps256_ps128(x)); + + iWindow += windowWidth8 * 8; + } + else +#endif +#if defined(MAL_SUPPORT_SSE2) + if (pSRC->useSSE2) { + __m128 t = _mm_set1_ps((timeIn - iTimeInF)); + __m128 r = _mm_set1_ps(0); + + mal_int32 windowWidth4 = windowWidthSIMD2 >> 2; + for (mal_int32 iWindow4 = 0; iWindow4 < windowWidth4; iWindow4 += 1) { + __m128* s = (__m128*)windowSamples + iWindow4; + __m128* w = (__m128*)iWindowF + iWindow4; + + __m128 a = mal_src_sinc__interpolation_factor__sse2(pSRC, _mm_sub_ps(t, *w)); + r = _mm_add_ps(r, _mm_mul_ps(*s, a)); + } + + sampleOut += ((float*)(&r))[0]; + sampleOut += ((float*)(&r))[1]; + sampleOut += ((float*)(&r))[2]; + sampleOut += ((float*)(&r))[3]; + + iWindow += windowWidth4 * 4; + } + else +#endif +#if defined(MAL_SUPPORT_NEON) + if (pSRC->useNEON) { + float32x4_t t = vmovq_n_f32((timeIn - iTimeInF)); + float32x4_t r = vmovq_n_f32(0); + + mal_int32 windowWidth4 = windowWidthSIMD2 >> 2; + for (mal_int32 iWindow4 = 0; iWindow4 < windowWidth4; iWindow4 += 1) { + float32x4_t* s = (float32x4_t*)windowSamples + iWindow4; + float32x4_t* w = (float32x4_t*)iWindowF + iWindow4; + + float32x4_t a = mal_src_sinc__interpolation_factor__neon(pSRC, vsubq_f32(t, *w)); + r = vaddq_f32(r, vmulq_f32(*s, a)); + } + + sampleOut += ((float*)(&r))[0]; + 
sampleOut += ((float*)(&r))[1]; + sampleOut += ((float*)(&r))[2]; + sampleOut += ((float*)(&r))[3]; + + iWindow += windowWidth4 * 4; + } + else +#endif + { + iWindow += 1; // The first one is a dummy for SIMD alignment purposes. Skip it. + } + + // Non-SIMD/Reference implementation. + float t = (timeIn - iTimeIn); + for (; iWindow < windowWidth2; iWindow += 1) { + float s = windowSamples[iWindow]; + float w = iWindowF[iWindow]; float a = mal_src_sinc__interpolation_factor(pSRC, (t - w)); - float s = mal_src_sinc__get_input_sample_from_window(pSRC, iChannel, iTimeIn, iWindow); + float r = s * a; - sampleOut += s * a; + sampleOut += r; } ppNextSamplesOut[iChannel][iSample] = (float)sampleOut; @@ -20359,7 +23681,8 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP) pDSP->pUserData = pConfig->pUserData; pDSP->isDynamicSampleRateAllowed = pConfig->allowDynamicSampleRate; - // This is generally the pipeline used for data conversion. Note that this can actually change which is explained later. + + // In general, this is the pipeline used for data conversion. Note that this can actually change which is explained later. // // Pre Format Conversion -> Sample Rate Conversion -> Channel Routing -> Post Format Conversion // @@ -20455,6 +23778,10 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP) pDSP ); preFormatConverterConfig.ditherMode = pConfig->ditherMode; + preFormatConverterConfig.noSSE2 = pConfig->noSSE2; + preFormatConverterConfig.noAVX2 = pConfig->noAVX2; + preFormatConverterConfig.noAVX512 = pConfig->noAVX512; + preFormatConverterConfig.noNEON = pConfig->noNEON; result = mal_format_converter_init(&preFormatConverterConfig, &pDSP->formatConverterIn); if (result != MAL_SUCCESS) { @@ -20466,10 +23793,14 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP) // or from an earlier stage in the pipeline. 
{ mal_format_converter_config postFormatConverterConfig = mal_format_converter_config_init_new(); - postFormatConverterConfig.formatIn = pConfig->formatIn; - postFormatConverterConfig.formatOut = pConfig->formatOut; - postFormatConverterConfig.channels = pConfig->channelsOut; + postFormatConverterConfig.formatIn = pConfig->formatIn; + postFormatConverterConfig.formatOut = pConfig->formatOut; + postFormatConverterConfig.channels = pConfig->channelsOut; postFormatConverterConfig.ditherMode = pConfig->ditherMode; + postFormatConverterConfig.noSSE2 = pConfig->noSSE2; + postFormatConverterConfig.noAVX2 = pConfig->noAVX2; + postFormatConverterConfig.noAVX512 = pConfig->noAVX512; + postFormatConverterConfig.noNEON = pConfig->noNEON; if (pDSP->isPreFormatConversionRequired) { postFormatConverterConfig.onReadDeinterleaved = mal_dsp__post_format_converter_on_read_deinterleaved; postFormatConverterConfig.formatIn = mal_format_f32; @@ -20493,6 +23824,10 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP) pDSP ); srcConfig.algorithm = pConfig->srcAlgorithm; + srcConfig.noSSE2 = pConfig->noSSE2; + srcConfig.noAVX2 = pConfig->noAVX2; + srcConfig.noAVX512 = pConfig->noAVX512; + srcConfig.noNEON = pConfig->noNEON; mal_copy_memory(&srcConfig.sinc, &pConfig->sinc, sizeof(pConfig->sinc)); result = mal_src_init(&srcConfig, &pDSP->src); @@ -20511,6 +23846,10 @@ mal_result mal_dsp_init(const mal_dsp_config* pConfig, mal_dsp* pDSP) pConfig->channelMixMode, mal_dsp__channel_router_on_read_deinterleaved, pDSP); + routerConfig.noSSE2 = pConfig->noSSE2; + routerConfig.noAVX2 = pConfig->noAVX2; + routerConfig.noAVX512 = pConfig->noAVX512; + routerConfig.noNEON = pConfig->noNEON; result = mal_channel_router_init(&routerConfig, &pDSP->channelRouter); if (result != MAL_SUCCESS) { @@ -20620,6 +23959,7 @@ typedef struct mal_uint32 channelsIn; mal_uint64 totalFrameCount; mal_uint64 iNextFrame; + mal_bool32 isFeedingZeros; // When set to true, feeds the DSP zero samples. 
} mal_convert_frames__data; mal_uint32 mal_convert_frames__on_read(mal_dsp* pDSP, mal_uint32 frameCount, void* pFramesOut, void* pUserData) @@ -20636,8 +23976,13 @@ mal_uint32 mal_convert_frames__on_read(mal_dsp* pDSP, mal_uint32 frameCount, voi framesToRead = (mal_uint32)framesRemaining; } - mal_uint32 frameSizeInBytes = mal_get_bytes_per_sample(pData->formatIn) * pData->channelsIn; - mal_copy_memory(pFramesOut, (const mal_uint8*)pData->pDataIn + (frameSizeInBytes * pData->iNextFrame), frameSizeInBytes * framesToRead); + mal_uint32 frameSizeInBytes = mal_get_bytes_per_frame(pData->formatIn, pData->channelsIn); + + if (!pData->isFeedingZeros) { + mal_copy_memory(pFramesOut, (const mal_uint8*)pData->pDataIn + (frameSizeInBytes * pData->iNextFrame), frameSizeInBytes * framesToRead); + } else { + mal_zero_memory(pFramesOut, frameSizeInBytes * framesToRead); + } pData->iNextFrame += framesToRead; return framesToRead; @@ -20708,6 +24053,7 @@ mal_uint64 mal_convert_frames_ex(void* pOut, mal_format formatOut, mal_uint32 ch data.channelsIn = channelsIn; data.totalFrameCount = frameCountIn; data.iNextFrame = 0; + data.isFeedingZeros = MAL_FALSE; mal_dsp_config config; mal_zero_object(&config); @@ -20738,7 +24084,38 @@ mal_uint64 mal_convert_frames_ex(void* pOut, mal_format formatOut, mal_uint32 ch return 0; } - return mal_dsp_read(&dsp, frameCountOut, pOut, dsp.pUserData); + // Always output our computed frame count. There is a chance the sample rate conversion routine may not output the last sample + // due to precision issues with 32-bit floats, in which case we should feed the DSP zero samples so it can generate that last + // frame. 
+ mal_uint64 totalFramesRead = mal_dsp_read(&dsp, frameCountOut, pOut, dsp.pUserData); + if (totalFramesRead < frameCountOut) { + mal_uint32 bpf = mal_get_bytes_per_frame(formatIn, channelsIn); + + data.isFeedingZeros = MAL_TRUE; + data.totalFrameCount = 0xFFFFFFFFFFFFFFFF; + data.pDataIn = NULL; + + while (totalFramesRead < frameCountOut) { + mal_uint64 framesToRead = (frameCountOut - totalFramesRead); + mal_assert(framesToRead > 0); + + mal_uint64 framesJustRead = mal_dsp_read(&dsp, framesToRead, mal_offset_ptr(pOut, totalFramesRead * bpf), dsp.pUserData); + totalFramesRead += framesJustRead; + + if (framesJustRead < framesToRead) { + break; + } + } + + // At this point we should have output every sample, but just to be super duper sure, just fill the rest with zeros. + if (totalFramesRead < frameCountOut) { + mal_zero_memory_64(mal_offset_ptr(pOut, totalFramesRead * bpf), ((frameCountOut - totalFramesRead) * bpf)); + totalFramesRead = frameCountOut; + } + } + + mal_assert(totalFramesRead == frameCountOut); + return totalFramesRead; } @@ -20802,7 +24179,7 @@ const char* mal_get_backend_name(mal_backend backend) case mal_backend_alsa: return "ALSA"; case mal_backend_pulseaudio: return "PulseAudio"; case mal_backend_jack: return "JACK"; - //case mal_backend_coreaudio: return "Core Audio"; + case mal_backend_coreaudio: return "Core Audio"; case mal_backend_oss: return "OSS"; case mal_backend_opensl: return "OpenSL|ES"; case mal_backend_openal: return "OpenAL"; @@ -20861,7 +24238,7 @@ float mal_calculate_cpu_speed_factor() // Our profiling test is based on how quick it can process 1 second worth of samples through mini_al's data conversion pipeline. // This factor is multiplied with the profiling time. May need to fiddle with this to get an accurate value. - float f = 1000; + double f = 1000; // Experiment: Reduce the factor a little when debug mode is used to reduce a blowout. 
#if !defined(NDEBUG) || defined(_DEBUG) @@ -20874,27 +24251,45 @@ float mal_calculate_cpu_speed_factor() mal_uint32 channelsOut = 6; // Using the heap here to avoid an unnecessary static memory allocation. Also too big for the stack. - mal_uint8* pInputFrames = (mal_uint8*)mal_aligned_malloc(sampleRateIn * channelsIn * sizeof(*pInputFrames), MAL_SIMD_ALIGNMENT); - if (pInputFrames == NULL) { + mal_uint8* pInputFrames = NULL; + float* pOutputFrames = NULL; + + size_t inputDataSize = sampleRateIn * channelsIn * sizeof(*pInputFrames); + size_t outputDataSize = sampleRateOut * channelsOut * sizeof(*pOutputFrames); + + void* pData = mal_malloc(inputDataSize + outputDataSize); + if (pData == NULL) { return 1; } - float* pOutputFrames = (float*)mal_aligned_malloc(sampleRateOut * channelsOut * sizeof(*pOutputFrames), MAL_SIMD_ALIGNMENT); - if (pOutputFrames == NULL) { - mal_aligned_free(pInputFrames); - return 1; - } + pInputFrames = (mal_uint8*)pData; + pOutputFrames = (float*)(pInputFrames + inputDataSize); + + + mal_calculate_cpu_speed_factor_data data; data.pInputFrames = pInputFrames; data.framesRemaining = sampleRateIn; mal_dsp_config config = mal_dsp_config_init(mal_format_u8, channelsIn, sampleRateIn, mal_format_f32, channelsOut, sampleRateOut, mal_calculate_cpu_speed_factor__on_read, &data); + + // Use linear sample rate conversion because it's the simplest and least likely to cause skewing as a result of tweaks to default + // configurations in the future. + config.srcAlgorithm = mal_src_algorithm_linear; + + // Experiment: Disable SIMD extensions when profiling just to try and keep things a bit more consistent. The idea is to get a general + // indication on the speed of the system, but SIMD is used more heavily in the DSP pipeline than in the general case which may make + // the results a little less realistic. 
+ config.noSSE2 = MAL_TRUE; + config.noAVX2 = MAL_TRUE; + config.noAVX512 = MAL_TRUE; + config.noNEON = MAL_TRUE; + mal_dsp dsp; mal_result result = mal_dsp_init(&config, &dsp); if (result != MAL_SUCCESS) { - mal_aligned_free(pInputFrames); - mal_aligned_free(pOutputFrames); + mal_free(pData); return 1; } @@ -20914,11 +24309,11 @@ float mal_calculate_cpu_speed_factor() double executionTimeInSeconds = mal_timer_get_time_in_seconds(&timer) - startTime; executionTimeInSeconds /= iterationCount; - - mal_aligned_free(pInputFrames); - mal_aligned_free(pOutputFrames); - return (float)(executionTimeInSeconds * f); + mal_free(pData); + + // Guard against extreme blowouts. + return (float)mal_clamp(executionTimeInSeconds * f, 0.1, 100.0); } mal_uint32 mal_scale_buffer_size(mal_uint32 baseBufferSize, float scale) @@ -20928,11 +24323,20 @@ mal_uint32 mal_scale_buffer_size(mal_uint32 baseBufferSize, float scale) mal_uint32 mal_calculate_default_buffer_size_in_frames(mal_performance_profile performanceProfile, mal_uint32 sampleRate, float scale) { + mal_uint32 baseLatency; if (performanceProfile == mal_performance_profile_low_latency) { - return mal_scale_buffer_size((sampleRate/1000) * MAL_BASE_BUFFER_SIZE_IN_MILLISECONDS_LOW_LATENCY, scale); + baseLatency = MAL_BASE_BUFFER_SIZE_IN_MILLISECONDS_LOW_LATENCY; } else { - return mal_scale_buffer_size((sampleRate/1000) * MAL_BASE_BUFFER_SIZE_IN_MILLISECONDS_CONSERVATIVE, scale); + baseLatency = MAL_BASE_BUFFER_SIZE_IN_MILLISECONDS_CONSERVATIVE; } + + mal_uint32 sampleRateMS = (sampleRate/1000); + + mal_uint32 minBufferSize = sampleRateMS * mal_min(baseLatency / 5, 1); // <-- Guard against multiply by zero. 
+ mal_uint32 maxBufferSize = sampleRateMS * (baseLatency * 40); + + mal_uint32 bufferSize = mal_scale_buffer_size((sampleRate/1000) * baseLatency, scale); + return mal_clamp(bufferSize, minBufferSize, maxBufferSize); } @@ -21663,7 +25067,7 @@ mal_uint32 mal_decoder_internal_on_read_frames__raw(mal_dsp* pDSP, mal_uint32 fr // For raw decoding we just read directly from the decoder's callbacks. mal_uint32 bpf = mal_get_bytes_per_frame(pDecoder->internalFormat, pDecoder->internalChannels); - return pDecoder->onRead(pDecoder, pSamplesOut, frameCount * bpf) / bpf; + return (mal_uint32)pDecoder->onRead(pDecoder, pSamplesOut, frameCount * bpf) / bpf; } mal_result mal_decoder_init_raw__internal(const mal_decoder_config* pConfigIn, const mal_decoder_config* pConfigOut, mal_decoder* pDecoder) @@ -21792,6 +25196,13 @@ mal_result mal_decoder_init__internal(mal_decoder_read_proc onRead, mal_decoder_ mal_assert(pConfig != NULL); mal_assert(pDecoder != NULL); + // Silence some warnings in the case that we don't have any decoder backends enabled. + (void)onRead; + (void)onSeek; + (void)pUserData; + (void)pConfig; + (void)pDecoder; + // We use trial and error to open a decoder. 
mal_result result = MAL_NO_BACKEND; @@ -22248,7 +25659,7 @@ mal_result mal_decoder__full_decode_and_uninit(mal_decoder* pDecoder, mal_decode newDataCapInFrames = 4096; } - if ((newDataCapInFrames * bpf) > SIZE_MAX) { + if ((newDataCapInFrames * bpf) > MAL_SIZE_MAX) { mal_free(pDataOut); return MAL_TOO_LARGE; } @@ -22380,7 +25791,7 @@ mal_result mal_sine_wave_init(double amplitude, double periodsPerSecond, mal_uin pSineWave->amplitude = amplitude; pSineWave->periodsPerSecond = periodsPerSecond; - pSineWave->delta = MAL_PI_D*2 / sampleRate; + pSineWave->delta = MAL_TAU_D / sampleRate; pSineWave->time = 0; return MAL_SUCCESS; @@ -22426,6 +25837,7 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSineWave, mal_uint64 count, float* // - API CHANGE: Change the default channel mapping to the standard Microsoft mapping. // - API CHANGE: Remove backend-specific result codes. // - API CHANGE: Changes to the format conversion APIs (mal_pcm_f32_to_s16(), etc.) +// - Add support for Core Audio (Apple). // - Add support for PulseAudio. // - This is the highest priority backend on Linux (higher priority than ALSA) since it is commonly // installed by default on many of the popular distros and offer's more seamless integration on @@ -22448,12 +25860,14 @@ mal_uint64 mal_sine_wave_read(mal_sine_wave* pSineWave, mal_uint64 count, float* // as the backend's internal device, and as such results in a pass-through data transmission pipeline. // - Add support for passing in NULL for the device config in mal_device_init(), which uses a default // config. This requires manually calling mal_device_set_send/recv_callback(). +// - Add support for decoding from raw PCM data (mal_decoder_init_raw(), etc.) // - Make mal_device_init_ex() more robust. // - Make some APIs more const-correct. +// - Fix errors with SDL detection on Apple platforms. // - Fix errors with OpenAL detection. // - Fix some memory leaks. // - Fix a bug with opening decoders from memory. 
-// - Add support for decoding from raw PCM data (mal_decoder_init_raw(), etc.) +// - Early work on SSE2, AVX2 and NEON optimizations. // - Miscellaneous bug fixes. // - Documentation updates. //