diff --git a/src/external/dr_flac.h b/src/external/dr_flac.h
index c836847e..13f42b2a 100644
--- a/src/external/dr_flac.h
+++ b/src/external/dr_flac.h
@@ -1,119 +1,118 @@
-// FLAC audio decoder. Public domain. See "unlicense" statement at the end of this file.
-// dr_flac - v0.9.7 - 2018-07-05
-//
-// David Reid - mackron@gmail.com
+/*
+FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_flac - v0.11.7 - 2019-05-06
 
-// USAGE
-//
-// dr_flac is a single-file library. To use it, do something like the following in one .c file.
-//     #define DR_FLAC_IMPLEMENTATION
-//     #include "dr_flac.h"
-//
-// You can then #include this file in other parts of the program as you would with any other header file. To decode audio data,
-// do something like the following:
-//
-//     drflac* pFlac = drflac_open_file("MySong.flac");
-//     if (pFlac == NULL) {
-//         // Failed to open FLAC file
-//     }
-//
-//     drflac_int32* pSamples = malloc(pFlac->totalSampleCount * sizeof(drflac_int32));
-//     drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_s32(pFlac, pFlac->totalSampleCount, pSamples);
-//
-// The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of
-// channels and the bits per sample, should be directly accessible - just make sure you don't change their values. Samples are
-// always output as interleaved signed 32-bit PCM. In the example above a native FLAC stream was opened, however dr_flac has
-// seamless support for Ogg encapsulated FLAC streams as well.
-//
-// You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and
-// the decoder will give you as many samples as it can, up to the amount requested. Later on when you need the next batch of
-// samples, just call it again. Example:
-//
-//     while (drflac_read_s32(pFlac, chunkSize, pChunkSamples) > 0) {
-//         do_something();
-//     }
-//
-// You can seek to a specific sample with drflac_seek_to_sample(). The given sample is based on interleaving. So for example,
-// if you were to seek to the sample at index 0 in a stereo stream, you'll be seeking to the first sample of the left channel.
-// The sample at index 1 will be the first sample of the right channel. The sample at index 2 will be the second sample of the
-// left channel, etc.
-//
-//
-// If you just want to quickly decode an entire FLAC file in one go you can do something like this:
-//
-//     unsigned int channels;
-//     unsigned int sampleRate;
-//     drflac_uint64 totalSampleCount;
-//     drflac_int32* pSampleData = drflac_open_and_decode_file_s32("MySong.flac", &channels, &sampleRate, &totalSampleCount);
-//     if (pSampleData == NULL) {
-//         // Failed to open and decode FLAC file.
-//     }
-//
-//     ...
-//
-//     drflac_free(pSampleData);
-//
-//
-// You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs
-// respectively, but note that these should be considered lossy.
-//
-//
-// If you need access to metadata (album art, etc.), use drflac_open_with_metadata(), drflac_open_file_with_metdata() or
-// drflac_open_memory_with_metadata(). The rationale for keeping these APIs separate is that they're slightly slower than the
-// normal versions and also just a little bit harder to use.
-//
-// dr_flac reports metadata to the application through the use of a callback, and every metadata block is reported before
-// drflac_open_with_metdata() returns.
-//
-//
-// The main opening APIs (drflac_open(), etc.) will fail if the header is not present. The presents a problem in certain
-// scenarios such as broadcast style streams like internet radio where the header may not be present because the user has
-// started playback mid-stream. To handle this, use the relaxed APIs: drflac_open_relaxed() and drflac_open_with_metadata_relaxed().
-//
-// It is not recommended to use these APIs for file based streams because a missing header would usually indicate a
-// corrupted or perverse file. In addition, these APIs can take a long time to initialize because they may need to spend
-// a lot of time finding the first frame.
-//
-//
-//
-// OPTIONS
-// #define these options before including this file.
-//
-// #define DR_FLAC_NO_STDIO
-//   Disable drflac_open_file().
-//
-// #define DR_FLAC_NO_OGG
-//   Disables support for Ogg/FLAC streams.
-//
-// #define DR_FLAC_NO_WIN32_IO
-//   In the Win32 build, dr_flac uses the Win32 IO APIs for drflac_open_file() by default. This setting will make it use the
-//   standard FILE APIs instead. Ignored when DR_FLAC_NO_STDIO is #defined. (The rationale for this configuration is that
-//   there's a bug in one compiler's Win32 implementation of the FILE APIs which is not present in the Win32 IO APIs.)
-//
-// #define DR_FLAC_BUFFER_SIZE <number>
-//   Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls
-//   back to the client for more data. Larger values means more memory, but better performance. My tests show diminishing
-//   returns after about 4KB (which is the default). Consider reducing this if you have a very efficient implementation of
-//   onRead(), or increase it if it's very inefficient. Must be a multiple of 8.
-//
-// #define DR_FLAC_NO_CRC
-//   Disables CRC checks. This will offer a performance boost when CRC is unnecessary.
-//
-// #define DR_FLAC_NO_SIMD
-//   Disables SIMD optimizations (SSE on x86/x64 architectures). Use this if you are having compatibility issues with your
-//   compiler.
-//
-//
-//
-// QUICK NOTES
-// - dr_flac does not currently support changing the sample rate nor channel count mid stream.
-// - Audio data is output as signed 32-bit PCM, regardless of the bits per sample the FLAC stream is encoded as.
-// - This has not been tested on big-endian architectures.
-// - Rice codes in unencoded binary form (see https://xiph.org/flac/format.html#rice_partition) has not been tested. If anybody
-//   knows where I can find some test files for this, let me know.
-// - dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization.
-// - When using Ogg encapsulation, a corrupted metadata block will result in drflac_open_with_metadata() and drflac_open()
-//   returning inconsistent samples.
+David Reid - mackron@gmail.com
+*/
+
+/*
+USAGE
+=====
+dr_flac is a single-file library. To use it, do something like the following in one .c file.
+    #define DR_FLAC_IMPLEMENTATION
+    #include "dr_flac.h"
+
+You can then #include this file in other parts of the program as you would with any other header file. To decode audio data,
+do something like the following:
+
+    drflac* pFlac = drflac_open_file("MySong.flac");
+    if (pFlac == NULL) {
+        // Failed to open FLAC file
+    }
+
+    drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32));
+    drflac_uint64 numberOfPCMFramesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples);
+
+The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of
+channels and the bits per sample, should be directly accessible - just make sure you don't change their values. Samples are
+always output as interleaved signed 32-bit PCM. In the example above a native FLAC stream was opened, however dr_flac has
+seamless support for Ogg encapsulated FLAC streams as well.
+
+You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and
+the decoder will give you as many samples as it can, up to the amount requested. Later on when you need the next batch of
+samples, just call it again. Example:
+
+    while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) {
+        do_something();
+    }
+
+You can seek to a specific sample with drflac_seek_to_sample(). The given sample is based on interleaving. So for example,
+if you were to seek to the sample at index 0 in a stereo stream, you'll be seeking to the first sample of the left channel.
+The sample at index 1 will be the first sample of the right channel. The sample at index 2 will be the second sample of the
+left channel, etc.
+
+
+If you just want to quickly decode an entire FLAC file in one go you can do something like this:
+
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount);
+    if (pSampleData == NULL) {
+        // Failed to open and decode FLAC file.
+    }
+
+    ...
+
+    drflac_free(pSampleData);
+
+
+You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs
+respectively, but note that these should be considered lossy.
+
+
+If you need access to metadata (album art, etc.), use drflac_open_with_metadata(), drflac_open_file_with_metadata() or
+drflac_open_memory_with_metadata(). The rationale for keeping these APIs separate is that they're slightly slower than the
+normal versions and also just a little bit harder to use.
+
+dr_flac reports metadata to the application through the use of a callback, and every metadata block is reported before
+drflac_open_with_metadata() returns.
+
+
+The main opening APIs (drflac_open(), etc.) will fail if the header is not present. This presents a problem in certain
+scenarios such as broadcast style streams like internet radio where the header may not be present because the user has
+started playback mid-stream. To handle this, use the relaxed APIs: drflac_open_relaxed() and drflac_open_with_metadata_relaxed().
+
+It is not recommended to use these APIs for file based streams because a missing header would usually indicate a
+corrupted or perverse file. In addition, these APIs can take a long time to initialize because they may need to spend
+a lot of time finding the first frame.
+
+
+
+OPTIONS
+=======
+#define these options before including this file.
+
+#define DR_FLAC_NO_STDIO
+  Disable drflac_open_file() and family.
+
+#define DR_FLAC_NO_OGG
+  Disables support for Ogg/FLAC streams.
+
+#define DR_FLAC_BUFFER_SIZE <number>
+  Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls
+  back to the client for more data. Larger values mean more memory, but better performance. My tests show diminishing
+  returns after about 4KB (which is the default). Consider reducing this if you have a very efficient implementation of
+  onRead(), or increase it if it's very inefficient. Must be a multiple of 8.
+
+#define DR_FLAC_NO_CRC
+  Disables CRC checks. This will offer a performance boost when CRC is unnecessary.
+
+#define DR_FLAC_NO_SIMD
+  Disables SIMD optimizations (SSE on x86/x64 architectures). Use this if you are having compatibility issues with your
+  compiler.
+
+
+
+QUICK NOTES
+===========
+- dr_flac does not currently support changing the sample rate nor channel count mid stream.
+- Audio data is output as signed 32-bit PCM, regardless of the bits per sample the FLAC stream is encoded as.
+- This has not been tested on big-endian architectures.
+- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization.
+- When using Ogg encapsulation, a corrupted metadata block will result in drflac_open_with_metadata() and drflac_open()
+  returning inconsistent samples.
+*/
 
 #ifndef dr_flac_h
 #define dr_flac_h
@@ -145,9 +144,25 @@ typedef drflac_uint32    drflac_bool32;
 #define DRFLAC_TRUE      1
 #define DRFLAC_FALSE     0
 
-// As data is read from the client it is placed into an internal buffer for fast access. This controls the
-// size of that buffer. Larger values means more speed, but also more memory. In my testing there is diminishing
-// returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8.
+#if defined(_MSC_VER) && _MSC_VER >= 1700   /* Visual Studio 2012 */
+    #define DRFLAC_DEPRECATED       __declspec(deprecated)
+#elif (defined(__GNUC__) && __GNUC__ >= 4)  /* GCC 4 */
+    #define DRFLAC_DEPRECATED       __attribute__((deprecated))
+#elif defined(__has_feature)                /* Clang */
+    #if __has_feature(attribute_deprecated) /* __has_feature() is evaluated directly; it is not a valid operand of defined(). */
+        #define DRFLAC_DEPRECATED   __attribute__((deprecated))
+    #else
+        #define DRFLAC_DEPRECATED           /* Attribute not available: expands to nothing. */
+    #endif
+#else
+    #define DRFLAC_DEPRECATED               /* Unknown compiler: expands to nothing. */
+#endif
+
+/*
+As data is read from the client it is placed into an internal buffer for fast access. This controls the
+size of that buffer. Larger values mean more speed, but also more memory. In my testing there are diminishing
+returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8.
+*/
 #ifndef DR_FLAC_BUFFER_SIZE
 #define DR_FLAC_BUFFER_SIZE   4096
 #endif
@@ -156,24 +171,18 @@ typedef drflac_uint32    drflac_bool32;
 extern "C" {
 #endif
 
-// Check if we can enable 64-bit optimizations.
-#if defined(_WIN64)
+/* Check if we can enable 64-bit optimizations. */
+#if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
 #define DRFLAC_64BIT
 #endif
 
-#if defined(__GNUC__)
-#if defined(__x86_64__) || defined(__ppc64__)
-#define DRFLAC_64BIT
-#endif
-#endif
-
 #ifdef DRFLAC_64BIT
 typedef drflac_uint64 drflac_cache_t;
 #else
 typedef drflac_uint32 drflac_cache_t;
 #endif
 
-// The various metadata block types.
+/* The various metadata block types. */
 #define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO       0
 #define DRFLAC_METADATA_BLOCK_TYPE_PADDING          1
 #define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION      2
@@ -183,7 +192,7 @@ typedef drflac_uint32 drflac_cache_t;
 #define DRFLAC_METADATA_BLOCK_TYPE_PICTURE          6
 #define DRFLAC_METADATA_BLOCK_TYPE_INVALID          127
 
-// The various picture types specified in the PICTURE block.
+/* The various picture types specified in the PICTURE block. */
 #define DRFLAC_PICTURE_TYPE_OTHER                   0
 #define DRFLAC_PICTURE_TYPE_FILE_ICON               1
 #define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON         2
@@ -219,12 +228,12 @@ typedef enum
     drflac_seek_origin_current
 } drflac_seek_origin;
 
-// Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block.
+/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */
 #pragma pack(2)
 typedef struct
 {
     drflac_uint64 firstSample;
-    drflac_uint64 frameOffset;   // The offset from the first byte of the header of the first frame.
+    drflac_uint64 frameOffset;   /* The offset from the first byte of the header of the first frame. */
     drflac_uint16 sampleCount;
 } drflac_seekpoint;
 #pragma pack()
@@ -244,15 +253,17 @@ typedef struct
 
 typedef struct
 {
-    // The metadata type. Use this to know how to interpret the data below.
+    /* The metadata type. Use this to know how to interpret the data below. */
     drflac_uint32 type;
 
-    // A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to
-    // not modify the contents of this buffer. Use the structures below for more meaningful and structured
-    // information about the metadata. It's possible for this to be null.
+    /*
+    A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to
+    not modify the contents of this buffer. Use the structures below for more meaningful and structured
+    information about the metadata. It's possible for this to be null.
+    */
     const void* pRawData;
 
-    // The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL.
+    /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */
     drflac_uint32 rawDataSize;
 
     union
@@ -282,7 +293,7 @@ typedef struct
             drflac_uint32 vendorLength;
             const char* vendor;
             drflac_uint32 commentCount;
-            const char* comments;
+            const void* pComments;
         } vorbis_comment;
 
         struct
@@ -291,7 +302,7 @@ typedef struct
             drflac_uint64 leadInSampleCount;
             drflac_bool32 isCD;
             drflac_uint8 trackCount;
-            const drflac_uint8* pTrackData;
+            const void* pTrackData;
         } cuesheet;
 
         struct
@@ -312,40 +323,46 @@ typedef struct
 } drflac_metadata;
 
 
-// Callback for when data needs to be read from the client.
-//
-// pUserData   [in]  The user data that was passed to drflac_open() and family.
-// pBufferOut  [out] The output buffer.
-// bytesToRead [in]  The number of bytes to read.
-//
-// Returns the number of bytes actually read.
-//
-// A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
-// either the entire bytesToRead is filled or you have reached the end of the stream.
+/*
+Callback for when data needs to be read from the client.
+
+pUserData   [in]  The user data that was passed to drflac_open() and family.
+pBufferOut  [out] The output buffer.
+bytesToRead [in]  The number of bytes to read.
+
+Returns the number of bytes actually read.
+
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
+either the entire bytesToRead is filled or you have reached the end of the stream.
+*/
 typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
 
-// Callback for when data needs to be seeked.
-//
-// pUserData [in] The user data that was passed to drflac_open() and family.
-// offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
-// origin    [in] The origin of the seek - the current position or the start of the stream.
-//
-// Returns whether or not the seek was successful.
-//
-// The offset will never be negative. Whether or not it is relative to the beginning or current position is determined
-// by the "origin" parameter which will be either drflac_seek_origin_start or drflac_seek_origin_current.
+/*
+Callback for when data needs to be seeked.
+
+pUserData [in] The user data that was passed to drflac_open() and family.
+offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
+origin    [in] The origin of the seek - the current position or the start of the stream.
+
+Returns whether or not the seek was successful.
+
+The offset will never be negative. Whether or not it is relative to the beginning or current position is determined
+by the "origin" parameter which will be either drflac_seek_origin_start or drflac_seek_origin_current.
+*/
 typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
 
-// Callback for when a metadata block is read.
-//
-// pUserData [in] The user data that was passed to drflac_open() and family.
-// pMetadata [in] A pointer to a structure containing the data of the metadata block.
-//
-// Use pMetadata->type to determine which metadata block is being handled and how to read the data.
+/*
+Callback for when a metadata block is read.
+
+pUserData [in] The user data that was passed to drflac_open() and family.
+pMetadata [in] A pointer to a structure containing the data of the metadata block.
+
+Use pMetadata->type to determine which metadata block is being handled and how to read the data.
+*/
 typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
 
 
-// Structure for internal use. Only used for decoders opened with drflac_open_memory.
+/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */
 typedef struct
 {
     const drflac_uint8* data;
@@ -353,420 +370,588 @@ typedef struct
     size_t currentReadPos;
 } drflac__memory_stream;
 
-// Structure for internal use. Used for bit streaming.
+/* Structure for internal use. Used for bit streaming. */
 typedef struct
 {
-    // The function to call when more data needs to be read.
+    /* The function to call when more data needs to be read. */
     drflac_read_proc onRead;
 
-    // The function to call when the current read position needs to be moved.
+    /* The function to call when the current read position needs to be moved. */
     drflac_seek_proc onSeek;
 
-    // The user data to pass around to onRead and onSeek.
+    /* The user data to pass around to onRead and onSeek. */
     void* pUserData;
 
 
-    // The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the
-    // stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether
-    // or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t).
+    /*
+    The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the
+    stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether
+    or not the bitstreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t).
+    */
     size_t unalignedByteCount;
 
-    // The content of the unaligned bytes.
+    /* The content of the unaligned bytes. */
     drflac_cache_t unalignedCache;
 
-    // The index of the next valid cache line in the "L2" cache.
+    /* The index of the next valid cache line in the "L2" cache. */
     drflac_uint32 nextL2Line;
 
-    // The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining.
+    /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */
     drflac_uint32 consumedBits;
 
-    // The cached data which was most recently read from the client. There are two levels of cache. Data flows as such:
-    // Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions.
+    /*
+    The cached data which was most recently read from the client. There are two levels of cache. Data flows as such:
+    Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions.
+    */
     drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)];
     drflac_cache_t cache;
 
-    // CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this
-    // is reset to 0 at the beginning of each frame.
+    /*
+    CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this
+    is reset to 0 at the beginning of each frame.
+    */
     drflac_uint16 crc16;
-    drflac_cache_t crc16Cache;          // A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded.
-    drflac_uint32 crc16CacheIgnoredBytes;   // The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache.
+    drflac_cache_t crc16Cache;              /* A cache for optimizing CRC calculations. This is filled when the L1 cache is reloaded. */
+    drflac_uint32 crc16CacheIgnoredBytes;   /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */
 } drflac_bs;
 
 typedef struct
 {
-    // The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC.
+    /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */
     drflac_uint8 subframeType;
 
-    // The number of wasted bits per sample as specified by the sub-frame header.
+    /* The number of wasted bits per sample as specified by the sub-frame header. */
     drflac_uint8 wastedBitsPerSample;
 
-    // The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC.
+    /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */
     drflac_uint8 lpcOrder;
 
-    // The number of bits per sample for this subframe. This is not always equal to the current frame's bit per sample because
-    // an extra bit is required for side channels when interchannel decorrelation is being used.
+    /*
+    The number of bits per sample for this subframe. This is not always equal to the current frame's bits per sample because
+    an extra bit is required for side channels when interchannel decorrelation is being used.
+    */
     drflac_uint32 bitsPerSample;
 
-    // A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. Note that
-    // it's a signed 32-bit integer for each value.
+    /*
+    A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. Note that
+    it's a signed 32-bit integer for each value.
+    */
     drflac_int32* pDecodedSamples;
 } drflac_subframe;
 
 typedef struct
 {
-    // If the stream uses variable block sizes, this will be set to the index of the first sample. If fixed block sizes are used, this will
-    // always be set to 0.
+    /*
+    If the stream uses variable block sizes, this will be set to the index of the first sample. If fixed block sizes are used, this will
+    always be set to 0.
+    */
     drflac_uint64 sampleNumber;
 
-    // If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0.
+    /* If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. */
     drflac_uint32 frameNumber;
 
-    // The sample rate of this frame.
+    /* The sample rate of this frame. */
     drflac_uint32 sampleRate;
 
-    // The number of samples in each sub-frame within this frame.
+    /* The number of samples in each sub-frame within this frame. */
     drflac_uint16 blockSize;
 
-    // The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this
-    // will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE.
+    /*
+    The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this
+    will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE.
+    */
     drflac_uint8 channelAssignment;
 
-    // The number of bits per sample within this frame.
+    /* The number of bits per sample within this frame. */
     drflac_uint8 bitsPerSample;
 
-    // The frame's CRC.
+    /* The frame's CRC. */
     drflac_uint8 crc8;
 } drflac_frame_header;
 
 typedef struct
 {
-    // The header.
+    /* The header. */
     drflac_frame_header header;
 
-    // The number of samples left to be read in this frame. This is initially set to the block size multiplied by the channel count. As samples
-    // are read, this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame.
+    /*
+    The number of samples left to be read in this frame. This is initially set to the block size multiplied by the channel count. As samples
+    are read, this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame.
+    */
     drflac_uint32 samplesRemaining;
 
-    // The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels.
+    /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */
     drflac_subframe subframes[8];
 } drflac_frame;
 
 typedef struct
 {
-    // The function to call when a metadata block is read.
+    /* The function to call when a metadata block is read. */
     drflac_meta_proc onMeta;
 
-    // The user data posted to the metadata callback function.
+    /* The user data posted to the metadata callback function. */
     void* pUserDataMD;
 
 
-    // The sample rate. Will be set to something like 44100.
+    /* The sample rate. Will be set to something like 44100. */
     drflac_uint32 sampleRate;
 
-    // The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the
-    // value specified in the STREAMINFO block.
+    /*
+    The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the
+    value specified in the STREAMINFO block.
+    */
     drflac_uint8 channels;
 
-    // The bits per sample. Will be set to something like 16, 24, etc.
+    /* The bits per sample. Will be set to something like 16, 24, etc. */
     drflac_uint8 bitsPerSample;
 
-    // The maximum block size, in samples. This number represents the number of samples in each channel (not combined).
+    /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */
     drflac_uint16 maxBlockSize;
 
-    // The total number of samples making up the stream. This includes every channel. For example, if the stream has 2 channels,
-    // with each channel having a total of 4096, this value will be set to 2*4096 = 8192. Can be 0 in which case it's still a
-    // valid stream, but just means the total sample count is unknown. Likely the case with streams like internet radio.
+    /*
+    The total number of samples making up the stream. This includes every channel. For example, if the stream has 2 channels,
+    with each channel having a total of 4096, this value will be set to 2*4096 = 8192. Can be 0 in which case it's still a
+    valid stream, but just means the total sample count is unknown. Likely the case with streams like internet radio.
+    */
     drflac_uint64 totalSampleCount;
+    drflac_uint64 totalPCMFrameCount;   /* <-- Equal to totalSampleCount / channels. */
 
 
-    // The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream.
+    /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */
     drflac_container container;
 
-    // The number of seekpoints in the seektable.
+    /* The number of seekpoints in the seektable. */
     drflac_uint32 seekpointCount;
 
 
-    // Information about the frame the decoder is currently sitting on.
+    /* Information about the frame the decoder is currently sitting on. */
     drflac_frame currentFrame;
 
-    // The index of the sample the decoder is currently sitting on. This is only used for seeking.
+    /* The index of the sample the decoder is currently sitting on. This is only used for seeking. */
     drflac_uint64 currentSample;
 
-    // The position of the first frame in the stream. This is only ever used for seeking.
+    /* The position of the first frame in the stream. This is only ever used for seeking. */
     drflac_uint64 firstFramePos;
 
 
-    // A hack to avoid a malloc() when opening a decoder with drflac_open_memory().
+    /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */
     drflac__memory_stream memoryStream;
 
 
-    // A pointer to the decoded sample data. This is an offset of pExtraData.
+    /* A pointer to the decoded sample data. This is an offset of pExtraData. */
     drflac_int32* pDecodedSamples;
 
-    // A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table.
+    /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */
     drflac_seekpoint* pSeekpoints;
 
-    // Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData.
+    /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */
     void* _oggbs;
 
-    // The bit streamer. The raw FLAC data is fed through this object.
+    /* The bit streamer. The raw FLAC data is fed through this object. */
     drflac_bs bs;
 
-    // Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs.
+    /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */
     drflac_uint8 pExtraData[1];
 } drflac;
 
 
-// Opens a FLAC decoder.
-//
-// onRead    [in]           The function to call when data needs to be read from the client.
-// onSeek    [in]           The function to call when the read position of the client data needs to move.
-// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
-//
-// Returns a pointer to an object representing the decoder.
-//
-// Close the decoder with drflac_close().
-//
-// This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated
-// FLAC, both of which should work seamlessly without any manual intervention. Ogg encapsulation also works with
-// multiplexed streams which basically means it can play FLAC encoded audio tracks in videos.
-//
-// This is the lowest level function for opening a FLAC stream. You can also use drflac_open_file() and drflac_open_memory()
-// to open the stream from a file or from a block of memory respectively.
-//
-// The STREAMINFO block must be present for this to succeed. Use drflac_open_relaxed() to open a FLAC stream where
-// the header may not be present.
-//
-// See also: drflac_open_file(), drflac_open_memory(), drflac_open_with_metadata(), drflac_close()
+/*
+Opens a FLAC decoder.
+
+onRead    [in]           The function to call when data needs to be read from the client.
+onSeek    [in]           The function to call when the read position of the client data needs to move.
+pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+
+Returns a pointer to an object representing the decoder.
+
+Close the decoder with drflac_close().
+
+This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated
+FLAC, both of which should work seamlessly without any manual intervention. Ogg encapsulation also works with
+multiplexed streams which basically means it can play FLAC encoded audio tracks in videos.
+
+This is the lowest level function for opening a FLAC stream. You can also use drflac_open_file() and drflac_open_memory()
+to open the stream from a file or from a block of memory respectively.
+
+The STREAMINFO block must be present for this to succeed. Use drflac_open_relaxed() to open a FLAC stream where
+the header may not be present.
+
+See also: drflac_open_file(), drflac_open_memory(), drflac_open_with_metadata(), drflac_close()
+*/
 drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData);
 
-// The same as drflac_open(), except attempts to open the stream even when a header block is not present.
-//
-// Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do
-// not set this to drflac_container_unknown - that is for internal use only.
-//
-// Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never
-// found it will continue forever. To abort, force your onRead callback to return 0, which dr_flac will use as an
-// indicator that the end of the stream was found.
+/*
+The same as drflac_open(), except attempts to open the stream even when a header block is not present.
+
+Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do
+not set this to drflac_container_unknown - that is for internal use only.
+
+Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never
+found it will continue forever. To abort, force your onRead callback to return 0, which dr_flac will use as an
+indicator that the end of the stream was found.
+*/
 drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData);
 
-// Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
-//
-// onRead    [in]           The function to call when data needs to be read from the client.
-// onSeek    [in]           The function to call when the read position of the client data needs to move.
-// onMeta    [in]           The function to call for every metadata block.
-// pUserData [in, optional] A pointer to application defined data that will be passed to onRead, onSeek and onMeta.
-//
-// Returns a pointer to an object representing the decoder.
-//
-// Close the decoder with drflac_close().
-//
-// This is slower than drflac_open(), so avoid this one if you don't need metadata. Internally, this will do a DRFLAC_MALLOC()
-// and DRFLAC_FREE() for every metadata block except for STREAMINFO and PADDING blocks.
-//
-// The caller is notified of the metadata via the onMeta callback. All metadata blocks will be handled before the function
-// returns.
-//
-// The STREAMINFO block must be present for this to succeed. Use drflac_open_with_metadata_relaxed() to open a FLAC
-// stream where the header may not be present.
-//
-// Note that this will behave inconsistently with drflac_open() if the stream is an Ogg encapsulated stream and a metadata
-// block is corrupted. This is due to the way the Ogg stream recovers from corrupted pages. When drflac_open_with_metadata()
-// is being used, the open routine will try to read the contents of the metadata block, whereas drflac_open() will simply
-// seek past it (for the sake of efficiency). This inconsistency can result in different samples being returned depending on
-// whether or not the stream is being opened with metadata.
-//
-// See also: drflac_open_file_with_metadata(), drflac_open_memory_with_metadata(), drflac_open(), drflac_close()
+/*
+Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
+
+onRead    [in]           The function to call when data needs to be read from the client.
+onSeek    [in]           The function to call when the read position of the client data needs to move.
+onMeta    [in]           The function to call for every metadata block.
+pUserData [in, optional] A pointer to application defined data that will be passed to onRead, onSeek and onMeta.
+
+Returns a pointer to an object representing the decoder.
+
+Close the decoder with drflac_close().
+
+This is slower than drflac_open(), so avoid this one if you don't need metadata. Internally, this will do a DRFLAC_MALLOC()
+and DRFLAC_FREE() for every metadata block except for STREAMINFO and PADDING blocks.
+
+The caller is notified of the metadata via the onMeta callback. All metadata blocks will be handled before the function
+returns.
+
+The STREAMINFO block must be present for this to succeed. Use drflac_open_with_metadata_relaxed() to open a FLAC
+stream where the header may not be present.
+
+Note that this will behave inconsistently with drflac_open() if the stream is an Ogg encapsulated stream and a metadata
+block is corrupted. This is due to the way the Ogg stream recovers from corrupted pages. When drflac_open_with_metadata()
+is being used, the open routine will try to read the contents of the metadata block, whereas drflac_open() will simply
+seek past it (for the sake of efficiency). This inconsistency can result in different samples being returned depending on
+whether or not the stream is being opened with metadata.
+
+See also: drflac_open_file_with_metadata(), drflac_open_memory_with_metadata(), drflac_open(), drflac_close()
+*/
 drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData);
 
-// The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
-//
-// See also: drflac_open_with_metadata(), drflac_open_relaxed()
+/*
+The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
+
+See also: drflac_open_with_metadata(), drflac_open_relaxed()
+*/
 drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData);
 
-// Closes the given FLAC decoder.
-//
-// pFlac [in] The decoder to close.
-//
-// This will destroy the decoder object.
+/*
+Closes the given FLAC decoder.
+
+pFlac [in] The decoder to close.
+
+This will destroy the decoder object.
+*/
 void drflac_close(drflac* pFlac);
 
 
-// Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM.
-//
-// pFlac         [in]            The decoder.
-// samplesToRead [in]            The number of samples to read.
-// pBufferOut    [out, optional] A pointer to the buffer that will receive the decoded samples.
-//
-// Returns the number of samples actually read.
-//
-// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples
-// seeked.
-drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* pBufferOut);
+/*
+Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM.
 
-// Same as drflac_read_s32(), except outputs samples as 16-bit integer PCM rather than 32-bit.
-//
-// pFlac         [in]            The decoder.
-// samplesToRead [in]            The number of samples to read.
-// pBufferOut    [out, optional] A pointer to the buffer that will receive the decoded samples.
-//
-// Returns the number of samples actually read.
-//
-// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples
-// seeked.
-//
-// Note that this is lossy for streams where the bits per sample is larger than 16.
-drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut);
+pFlac        [in]            The decoder.
+framesToRead [in]            The number of PCM frames to read.
+pBufferOut   [out, optional] A pointer to the buffer that will receive the decoded samples.
 
-// Same as drflac_read_s32(), except outputs samples as 32-bit floating-point PCM.
-//
-// pFlac         [in]            The decoder.
-// samplesToRead [in]            The number of samples to read.
-// pBufferOut    [out, optional] A pointer to the buffer that will receive the decoded samples.
-//
-// Returns the number of samples actually read.
-//
-// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples
-// seeked.
-//
-// Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly
-// represent every possible number.
-drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut);
+Returns the number of PCM frames actually read.
 
-// Seeks to the sample at the given index.
-//
-// pFlac       [in] The decoder.
-// sampleIndex [in] The index of the sample to seek to. See notes below.
-//
-// Returns DRFLAC_TRUE if successful; DRFLAC_FALSE otherwise.
-//
-// The sample index is based on interleaving. In a stereo stream, for example, the sample at index 0 is the first sample
-// in the left channel; the sample at index 1 is the first sample on the right channel, and so on.
-//
-// When seeking, you will likely want to ensure it's rounded to a multiple of the channel count. You can do this with
-// something like drflac_seek_to_sample(pFlac, (mySampleIndex + (mySampleIndex % pFlac->channels)))
-drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex);
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames
+seeked.
+*/
+drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut);
+
+/*
+Same as drflac_read_pcm_frames_s32(), except outputs samples as 16-bit integer PCM rather than 32-bit.
+
+Note that this is lossy for streams where the bits per sample is larger than 16.
+*/
+drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut);
+
+/*
+Same as drflac_read_pcm_frames_s32(), except outputs samples as 32-bit floating-point PCM.
+
+Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly
+represent every possible number.
+*/
+drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut);
+
+/*
+Seeks to the PCM frame at the given index.
+
+pFlac         [in] The decoder.
+pcmFrameIndex [in] The index of the PCM frame to seek to.
+
+Returns DRFLAC_TRUE if successful; DRFLAC_FALSE otherwise.
+*/
+drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex);
 
 
 
 #ifndef DR_FLAC_NO_STDIO
-// Opens a FLAC decoder from the file at the given path.
-//
-// filename [in] The path of the file to open, either absolute or relative to the current directory.
-//
-// Returns a pointer to an object representing the decoder.
-//
-// Close the decoder with drflac_close().
-//
-// This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the
-// number of files a process can have open at any given time, so keep this mind if you have many decoders open at the
-// same time.
-//
-// See also: drflac_open(), drflac_open_file_with_metadata(), drflac_close()
+/*
+Opens a FLAC decoder from the file at the given path.
+
+filename [in] The path of the file to open, either absolute or relative to the current directory.
+
+Returns a pointer to an object representing the decoder.
+
+Close the decoder with drflac_close().
+
+This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the
+number of files a process can have open at any given time, so keep this in mind if you have many decoders open at the
+same time.
+
+See also: drflac_open(), drflac_open_file_with_metadata(), drflac_close()
+*/
 drflac* drflac_open_file(const char* filename);
 
-// Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.)
-//
-// Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+/*
+Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.)
+
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+*/
 drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc onMeta, void* pUserData);
 #endif
 
-// Opens a FLAC decoder from a pre-allocated block of memory
-//
-// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
-// the lifetime of the decoder.
+/*
+Opens a FLAC decoder from a pre-allocated block of memory.
+
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+the lifetime of the decoder.
+*/
 drflac* drflac_open_memory(const void* data, size_t dataSize);
 
-// Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.)
-//
-// Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+/*
+Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.)
+
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+*/
 drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drflac_meta_proc onMeta, void* pUserData);
 
 
 
-//// High Level APIs ////
+/* High Level APIs */
 
-// Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a
-// pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with DRFLAC_FREE().
-//
-// Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously
-// read samples into a dynamically sized buffer on the heap until no samples are left.
-//
-// Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
-drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+/*
+Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a
+pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with DRFLAC_FREE().
 
-// Same as drflac_open_and_decode_s32(), except returns signed 16-bit integer samples.
-drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously
+read samples into a dynamically sized buffer on the heap until no samples are left.
 
-// Same as drflac_open_and_decode_s32(), except returns 32-bit floating-point samples.
-float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
+*/
+drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount);
 
 #ifndef DR_FLAC_NO_STDIO
-// Same as drflac_open_and_decode_s32() except opens the decoder from a file.
-drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */
+drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount);
 
-// Same as drflac_open_and_decode_file_s32(), except returns signed 16-bit integer samples.
-drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount);
 
-// Same as drflac_open_and_decode_file_f32(), except returns 32-bit floating-point samples.
-float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount);
 #endif
 
-// Same as drflac_open_and_decode_s32() except opens the decoder from a block of memory.
-drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */
+drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount);
 
-// Same as drflac_open_and_decode_memory_s32(), except returns signed 16-bit integer samples.
-drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount);
 
-// Same as drflac_open_and_decode_memory_s32(), except returns 32-bit floating-point samples.
-float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount);
 
-// Frees memory that was allocated internally by dr_flac.
+/* Frees memory that was allocated internally by dr_flac. */
 void drflac_free(void* p);
 
 
-// Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block.
+/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */
 typedef struct
 {
     drflac_uint32 countRemaining;
     const char* pRunningData;
 } drflac_vorbis_comment_iterator;
 
-// Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT
-// metadata block.
-void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const char* pComments);
+/*
+Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT
+metadata block.
+*/
+void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments);
 
-// Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The
-// returned string is NOT null terminated.
+/*
+Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The
+returned string is NOT null terminated.
+*/
 const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut);
 
 
+/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */
+typedef struct
+{
+    drflac_uint32 countRemaining;
+    const char* pRunningData;
+} drflac_cuesheet_track_iterator;
+
+/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */
+#pragma pack(4)
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 index;
+    drflac_uint8 reserved[3];
+} drflac_cuesheet_track_index;
+#pragma pack()
+
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 trackNumber;
+    char ISRC[12];
+    drflac_bool8 isAudio;
+    drflac_bool8 preEmphasis;
+    drflac_uint8 indexCount;
+    const drflac_cuesheet_track_index* pIndexPoints;
+} drflac_cuesheet_track;
+
+/*
+Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata
+block.
+*/
+void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData);
+
+/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more tracks. */
+drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack);
+
+
+/* Deprecated APIs */
+DRFLAC_DEPRECATED drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* pBufferOut);    /* Use drflac_read_pcm_frames_s32() instead. */
+DRFLAC_DEPRECATED drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut);    /* Use drflac_read_pcm_frames_s16() instead. */
+DRFLAC_DEPRECATED drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut);           /* Use drflac_read_pcm_frames_f32() instead. */
+DRFLAC_DEPRECATED drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex);                          /* Use drflac_seek_to_pcm_frame() instead. */
+DRFLAC_DEPRECATED drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); /* Use drflac_open_and_read_pcm_frames_s32(). */
+DRFLAC_DEPRECATED drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); /* Use drflac_open_and_read_pcm_frames_s16(). */
+DRFLAC_DEPRECATED float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);        /* Use drflac_open_and_read_pcm_frames_f32(). */
+DRFLAC_DEPRECATED drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);                                         /* Use drflac_open_file_and_read_pcm_frames_s32(). */
+DRFLAC_DEPRECATED drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);                                         /* Use drflac_open_file_and_read_pcm_frames_s16(). */
+DRFLAC_DEPRECATED float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);                                                /* Use drflac_open_file_and_read_pcm_frames_f32(). */
+DRFLAC_DEPRECATED drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);                          /* Use drflac_open_memory_and_read_pcm_frames_s32(). */
+DRFLAC_DEPRECATED drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);                          /* Use drflac_open_memory_and_read_pcm_frames_s16(). */
+DRFLAC_DEPRECATED float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount);                                 /* Use drflac_open_memory_and_read_pcm_frames_f32(). */
 
 #ifdef __cplusplus
 }
 #endif
-#endif  //dr_flac_h
+#endif  /* dr_flac_h */
 
 
-///////////////////////////////////////////////////////////////////////////////
-//
-// IMPLEMENTATION
-//
-///////////////////////////////////////////////////////////////////////////////
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
 #ifdef DR_FLAC_IMPLEMENTATION
+
+/* Disable some annoying warnings. */
+#if defined(__GNUC__)
+    #pragma GCC diagnostic push
+    #if __GNUC__ >= 7
+    #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+    #endif
+#endif
+
+#ifdef __linux__
+    #ifndef _BSD_SOURCE
+        #define _BSD_SOURCE
+    #endif
+    #ifndef __USE_BSD
+        #define __USE_BSD
+    #endif
+    #include <endian.h>
+#endif
+
 #include <stdlib.h>
 #include <string.h>
 
-// CPU architecture.
-#if defined(__x86_64__) || defined(_M_X64)
-#define DRFLAC_X64
-#elif defined(__i386) || defined(_M_IX86)
-#define DRFLAC_X86
-#elif defined(__arm__) || defined(_M_ARM)
-#define DRFLAC_ARM
+#ifdef _MSC_VER
+#define DRFLAC_INLINE __forceinline
+#else
+#ifdef __GNUC__
+#define DRFLAC_INLINE __inline__ __attribute__((always_inline))
+#else
+#define DRFLAC_INLINE
+#endif
 #endif
 
-// Compile-time CPU feature support.
+/* CPU architecture. */
+#if defined(__x86_64__) || defined(_M_X64)
+    #define DRFLAC_X64
+#elif defined(__i386) || defined(_M_IX86)
+    #define DRFLAC_X86
+#elif defined(__arm__) || defined(_M_ARM)
+    #define DRFLAC_ARM
+#endif
+
+/* Intrinsics Support */
+#if !defined(DR_FLAC_NO_SIMD)
+    #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
+        #if defined(_MSC_VER) && !defined(__clang__)
+            /* MSVC. */
+            #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2)    /* 2005 */
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)   /* 2010 */
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #else
+            /* Assume GNUC-style. */
+            #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. */
+        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
+            #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        #if defined(DRFLAC_SUPPORT_SSE41)
+            #include <smmintrin.h>
+        #elif defined(DRFLAC_SUPPORT_SSE2)
+            #include <emmintrin.h>
+        #endif
+    #endif
+
+    #if defined(DRFLAC_ARM)
+        #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
+            #define DRFLAC_SUPPORT_NEON
+        #endif
+
+        /* Fall back to looking for the #include file. */
+        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
+            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
+                #define DRFLAC_SUPPORT_NEON
+            #endif
+        #endif
+
+        #if defined(DRFLAC_SUPPORT_NEON)
+            #include <arm_neon.h>
+        #endif
+    #endif
+#endif
+
+/* Compile-time CPU feature support. */
 #if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
     #if defined(_MSC_VER) && !defined(__clang__)
         #if _MSC_VER >= 1400
@@ -776,52 +961,127 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, dr
                 __cpuid(info, fid);
             }
         #else
-        #define DRFLAC_NO_CPUID
+            #define DRFLAC_NO_CPUID
         #endif
     #else
         #if defined(__GNUC__) || defined(__clang__)
             static void drflac__cpuid(int info[4], int fid)
             {
-                __asm__ __volatile__ (
-                    "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
-                );
+                /*
+                It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the
+                specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for
+                supporting different assembly dialects.
+                
+                What's basically happening is that we're saving and restoring the ebx register manually.
+                */
+                #if defined(DRFLAC_X86) && defined(__PIC__)
+                    __asm__ __volatile__ (
+                        "xchg{l} {%%}ebx, %k1;"
+                        "cpuid;"
+                        "xchg{l} {%%}ebx, %k1;"
+                        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #else
+                    __asm__ __volatile__ (
+                        "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #endif
             }
         #else
-        #define DRFLAC_NO_CPUID
+            #define DRFLAC_NO_CPUID
         #endif
     #endif
 #else
-#define DRFLAC_NO_CPUID
+    #define DRFLAC_NO_CPUID
 #endif
 
-
-#ifdef __linux__
-#define _BSD_SOURCE
-#include <endian.h>
+/* Reports whether SSE2 may be used: decided at compile time where possible, otherwise by querying CPUID at run time. */
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse2()
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2)
+        #if defined(DRFLAC_X64)
+            return DRFLAC_TRUE;    /* 64-bit targets always support SSE2. */
+        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE2 code we can assume support. */
+        #elif defined(DRFLAC_NO_CPUID)
+            return DRFLAC_FALSE;   /* There is no way to ask the CPU, so report no support. */
+        #else
+            /* The CPUID helper is drflac__cpuid (two underscores). Leaf 1 returns EDX in info[3]; bit 26 is the SSE2 flag. */
+            int info[4];
+            drflac__cpuid(info, 1);
+            return (info[3] & (1 << 26)) != 0;
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE2 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
 #endif
+}
+
+/* Reports whether SSE4.1 may be used: DRFLAC_TRUE when the compiler already targets it, otherwise the answer from CPUID. */
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse41()
+{
+#if defined(DRFLAC_SUPPORT_SSE41)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
+        #if defined(__SSE4_1__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE41 code we can assume support. */
+        #elif defined(DRFLAC_NO_CPUID)
+            return DRFLAC_FALSE;   /* There is no way to ask the CPU, so report no support. */
+        #else
+            /*
+            x64 and _M_IX86_FP == 2 only guarantee SSE2, so without __SSE4_1__ ask the CPU: CPUID leaf 1, ECX (info[2]) bit 19.
+            */
+            int info[4];
+            drflac__cpuid(info, 1);
+            return (info[2] & (1 << 19)) != 0;
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE41 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
 
 #if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
-#define DRFLAC_HAS_LZCNT_INTRINSIC
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
 #elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
-#define DRFLAC_HAS_LZCNT_INTRINSIC
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
 #elif defined(__clang__)
     #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
-    #define DRFLAC_HAS_LZCNT_INTRINSIC
+        #define DRFLAC_HAS_LZCNT_INTRINSIC
     #endif
 #endif
 
 #if defined(_MSC_VER) && _MSC_VER >= 1300
-#define DRFLAC_HAS_BYTESWAP_INTRINSIC
-#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
-#define DRFLAC_HAS_BYTESWAP_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
 #elif defined(__clang__)
-    #if __has_builtin(__builtin_bswap16) && __has_builtin(__builtin_bswap32) && __has_builtin(__builtin_bswap64)
-    #define DRFLAC_HAS_BYTESWAP_INTRINSIC
+    #if __has_builtin(__builtin_bswap16)
+        #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #endif
+    #if __has_builtin(__builtin_bswap32)
+        #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #endif
+    #if __has_builtin(__builtin_bswap64)
+        #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #endif
+#elif defined(__GNUC__)
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+        #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+        #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #endif
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+        #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
     #endif
 #endif
 
 
-// Standard library stuff.
+/* Standard library stuff. */
 #ifndef DRFLAC_ASSERT
 #include <assert.h>
 #define DRFLAC_ASSERT(expression)           assert(expression)
@@ -842,21 +1102,11 @@ const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, dr
 #define DRFLAC_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
 #endif
 
-#define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  // 64 for AVX-512 in the future.
-
-#ifdef _MSC_VER
-#define DRFLAC_INLINE __forceinline
-#else
-#ifdef __GNUC__
-#define DRFLAC_INLINE inline __attribute__((always_inline))
-#else
-#define DRFLAC_INLINE inline
-#endif
-#endif
+#define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  /* 64 for AVX-512 in the future. */
 
 typedef drflac_int32 drflac_result;
 #define DRFLAC_SUCCESS                                  0
-#define DRFLAC_ERROR                                    -1  // A generic error.
+#define DRFLAC_ERROR                                    -1  /* A generic error. */
 #define DRFLAC_INVALID_ARGS                             -2
 #define DRFLAC_END_OF_STREAM                            -128
 #define DRFLAC_CRC_MISMATCH                             -129
@@ -875,6 +1125,12 @@ typedef drflac_int32 drflac_result;
 #define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE            9
 #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
 
+/*
+Keeps track of the number of leading samples for each sub-frame. This is required because the SSE pipeline will occasionally
+reference excess prior samples.
+*/
+#define DRFLAC_LEADING_SAMPLES                          32
+
 
 #define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
 #define drflac_assert                                   DRFLAC_ASSERT
@@ -882,30 +1138,35 @@ typedef drflac_int32 drflac_result;
 #define drflac_zero_memory                              DRFLAC_ZERO_MEMORY
 
 
-// CPU caps.
+/* CPU caps. */
 static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE;
 #ifndef DRFLAC_NO_CPUID
-static drflac_bool32 drflac__gIsSSE42Supported = DRFLAC_FALSE;
+static drflac_bool32 drflac__gIsSSE2Supported  = DRFLAC_FALSE;
+static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE;
 static void drflac__init_cpu_caps()
 {
     int info[4] = {0};
 
-    // LZCNT
+    /* LZCNT: read from extended CPUID leaf 0x80000001, ECX (info[2]) bit 5. */
     drflac__cpuid(info, 0x80000001);
-    drflac__gIsLZCNTSupported = (info[2] & (1 <<  5)) != 0;
+    drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0;
 
-    // SSE4.2
-    drflac__cpuid(info, 1);
-    drflac__gIsSSE42Supported = (info[2] & (1 << 19)) != 0;
+    /* SSE2: cache the result of drflac_has_sse2() in a global. */
+    drflac__gIsSSE2Supported = drflac_has_sse2();
+
+    /* SSE4.1: cache the result of drflac_has_sse41() in a global. */
+    drflac__gIsSSE41Supported = drflac_has_sse41();
 }
 #endif
 
 
-//// Endian Management ////
+/* Endian Management */
 static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian()
 {
 #if defined(DRFLAC_X86) || defined(DRFLAC_X64)
     return DRFLAC_TRUE;
+#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
+    return DRFLAC_TRUE;
 #else
     int n = 1;
     return (*(char*)&n) == 1;
@@ -914,7 +1175,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian()
 
 static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
 {
-#ifdef DRFLAC_HAS_BYTESWAP_INTRINSIC
+#ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
     #if defined(_MSC_VER)
         return _byteswap_ushort(n);
     #elif defined(__GNUC__) || defined(__clang__)
@@ -930,7 +1191,7 @@ static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
 
 static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
 {
-#ifdef DRFLAC_HAS_BYTESWAP_INTRINSIC
+#ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
     #if defined(_MSC_VER)
         return _byteswap_ulong(n);
     #elif defined(__GNUC__) || defined(__clang__)
@@ -948,7 +1209,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
 
 static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
 {
-#ifdef DRFLAC_HAS_BYTESWAP_INTRINSIC
+#ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
     #if defined(_MSC_VER)
         return _byteswap_uint64(n);
     #elif defined(__GNUC__) || defined(__clang__)
@@ -971,55 +1232,39 @@ static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
 
 static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n)
 {
-#ifdef __linux__
-    return be16toh(n);
-#else
     if (drflac__is_little_endian()) {
         return drflac__swap_endian_uint16(n);
     }
 
     return n;
-#endif
 }
 
 static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
 {
-#ifdef __linux__
-    return be32toh(n);
-#else
     if (drflac__is_little_endian()) {
         return drflac__swap_endian_uint32(n);
     }
 
     return n;
-#endif
 }
 
 static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
 {
-#ifdef __linux__
-    return be64toh(n);
-#else
     if (drflac__is_little_endian()) {
         return drflac__swap_endian_uint64(n);
     }
 
     return n;
-#endif
 }
 
 
 static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
 {
-#ifdef __linux__
-    return le32toh(n);
-#else
     if (!drflac__is_little_endian()) {
         return drflac__swap_endian_uint32(n);
     }
 
     return n;
-#endif
 }
 
 
@@ -1036,7 +1281,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
 
 
 
-// The CRC code below is based on this document: http://zlib.net/crc_v3.txt
+/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */
 static drflac_uint8 drflac__crc8_table[] = {
     0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
     0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
@@ -1098,8 +1343,6 @@ static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint
 
 static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count)
 {
-    drflac_assert(count <= 32);
-
 #ifdef DR_FLAC_NO_CRC
     (void)crc;
     (void)data;
@@ -1107,7 +1350,7 @@ static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 da
     return 0;
 #else
 #if 0
-    // REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);")
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */
     drflac_uint8 p = 0x07;
     for (int i = count-1; i >= 0; --i) {
         drflac_uint8 bit = (data & (1 << i)) >> i;
@@ -1119,13 +1362,19 @@ static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 da
     }
     return crc;
 #else
-    drflac_uint32 wholeBytes = count >> 3;
-    drflac_uint32 leftoverBits = count - (wholeBytes*8);
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
 
     static drflac_uint64 leftoverDataMaskTable[8] = {
         0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
     };
-    drflac_uint64 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+    
+    drflac_assert(count <= 32);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count - (wholeBytes*8);
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
 
     switch (wholeBytes) {
         case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
@@ -1165,8 +1414,6 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_
 
 static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count)
 {
-    drflac_assert(count <= 64);
-
 #ifdef DR_FLAC_NO_CRC
     (void)crc;
     (void)data;
@@ -1174,7 +1421,7 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac
     return 0;
 #else
 #if 0
-    // REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);")
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */
     drflac_uint16 p = 0x8005;
     for (int i = count-1; i >= 0; --i) {
         drflac_uint16 bit = (data & (1ULL << i)) >> i;
@@ -1187,13 +1434,19 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac
 
     return crc;
 #else
-    drflac_uint32 wholeBytes = count >> 3;
-    drflac_uint32 leftoverBits = count - (wholeBytes*8);
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
 
     static drflac_uint64 leftoverDataMaskTable[8] = {
         0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
     };
-    drflac_uint64 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    drflac_assert(count <= 64);
+    
+    wholeBytes = count >> 3;
+    leftoverBits = count - (wholeBytes*8);
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
 
     switch (wholeBytes) {
         default:
@@ -1210,32 +1463,36 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac
 
 static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count)
 {
-    drflac_assert(count <= 64);
-
 #ifdef DR_FLAC_NO_CRC
     (void)crc;
     (void)data;
     (void)count;
     return 0;
 #else
-    drflac_uint32 wholeBytes = count >> 3;
-    drflac_uint32 leftoverBits = count - (wholeBytes*8);
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
 
     static drflac_uint64 leftoverDataMaskTable[8] = {
         0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
     };
-    drflac_uint64 leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+    
+    drflac_assert(count <= 64);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count - (wholeBytes*8);
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
 
     switch (wholeBytes) {
         default:
-        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0xFF00000000000000 << leftoverBits)) >> (56 + leftoverBits)));
-        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x00FF000000000000 << leftoverBits)) >> (48 + leftoverBits)));
-        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x0000FF0000000000 << leftoverBits)) >> (40 + leftoverBits)));
-        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x000000FF00000000 << leftoverBits)) >> (32 + leftoverBits)));
-        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x00000000FF000000 << leftoverBits)) >> (24 + leftoverBits)));
-        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x0000000000FF0000 << leftoverBits)) >> (16 + leftoverBits)));
-        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x000000000000FF00 << leftoverBits)) >> ( 8 + leftoverBits)));
-        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & ((drflac_uint64)0x00000000000000FF << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits)));    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits)));
+        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits)));
+        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits)));
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000      ) << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000      ) << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00      ) << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF      ) << leftoverBits)) >> ( 0 + leftoverBits)));
         case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
     }
     return crc;
@@ -1259,27 +1516,26 @@ static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_
 #define drflac__be2host__cache_line drflac__be2host_32
 #endif
 
-// BIT READING ATTEMPT #2
-//
-// This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting
-// on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache
-// is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an
-// array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data
-// from onRead() is read into.
-#define DRFLAC_CACHE_L1_SIZE_BYTES(bs)                  (sizeof((bs)->cache))
-#define DRFLAC_CACHE_L1_SIZE_BITS(bs)                   (sizeof((bs)->cache)*8)
-#define DRFLAC_CACHE_L1_BITS_REMAINING(bs)              (DRFLAC_CACHE_L1_SIZE_BITS(bs) - ((bs)->consumedBits))
-#ifdef DRFLAC_64BIT
-#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)       (~(((drflac_uint64)-1LL) >> (_bitCount)))
-#else
-#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)       (~(((drflac_uint32)-1) >> (_bitCount)))
-#endif
-#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount)  (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
-#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount)           (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
-#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SELECT((bs), _bitCount) >> DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), _bitCount))
-#define DRFLAC_CACHE_L2_SIZE_BYTES(bs)                  (sizeof((bs)->cacheL2))
-#define DRFLAC_CACHE_L2_LINE_COUNT(bs)                  (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
-#define DRFLAC_CACHE_L2_LINES_REMAINING(bs)             (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
+/*
+BIT READING ATTEMPT #2
+
+This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting
+on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache
+is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an
+array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data
+from onRead() is read into.
+*/
+#define DRFLAC_CACHE_L1_SIZE_BYTES(bs)                      (sizeof((bs)->cache))
+#define DRFLAC_CACHE_L1_SIZE_BITS(bs)                       (sizeof((bs)->cache)*8)
+#define DRFLAC_CACHE_L1_BITS_REMAINING(bs)                  (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits)    /* L1 width in bits minus the bits already consumed. */
+#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)           (~((~(drflac_cache_t)0) >> (_bitCount)))    /* Top _bitCount bits set. _bitCount must be below the cache width: a full-width shift is undefined. */
+#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount)      (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
+#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount)               (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount)     (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >>  DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)))    /* Top _bitCount bits of the cache moved down to the low end. Not valid for _bitCount == 0 (full-width shift). */
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)))    /* Same selection, but the shift amount is masked so _bitCount == 0 shifts by 0 instead of the full width. */
+#define DRFLAC_CACHE_L2_SIZE_BYTES(bs)                      (sizeof((bs)->cacheL2))
+#define DRFLAC_CACHE_L2_LINE_COUNT(bs)                      (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
+#define DRFLAC_CACHE_L2_LINES_REMAINING(bs)                 (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)    /* L2 lines from nextL2Line to the end of the buffer. */
 
 
 #ifndef DR_FLAC_NO_CRC
@@ -1297,19 +1553,23 @@ static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs)
 
 static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs)
 {
-    // We should never be flushing in a situation where we are not aligned on a byte boundary.
+    /* We should never be flushing in a situation where we are not aligned on a byte boundary. */
     drflac_assert((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0);
 
-    // The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined
-    // by the number of bits that have been consumed.
+    /*
+    The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined
+    by the number of bits that have been consumed.
+    */
     if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) {
         drflac__update_crc16(bs);
     } else {
-        // We only accumulate the consumed bits.
+        /* We only accumulate the consumed bits. */
         bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes);
 
-        // The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated
-        // so we can handle that later.
+        /*
+        The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated
+        so we can handle that later.
+        */
         bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
     }
 
@@ -1319,19 +1579,24 @@ static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs)
 
 static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
 {
-    // Fast path. Try loading straight from L2.
+    size_t bytesRead;
+    size_t alignedL1LineCount;
+
+    /* Fast path. Try loading straight from L2. */
     if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
         bs->cache = bs->cacheL2[bs->nextL2Line++];
         return DRFLAC_TRUE;
     }
 
-    // If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's
-    // any left.
+    /*
+    If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's
+    any left.
+    */
     if (bs->unalignedByteCount > 0) {
-        return DRFLAC_FALSE;   // If we have any unaligned bytes it means there's no more aligned bytes left in the client.
+        return DRFLAC_FALSE;   /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. */
     }
 
-    size_t bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs));
+    bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs));
 
     bs->nextL2Line = 0;
     if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) {
@@ -1340,13 +1605,15 @@ static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs
     }
 
 
-    // If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably
-    // means we've just reached the end of the file. We need to move the valid data down to the end of the buffer
-    // and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to
-    // the size of the L1 so we'll need to seek backwards by any misaligned bytes.
-    size_t alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs);
+    /*
+    If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably
+    means we've just reached the end of the file. We need to move the valid data down to the end of the buffer
+    and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to
+    the size of the L1 so we'll need to seek backwards by any misaligned bytes.
+    */
+    alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs);
 
-    // We need to keep track of any unaligned bytes for later use.
+    /* We need to keep track of any unaligned bytes for later use. */
     bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs));
     if (bs->unalignedByteCount > 0) {
         bs->unalignedCache = bs->cacheL2[alignedL1LineCount];
@@ -1354,7 +1621,8 @@ static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs
 
     if (alignedL1LineCount > 0) {
         size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount;
-        for (size_t i = alignedL1LineCount; i > 0; --i) {
+        size_t i;
+        for (i = alignedL1LineCount; i > 0; --i) {
             bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1];
         }
 
@@ -1362,7 +1630,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs
         bs->cache = bs->cacheL2[bs->nextL2Line++];
         return DRFLAC_TRUE;
     } else {
-        // If we get into this branch it means we weren't able to load any L1-aligned data.
+        /* If we get into this branch it means we weren't able to load any L1-aligned data. */
         bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs);
         return DRFLAC_FALSE;
     }
@@ -1370,11 +1638,13 @@ static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs
 
 static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
 {
+    size_t bytesRead;
+
 #ifndef DR_FLAC_NO_CRC
     drflac__update_crc16(bs);
 #endif
 
-    // Fast path. Try just moving the next value in the L2 cache to the L1 cache.
+    /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */
     if (drflac__reload_l1_cache_from_l2(bs)) {
         bs->cache = drflac__be2host__cache_line(bs->cache);
         bs->consumedBits = 0;
@@ -1384,13 +1654,16 @@ static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
         return DRFLAC_TRUE;
     }
 
-    // Slow path.
+    /* Slow path. */
 
-    // If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last
-    // few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the
-    // data from the unaligned cache.
-    size_t bytesRead = bs->unalignedByteCount;
+    /*
+    If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last
+    few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the
+    data from the unaligned cache.
+    */
+    bytesRead = bs->unalignedByteCount;
     if (bytesRead == 0) {
+        bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- The stream has been exhausted, so mark the bits as consumed. */
         return DRFLAC_FALSE;
     }
 
@@ -1398,8 +1671,8 @@ static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
     bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8;
 
     bs->cache = drflac__be2host__cache_line(bs->unalignedCache);
-    bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs->consumedBits);    // <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property.
-    bs->unalignedByteCount = 0;     // <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes.
+    bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs));    /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */
+    bs->unalignedByteCount = 0;     /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */
 
 #ifndef DR_FLAC_NO_CRC
     bs->crc16Cache = bs->cache >> bs->consumedBits;
@@ -1410,10 +1683,10 @@ static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
 
 static void drflac__reset_cache(drflac_bs* bs)
 {
-    bs->nextL2Line   = DRFLAC_CACHE_L2_LINE_COUNT(bs);  // <-- This clears the L2 cache.
-    bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   // <-- This clears the L1 cache.
+    bs->nextL2Line   = DRFLAC_CACHE_L2_LINE_COUNT(bs);  /* <-- This clears the L2 cache. */
+    bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- This clears the L1 cache. */
     bs->cache = 0;
-    bs->unalignedByteCount = 0;                         // <-- This clears the trailing unaligned bytes.
+    bs->unalignedByteCount = 0;                         /* <-- This clears the trailing unaligned bytes. */
     bs->unalignedCache = 0;
 
 #ifndef DR_FLAC_NO_CRC
@@ -1437,18 +1710,31 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i
     }
 
     if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        /*
+        If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do
+        a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly
+        more optimal solution for this.
+        */
+#ifdef DRFLAC_64BIT
+        *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
+        bs->consumedBits += bitCount;
+        bs->cache <<= bitCount;
+#else
         if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
             *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
             bs->consumedBits += bitCount;
             bs->cache <<= bitCount;
         } else {
+            /* Cannot shift by 32-bits, so need to do it differently. */
             *pResultOut = (drflac_uint32)bs->cache;
             bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);
             bs->cache = 0;
         }
+#endif
+
         return DRFLAC_TRUE;
     } else {
-        // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them.
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
         drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs);
         drflac_uint32 bitCountLo = bitCount - bitCountHi;
         drflac_uint32 resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi);
@@ -1466,17 +1752,19 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i
 
 static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult)
 {
+    drflac_uint32 result;
+    drflac_uint32 signbit;
+
     drflac_assert(bs != NULL);
     drflac_assert(pResult != NULL);
     drflac_assert(bitCount > 0);
     drflac_assert(bitCount <= 32);
 
-    drflac_uint32 result;
     if (!drflac__read_uint32(bs, bitCount, &result)) {
         return DRFLAC_FALSE;
     }
 
-    drflac_uint32 signbit = ((result >> (bitCount-1)) & 0x01);
+    signbit = ((result >> (bitCount-1)) & 0x01);
     result |= (~signbit + 1) << bitCount;
 
     *pResult = (drflac_int32)result;
@@ -1486,15 +1774,16 @@ static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, dr
 #ifdef DRFLAC_64BIT
 static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut)
 {
+    drflac_uint32 resultHi;
+    drflac_uint32 resultLo;
+
     drflac_assert(bitCount <= 64);
     drflac_assert(bitCount >  32);
 
-    drflac_uint32 resultHi;
     if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) {
         return DRFLAC_FALSE;
     }
 
-    drflac_uint32 resultLo;
     if (!drflac__read_uint32(bs, 32, &resultLo)) {
         return DRFLAC_FALSE;
     }
@@ -1504,18 +1793,20 @@ static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, d
 }
 #endif
 
-// Function below is unused, but leaving it here in case I need to quickly add it again.
+/* Function below is unused, but leaving it here in case I need to quickly add it again. */
 #if 0
 static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut)
 {
+    drflac_uint64 result;
+    drflac_uint64 signbit;
+
     drflac_assert(bitCount <= 64);
 
-    drflac_uint64 result;
     if (!drflac__read_uint64(bs, bitCount, &result)) {
         return DRFLAC_FALSE;
     }
 
-    drflac_uint64 signbit = ((result >> (bitCount-1)) & 0x01);
+    signbit = ((result >> (bitCount-1)) & 0x01);
     result |= (~signbit + 1) << bitCount;
 
     *pResultOut = (drflac_int64)result;
@@ -1525,12 +1816,13 @@ static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, dr
 
 static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult)
 {
+    drflac_uint32 result;
+
     drflac_assert(bs != NULL);
     drflac_assert(pResult != NULL);
     drflac_assert(bitCount > 0);
     drflac_assert(bitCount <= 16);
 
-    drflac_uint32 result;
     if (!drflac__read_uint32(bs, bitCount, &result)) {
         return DRFLAC_FALSE;
     }
@@ -1542,12 +1834,13 @@ static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, d
 #if 0
 static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult)
 {
+    drflac_int32 result;
+
     drflac_assert(bs != NULL);
     drflac_assert(pResult != NULL);
     drflac_assert(bitCount > 0);
     drflac_assert(bitCount <= 16);
 
-    drflac_int32 result;
     if (!drflac__read_int32(bs, bitCount, &result)) {
         return DRFLAC_FALSE;
     }
@@ -1559,12 +1852,13 @@ static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, dr
 
 static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult)
 {
+    drflac_uint32 result;
+
     drflac_assert(bs != NULL);
     drflac_assert(pResult != NULL);
     drflac_assert(bitCount > 0);
     drflac_assert(bitCount <= 8);
 
-    drflac_uint32 result;
     if (!drflac__read_uint32(bs, bitCount, &result)) {
         return DRFLAC_FALSE;
     }
@@ -1575,12 +1869,13 @@ static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, dr
 
 static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult)
 {
+    drflac_int32 result;
+
     drflac_assert(bs != NULL);
     drflac_assert(pResult != NULL);
     drflac_assert(bitCount > 0);
     drflac_assert(bitCount <= 8);
 
-    drflac_int32 result;
     if (!drflac__read_int32(bs, bitCount, &result)) {
         return DRFLAC_FALSE;
     }
@@ -1597,12 +1892,12 @@ static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
         bs->cache <<= bitsToSeek;
         return DRFLAC_TRUE;
     } else {
-        // It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here.
+        /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */
         bitsToSeek       -= DRFLAC_CACHE_L1_BITS_REMAINING(bs);
         bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs);
         bs->cache         = 0;
 
-        // Simple case. Seek in groups of the same number as bits that fit within a cache line.
+        /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */
 #ifdef DRFLAC_64BIT
         while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
             drflac_uint64 bin;
@@ -1621,7 +1916,7 @@ static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
         }
 #endif
 
-        // Whole leftover bytes.
+        /* Whole leftover bytes. */
         while (bitsToSeek >= 8) {
             drflac_uint8 bin;
             if (!drflac__read_uint8(bs, 8, &bin)) {
@@ -1630,13 +1925,13 @@ static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
             bitsToSeek -= 8;
         }
 
-        // Leftover bits.
+        /* Leftover bits. */
         if (bitsToSeek > 0) {
             drflac_uint8 bin;
             if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) {
                 return DRFLAC_FALSE;
             }
-            bitsToSeek = 0; // <-- Necessary for the assert below.
+            bitsToSeek = 0; /* <-- Necessary for the assert below. */
         }
 
         drflac_assert(bitsToSeek == 0);
@@ -1645,23 +1940,26 @@ static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
 }
 
 
-// This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16.
+/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the frame header). It will also update the CRC-16. */
 static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
 {
     drflac_assert(bs != NULL);
 
-    // The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first
-    // thing to do is align to the next byte.
+    /*
+    The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first
+    thing to do is align to the next byte.
+    */
     if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
         return DRFLAC_FALSE;
     }
 
     for (;;) {
+        drflac_uint8 hi;
+
 #ifndef DR_FLAC_NO_CRC
         drflac__reset_crc16(bs);
 #endif
 
-        drflac_uint8 hi;
         if (!drflac__read_uint8(bs, 8, &hi)) {
             return DRFLAC_FALSE;
         }
@@ -1682,8 +1980,8 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
         }
     }
 
-    // Should never get here.
-    //return DRFLAC_FALSE;
+    /* Should never get here. */
+    /*return DRFLAC_FALSE;*/
 }
 
 
@@ -1696,6 +1994,7 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
 
 static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
 {
+    drflac_uint32 n;
     static drflac_uint32 clz_table_4[] = {
         0,
         4,
@@ -1704,13 +2003,17 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
         1, 1, 1, 1, 1, 1, 1, 1
     };
 
-    drflac_uint32 n = clz_table_4[x >> (sizeof(x)*8 - 4)];
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
+    n = clz_table_4[x >> (sizeof(x)*8 - 4)];
     if (n == 0) {
 #ifdef DRFLAC_64BIT
-        if ((x & 0xFFFFFFFF00000000ULL) == 0) { n  = 32; x <<= 32; }
-        if ((x & 0xFFFF000000000000ULL) == 0) { n += 16; x <<= 16; }
-        if ((x & 0xFF00000000000000ULL) == 0) { n += 8;  x <<= 8;  }
-        if ((x & 0xF000000000000000ULL) == 0) { n += 4;  x <<= 4;  }
+        if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n  = 32; x <<= 32; }
+        if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
+        if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8;  x <<= 8;  }
+        if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4;  x <<= 4;  }
 #else
         if ((x & 0xFFFF0000) == 0) { n  = 16; x <<= 16; }
         if ((x & 0xFF000000) == 0) { n += 8;  x <<= 8;  }
@@ -1725,7 +2028,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
 #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
 static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported()
 {
-    // If the compiler itself does not support the intrinsic then we'll need to return false.
+    /* If the compiler itself does not support the intrinsic then we'll need to return false. */
 #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
     return drflac__gIsLZCNTSupported;
 #else
@@ -1743,13 +2046,16 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
     #endif
 #else
     #if defined(__GNUC__) || defined(__clang__)
+        if (x == 0) {
+            return sizeof(x)*8;
+        }
         #ifdef DRFLAC_64BIT
-            return (drflac_uint32)__builtin_clzll((unsigned long long)x);
+            return (drflac_uint32)__builtin_clzll((drflac_uint64)x);
         #else
-            return (drflac_uint32)__builtin_clzl((unsigned long)x);
+            return (drflac_uint32)__builtin_clzl((drflac_uint32)x);
         #endif
     #else
-        // Unsupported compiler.
+        /* Unsupported compiler. */
         #error "This compiler does not support the lzcnt intrinsic."
     #endif
 #endif
@@ -1757,9 +2063,16 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
 #endif
 
 #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+#include <intrin.h> /* For BitScanReverse(). */
+
 static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
 {
     drflac_uint32 n;
+
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
 #ifdef DRFLAC_64BIT
     _BitScanReverse64((unsigned long*)&n, x);
 #else
@@ -1771,25 +2084,26 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
 
 static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
 {
-    // This function assumes at least one bit is set. Checking for 0 needs to be done at a higher level, outside this function.
 #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
     if (drflac__is_lzcnt_supported()) {
         return drflac__clz_lzcnt(x);
     } else
 #endif
     {
-    #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
         return drflac__clz_msvc(x);
-    #else
+#else   /* No MSVC implementation compiled in: use the table-driven software fallback. */
         return drflac__clz_software(x);
-    #endif
+#endif  /* DRFLAC_IMPLEMENT_CLZ_MSVC */
     }
 }
 
 
-static inline drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
+static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
 {
     drflac_uint32 zeroCounter = 0;
+    drflac_uint32 setBitOffsetPlus1;
+
     while (bs->cache == 0) {
         zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
         if (!drflac__reload_cache(bs)) {
@@ -1797,7 +2111,7 @@ static inline drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsign
         }
     }
 
-    drflac_uint32 setBitOffsetPlus1 = drflac__clz(bs->cache);
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
     setBitOffsetPlus1 += 1;
 
     bs->consumedBits += setBitOffsetPlus1;
@@ -1814,9 +2128,11 @@ static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFro
     drflac_assert(bs != NULL);
     drflac_assert(offsetFromStart > 0);
 
-    // Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which
-    // is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit.
-    // To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder.
+    /*
+    Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which
+    is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit.
+    To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder.
+    */
     if (offsetFromStart > 0x7FFFFFFF) {
         drflac_uint64 bytesRemaining = offsetFromStart;
         if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
@@ -1842,7 +2158,7 @@ static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFro
         }
     }
 
-    // The cache should be reset to force a reload of fresh data from the client.
+    /* The cache should be reset to force a reload of fresh data from the client. */
     drflac__reset_cache(bs);
     return DRFLAC_TRUE;
 }
@@ -1850,12 +2166,18 @@ static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFro
 
 static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut)
 {
+    drflac_uint8 crc;
+    drflac_uint64 result;
+    unsigned char utf8[7] = {0};
+    int byteCount;
+    int i;
+
     drflac_assert(bs != NULL);
     drflac_assert(pNumberOut != NULL);
+    drflac_assert(pCRCOut != NULL);
 
-    drflac_uint8 crc = *pCRCOut;
+    crc = *pCRCOut;
 
-    unsigned char utf8[7] = {0};
     if (!drflac__read_uint8(bs, 8, utf8)) {
         *pNumberOut = 0;
         return DRFLAC_END_OF_STREAM;
@@ -1868,7 +2190,7 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64
         return DRFLAC_SUCCESS;
     }
 
-    int byteCount = 1;
+    byteCount = 1;
     if ((utf8[0] & 0xE0) == 0xC0) {
         byteCount = 2;
     } else if ((utf8[0] & 0xF0) == 0xE0) {
@@ -1883,14 +2205,14 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64
         byteCount = 7;
     } else {
         *pNumberOut = 0;
-        return DRFLAC_CRC_MISMATCH;     // Bad UTF-8 encoding.
+        return DRFLAC_CRC_MISMATCH;     /* Bad UTF-8 encoding. */
     }
 
-    // Read extra bytes.
+    /* Read extra bytes. */
     drflac_assert(byteCount > 1);
 
-    drflac_uint64 result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
-    for (int i = 1; i < byteCount; ++i) {
+    result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
+    for (i = 1; i < byteCount; ++i) {
         if (!drflac__read_uint8(bs, 8, utf8 + i)) {
             *pNumberOut = 0;
             return DRFLAC_END_OF_STREAM;
@@ -1907,20 +2229,21 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64
 
 
 
+/*
+The next two functions are responsible for calculating the prediction.
 
-// The next two functions are responsible for calculating the prediction.
-//
-// When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
-// safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
+When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
+safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
+*/
 static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
 {
-    drflac_assert(order <= 32);
-
-    // 32-bit version.
-
-    // VC++ optimizes this to a single jmp. I've not yet verified this for other compilers.
     drflac_int32 prediction = 0;
 
+    drflac_assert(order <= 32);
+
+    /* 32-bit version. */
+
+    /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. */
     switch (order)
     {
     case 32: prediction += coefficients[31] * pDecodedSamples[-32];
@@ -1962,13 +2285,14 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32
 
 static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
 {
+    drflac_int64 prediction;
+
     drflac_assert(order <= 32);
 
-    // 64-bit version.
+    /* 64-bit version. */
 
-    // This method is faster on the 32-bit build when compiling with VC++. See note below.
+    /* This method is faster on the 32-bit build when compiling with VC++. See note below. */
 #ifndef DRFLAC_64BIT
-    drflac_int64 prediction;
     if (order == 8)
     {
         prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
@@ -2085,18 +2409,21 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32
     }
     else
     {
+        int j;
+
         prediction = 0;
-        for (int j = 0; j < (int)order; ++j) {
+        for (j = 0; j < (int)order; ++j) {
             prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1];
         }
     }
 #endif
 
-    // VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
-    // reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
+    /*
+    VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
+    reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
+    */
 #ifdef DRFLAC_64BIT
-    drflac_int64 prediction = 0;
-
+    prediction = 0;
     switch (order)
     {
     case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
@@ -2137,16 +2464,451 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32
     return (drflac_int32)(prediction >> shift);
 }
 
+static DRFLAC_INLINE void drflac__calculate_prediction_64_x4(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, const drflac_uint32 riceParamParts[4], drflac_int32* pDecodedSamples)
+{   /* Writes four consecutive samples, pDecodedSamples[0..3]; each is riceParamParts[i] plus a 64-bit prediction over the 'order' samples preceding it, shifted right by 'shift'. */
+    drflac_int64 prediction0 = 0;   /* Accumulator for pDecodedSamples[0]. */
+    drflac_int64 prediction1 = 0;   /* Accumulator for pDecodedSamples[1]. */
+    drflac_int64 prediction2 = 0;   /* Accumulator for pDecodedSamples[2]. */
+    drflac_int64 prediction3 = 0;   /* Accumulator for pDecodedSamples[3]. */
+
+    drflac_assert(order <= 32);
+
+    switch (order)  /* Taps 4 and above: every operand sits at a negative index, so all four accumulators advance together. There are no breaks; each case falls through to the lower taps. */
+    {
+    case 32:
+        prediction0 += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
+        prediction1 += coefficients[31] * (drflac_int64)pDecodedSamples[-31];
+        prediction2 += coefficients[31] * (drflac_int64)pDecodedSamples[-30];
+        prediction3 += coefficients[31] * (drflac_int64)pDecodedSamples[-29];
+    case 31:
+        prediction0 += coefficients[30] * (drflac_int64)pDecodedSamples[-31];
+        prediction1 += coefficients[30] * (drflac_int64)pDecodedSamples[-30];
+        prediction2 += coefficients[30] * (drflac_int64)pDecodedSamples[-29];
+        prediction3 += coefficients[30] * (drflac_int64)pDecodedSamples[-28];
+    case 30:
+        prediction0 += coefficients[29] * (drflac_int64)pDecodedSamples[-30];
+        prediction1 += coefficients[29] * (drflac_int64)pDecodedSamples[-29];
+        prediction2 += coefficients[29] * (drflac_int64)pDecodedSamples[-28];
+        prediction3 += coefficients[29] * (drflac_int64)pDecodedSamples[-27];
+    case 29:
+        prediction0 += coefficients[28] * (drflac_int64)pDecodedSamples[-29];
+        prediction1 += coefficients[28] * (drflac_int64)pDecodedSamples[-28];
+        prediction2 += coefficients[28] * (drflac_int64)pDecodedSamples[-27];
+        prediction3 += coefficients[28] * (drflac_int64)pDecodedSamples[-26];
+    case 28:
+        prediction0 += coefficients[27] * (drflac_int64)pDecodedSamples[-28];
+        prediction1 += coefficients[27] * (drflac_int64)pDecodedSamples[-27];
+        prediction2 += coefficients[27] * (drflac_int64)pDecodedSamples[-26];
+        prediction3 += coefficients[27] * (drflac_int64)pDecodedSamples[-25];
+    case 27:
+        prediction0 += coefficients[26] * (drflac_int64)pDecodedSamples[-27];
+        prediction1 += coefficients[26] * (drflac_int64)pDecodedSamples[-26];
+        prediction2 += coefficients[26] * (drflac_int64)pDecodedSamples[-25];
+        prediction3 += coefficients[26] * (drflac_int64)pDecodedSamples[-24];
+    case 26:
+        prediction0 += coefficients[25] * (drflac_int64)pDecodedSamples[-26];
+        prediction1 += coefficients[25] * (drflac_int64)pDecodedSamples[-25];
+        prediction2 += coefficients[25] * (drflac_int64)pDecodedSamples[-24];
+        prediction3 += coefficients[25] * (drflac_int64)pDecodedSamples[-23];
+    case 25:
+        prediction0 += coefficients[24] * (drflac_int64)pDecodedSamples[-25];
+        prediction1 += coefficients[24] * (drflac_int64)pDecodedSamples[-24];
+        prediction2 += coefficients[24] * (drflac_int64)pDecodedSamples[-23];
+        prediction3 += coefficients[24] * (drflac_int64)pDecodedSamples[-22];
+    case 24:
+        prediction0 += coefficients[23] * (drflac_int64)pDecodedSamples[-24];
+        prediction1 += coefficients[23] * (drflac_int64)pDecodedSamples[-23];
+        prediction2 += coefficients[23] * (drflac_int64)pDecodedSamples[-22];
+        prediction3 += coefficients[23] * (drflac_int64)pDecodedSamples[-21];
+    case 23:
+        prediction0 += coefficients[22] * (drflac_int64)pDecodedSamples[-23];
+        prediction1 += coefficients[22] * (drflac_int64)pDecodedSamples[-22];
+        prediction2 += coefficients[22] * (drflac_int64)pDecodedSamples[-21];
+        prediction3 += coefficients[22] * (drflac_int64)pDecodedSamples[-20];
+    case 22:
+        prediction0 += coefficients[21] * (drflac_int64)pDecodedSamples[-22];
+        prediction1 += coefficients[21] * (drflac_int64)pDecodedSamples[-21];
+        prediction2 += coefficients[21] * (drflac_int64)pDecodedSamples[-20];
+        prediction3 += coefficients[21] * (drflac_int64)pDecodedSamples[-19];
+    case 21:
+        prediction0 += coefficients[20] * (drflac_int64)pDecodedSamples[-21];
+        prediction1 += coefficients[20] * (drflac_int64)pDecodedSamples[-20];
+        prediction2 += coefficients[20] * (drflac_int64)pDecodedSamples[-19];
+        prediction3 += coefficients[20] * (drflac_int64)pDecodedSamples[-18];
+    case 20:
+        prediction0 += coefficients[19] * (drflac_int64)pDecodedSamples[-20];
+        prediction1 += coefficients[19] * (drflac_int64)pDecodedSamples[-19];
+        prediction2 += coefficients[19] * (drflac_int64)pDecodedSamples[-18];
+        prediction3 += coefficients[19] * (drflac_int64)pDecodedSamples[-17];
+    case 19:
+        prediction0 += coefficients[18] * (drflac_int64)pDecodedSamples[-19];
+        prediction1 += coefficients[18] * (drflac_int64)pDecodedSamples[-18];
+        prediction2 += coefficients[18] * (drflac_int64)pDecodedSamples[-17];
+        prediction3 += coefficients[18] * (drflac_int64)pDecodedSamples[-16];
+    case 18:
+        prediction0 += coefficients[17] * (drflac_int64)pDecodedSamples[-18];
+        prediction1 += coefficients[17] * (drflac_int64)pDecodedSamples[-17];
+        prediction2 += coefficients[17] * (drflac_int64)pDecodedSamples[-16];
+        prediction3 += coefficients[17] * (drflac_int64)pDecodedSamples[-15];
+    case 17:
+        prediction0 += coefficients[16] * (drflac_int64)pDecodedSamples[-17];
+        prediction1 += coefficients[16] * (drflac_int64)pDecodedSamples[-16];
+        prediction2 += coefficients[16] * (drflac_int64)pDecodedSamples[-15];
+        prediction3 += coefficients[16] * (drflac_int64)pDecodedSamples[-14];
+
+    case 16:
+        prediction0 += coefficients[15] * (drflac_int64)pDecodedSamples[-16];
+        prediction1 += coefficients[15] * (drflac_int64)pDecodedSamples[-15];
+        prediction2 += coefficients[15] * (drflac_int64)pDecodedSamples[-14];
+        prediction3 += coefficients[15] * (drflac_int64)pDecodedSamples[-13];
+    case 15:
+        prediction0 += coefficients[14] * (drflac_int64)pDecodedSamples[-15];
+        prediction1 += coefficients[14] * (drflac_int64)pDecodedSamples[-14];
+        prediction2 += coefficients[14] * (drflac_int64)pDecodedSamples[-13];
+        prediction3 += coefficients[14] * (drflac_int64)pDecodedSamples[-12];
+    case 14:
+        prediction0 += coefficients[13] * (drflac_int64)pDecodedSamples[-14];
+        prediction1 += coefficients[13] * (drflac_int64)pDecodedSamples[-13];
+        prediction2 += coefficients[13] * (drflac_int64)pDecodedSamples[-12];
+        prediction3 += coefficients[13] * (drflac_int64)pDecodedSamples[-11];
+    case 13:
+        prediction0 += coefficients[12] * (drflac_int64)pDecodedSamples[-13];
+        prediction1 += coefficients[12] * (drflac_int64)pDecodedSamples[-12];
+        prediction2 += coefficients[12] * (drflac_int64)pDecodedSamples[-11];
+        prediction3 += coefficients[12] * (drflac_int64)pDecodedSamples[-10];
+    case 12:
+        prediction0 += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+        prediction1 += coefficients[11] * (drflac_int64)pDecodedSamples[-11];
+        prediction2 += coefficients[11] * (drflac_int64)pDecodedSamples[-10];
+        prediction3 += coefficients[11] * (drflac_int64)pDecodedSamples[- 9];
+    case 11:
+        prediction0 += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+        prediction1 += coefficients[10] * (drflac_int64)pDecodedSamples[-10];
+        prediction2 += coefficients[10] * (drflac_int64)pDecodedSamples[- 9];
+        prediction3 += coefficients[10] * (drflac_int64)pDecodedSamples[- 8];
+    case 10:
+        prediction0 += coefficients[9] * (drflac_int64)pDecodedSamples[-10];
+        prediction1 += coefficients[9] * (drflac_int64)pDecodedSamples[- 9];
+        prediction2 += coefficients[9] * (drflac_int64)pDecodedSamples[- 8];
+        prediction3 += coefficients[9] * (drflac_int64)pDecodedSamples[- 7];
+    case  9:
+        prediction0 += coefficients[8] * (drflac_int64)pDecodedSamples[- 9];
+        prediction1 += coefficients[8] * (drflac_int64)pDecodedSamples[- 8];
+        prediction2 += coefficients[8] * (drflac_int64)pDecodedSamples[- 7];
+        prediction3 += coefficients[8] * (drflac_int64)pDecodedSamples[- 6];
+    case  8:
+        prediction0 += coefficients[7] * (drflac_int64)pDecodedSamples[- 8];
+        prediction1 += coefficients[7] * (drflac_int64)pDecodedSamples[- 7];
+        prediction2 += coefficients[7] * (drflac_int64)pDecodedSamples[- 6];
+        prediction3 += coefficients[7] * (drflac_int64)pDecodedSamples[- 5];
+    case  7:
+        prediction0 += coefficients[6] * (drflac_int64)pDecodedSamples[- 7];
+        prediction1 += coefficients[6] * (drflac_int64)pDecodedSamples[- 6];
+        prediction2 += coefficients[6] * (drflac_int64)pDecodedSamples[- 5];
+        prediction3 += coefficients[6] * (drflac_int64)pDecodedSamples[- 4];
+    case  6:
+        prediction0 += coefficients[5] * (drflac_int64)pDecodedSamples[- 6];
+        prediction1 += coefficients[5] * (drflac_int64)pDecodedSamples[- 5];
+        prediction2 += coefficients[5] * (drflac_int64)pDecodedSamples[- 4];
+        prediction3 += coefficients[5] * (drflac_int64)pDecodedSamples[- 3];
+    case  5:
+        prediction0 += coefficients[4] * (drflac_int64)pDecodedSamples[- 5];
+        prediction1 += coefficients[4] * (drflac_int64)pDecodedSamples[- 4];
+        prediction2 += coefficients[4] * (drflac_int64)pDecodedSamples[- 3];
+        prediction3 += coefficients[4] * (drflac_int64)pDecodedSamples[- 2];
+    case  4:
+        prediction0 += coefficients[3] * (drflac_int64)pDecodedSamples[- 4];
+        prediction1 += coefficients[3] * (drflac_int64)pDecodedSamples[- 3];
+        prediction2 += coefficients[3] * (drflac_int64)pDecodedSamples[- 2];
+        prediction3 += coefficients[3] * (drflac_int64)pDecodedSamples[- 1];
+        order = 3;  /* The three lowest taps are applied by the per-sample switches below. */
+    }
+
+    switch (order)  /* Lowest taps for sample 0. When order was below 4 on entry, only these per-sample switches contribute. */
+    {
+    case 3: prediction0 += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3];
+    case 2: prediction0 += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2];
+    case 1: prediction0 += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
+    }
+    pDecodedSamples[0] = riceParamParts[0] + (drflac_int32)(prediction0 >> shift);   /* riceParamParts[i] is presumably the decoded residual - confirm against the caller. */
+
+    switch (order)  /* Sample 1 reads pDecodedSamples[0], stored just above, which is why the lowest taps are applied one sample at a time. */
+    {
+    case 3: prediction1 += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 2];
+    case 2: prediction1 += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 1];
+    case 1: prediction1 += coefficients[ 0] * (drflac_int64)pDecodedSamples[  0];
+    }
+    pDecodedSamples[1] = riceParamParts[1] + (drflac_int32)(prediction1 >> shift);
+
+    switch (order)  /* Sample 2 reads up to pDecodedSamples[1]. */
+    {
+    case 3: prediction2 += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 1];
+    case 2: prediction2 += coefficients[ 1] * (drflac_int64)pDecodedSamples[  0];
+    case 1: prediction2 += coefficients[ 0] * (drflac_int64)pDecodedSamples[  1];
+    }
+    pDecodedSamples[2] = riceParamParts[2] + (drflac_int32)(prediction2 >> shift);
+
+    switch (order)  /* Sample 3 reads up to pDecodedSamples[2]. */
+    {
+    case 3: prediction3 += coefficients[ 2] * (drflac_int64)pDecodedSamples[  0];
+    case 2: prediction3 += coefficients[ 1] * (drflac_int64)pDecodedSamples[  1];
+    case 1: prediction3 += coefficients[ 0] * (drflac_int64)pDecodedSamples[  2];
+    }
+    pDecodedSamples[3] = riceParamParts[3] + (drflac_int32)(prediction3 >> shift);
+}
+
+#if defined(DRFLAC_SUPPORT_SSE41)
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64__sse41(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{   /* Returns the prediction for pDecodedSamples[0]: the sum of coefficients[k] * pDecodedSamples[-(k+1)], accumulated in 64 bits and shifted right by 'shift'. */
+    __m128i prediction = _mm_setzero_si128();   /* Two 64-bit partial sums, one per lane. */
+
+    drflac_assert(order <= 32);
+
+    switch (order)  /* Each statement multiplies one pair of taps and is shared by the even and odd case label above it; there are no breaks, so lower pairs follow. NOTE(review): for an odd order this also multiplies coefficients[order] by pDecodedSamples[-(order+1)] - presumably callers ensure that term is zero or harmless; confirm. */
+    {
+    case 32:
+    case 31: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[31], 0, coefficients[30]), _mm_set_epi32(0, pDecodedSamples[-32], 0, pDecodedSamples[-31])));
+    case 30:
+    case 29: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[29], 0, coefficients[28]), _mm_set_epi32(0, pDecodedSamples[-30], 0, pDecodedSamples[-29])));
+    case 28:
+    case 27: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[27], 0, coefficients[26]), _mm_set_epi32(0, pDecodedSamples[-28], 0, pDecodedSamples[-27])));
+    case 26:
+    case 25: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[25], 0, coefficients[24]), _mm_set_epi32(0, pDecodedSamples[-26], 0, pDecodedSamples[-25])));
+    case 24:
+    case 23: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[23], 0, coefficients[22]), _mm_set_epi32(0, pDecodedSamples[-24], 0, pDecodedSamples[-23])));
+    case 22:
+    case 21: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[21], 0, coefficients[20]), _mm_set_epi32(0, pDecodedSamples[-22], 0, pDecodedSamples[-21])));
+    case 20:
+    case 19: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[19], 0, coefficients[18]), _mm_set_epi32(0, pDecodedSamples[-20], 0, pDecodedSamples[-19])));
+    case 18:
+    case 17: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[17], 0, coefficients[16]), _mm_set_epi32(0, pDecodedSamples[-18], 0, pDecodedSamples[-17])));
+    case 16:
+    case 15: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[15], 0, coefficients[14]), _mm_set_epi32(0, pDecodedSamples[-16], 0, pDecodedSamples[-15])));
+    case 14:
+    case 13: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[13], 0, coefficients[12]), _mm_set_epi32(0, pDecodedSamples[-14], 0, pDecodedSamples[-13])));
+    case 12:
+    case 11: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[11], 0, coefficients[10]), _mm_set_epi32(0, pDecodedSamples[-12], 0, pDecodedSamples[-11])));
+    case 10:
+    case  9: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 9], 0, coefficients[ 8]), _mm_set_epi32(0, pDecodedSamples[-10], 0, pDecodedSamples[- 9])));
+    case  8:
+    case  7: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 7], 0, coefficients[ 6]), _mm_set_epi32(0, pDecodedSamples[- 8], 0, pDecodedSamples[- 7])));
+    case  6:
+    case  5: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 5], 0, coefficients[ 4]), _mm_set_epi32(0, pDecodedSamples[- 6], 0, pDecodedSamples[- 5])));
+    case  4:
+    case  3: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 3], 0, coefficients[ 2]), _mm_set_epi32(0, pDecodedSamples[- 4], 0, pDecodedSamples[- 3])));
+    case  2:
+    case  1: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 1], 0, coefficients[ 0]), _mm_set_epi32(0, pDecodedSamples[- 2], 0, pDecodedSamples[- 1])));
+    }
+
+    return (drflac_int32)((   /* Add the two lane sums, read back as unsigned 64-bit values, then apply the shift. */
+        ((drflac_uint64*)&prediction)[0] +
+        ((drflac_uint64*)&prediction)[1]) >> shift);
+}
+
+static DRFLAC_INLINE void drflac__calculate_prediction_64_x2__sse41(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, const drflac_uint32 riceParamParts[4], drflac_int32* pDecodedSamples)
+{
+    __m128i prediction = _mm_setzero_si128();
+    drflac_int64 predictions[2] = {0, 0};
+    /* Decodes two consecutive samples: lane 0 accumulates the prediction for pDecodedSamples[0], lane 1 the one for pDecodedSamples[1]. Only riceParamParts[0] and [1] are used. */
+    drflac_assert(order <= 32);
+    /* Intentional fall-through. Only the coefficients[1..order-1] terms are summed here; the coefficients[0] term is added in scalar code below because the second sample needs pDecodedSamples[0] for it. */
+    switch (order)
+    {
+    case 32: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[31], 0, coefficients[31]), _mm_set_epi32(0, pDecodedSamples[-31], 0, pDecodedSamples[-32])));
+    case 31: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[30], 0, coefficients[30]), _mm_set_epi32(0, pDecodedSamples[-30], 0, pDecodedSamples[-31])));
+    case 30: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[29], 0, coefficients[29]), _mm_set_epi32(0, pDecodedSamples[-29], 0, pDecodedSamples[-30])));
+    case 29: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[28], 0, coefficients[28]), _mm_set_epi32(0, pDecodedSamples[-28], 0, pDecodedSamples[-29])));
+    case 28: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[27], 0, coefficients[27]), _mm_set_epi32(0, pDecodedSamples[-27], 0, pDecodedSamples[-28])));
+    case 27: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[26], 0, coefficients[26]), _mm_set_epi32(0, pDecodedSamples[-26], 0, pDecodedSamples[-27])));
+    case 26: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[25], 0, coefficients[25]), _mm_set_epi32(0, pDecodedSamples[-25], 0, pDecodedSamples[-26])));
+    case 25: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[24], 0, coefficients[24]), _mm_set_epi32(0, pDecodedSamples[-24], 0, pDecodedSamples[-25])));
+    case 24: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[23], 0, coefficients[23]), _mm_set_epi32(0, pDecodedSamples[-23], 0, pDecodedSamples[-24])));
+    case 23: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[22], 0, coefficients[22]), _mm_set_epi32(0, pDecodedSamples[-22], 0, pDecodedSamples[-23])));
+    case 22: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[21], 0, coefficients[21]), _mm_set_epi32(0, pDecodedSamples[-21], 0, pDecodedSamples[-22])));
+    case 21: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[20], 0, coefficients[20]), _mm_set_epi32(0, pDecodedSamples[-20], 0, pDecodedSamples[-21])));
+    case 20: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[19], 0, coefficients[19]), _mm_set_epi32(0, pDecodedSamples[-19], 0, pDecodedSamples[-20])));
+    case 19: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[18], 0, coefficients[18]), _mm_set_epi32(0, pDecodedSamples[-18], 0, pDecodedSamples[-19])));
+    case 18: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[17], 0, coefficients[17]), _mm_set_epi32(0, pDecodedSamples[-17], 0, pDecodedSamples[-18])));
+    case 17: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[16], 0, coefficients[16]), _mm_set_epi32(0, pDecodedSamples[-16], 0, pDecodedSamples[-17])));
+    case 16: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[15], 0, coefficients[15]), _mm_set_epi32(0, pDecodedSamples[-15], 0, pDecodedSamples[-16])));
+    case 15: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[14], 0, coefficients[14]), _mm_set_epi32(0, pDecodedSamples[-14], 0, pDecodedSamples[-15])));
+    case 14: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[13], 0, coefficients[13]), _mm_set_epi32(0, pDecodedSamples[-13], 0, pDecodedSamples[-14])));
+    case 13: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[12], 0, coefficients[12]), _mm_set_epi32(0, pDecodedSamples[-12], 0, pDecodedSamples[-13])));
+    case 12: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[11], 0, coefficients[11]), _mm_set_epi32(0, pDecodedSamples[-11], 0, pDecodedSamples[-12])));
+    case 11: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[10], 0, coefficients[10]), _mm_set_epi32(0, pDecodedSamples[-10], 0, pDecodedSamples[-11])));
+    case 10: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 9], 0, coefficients[ 9]), _mm_set_epi32(0, pDecodedSamples[- 9], 0, pDecodedSamples[-10])));
+    case  9: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 8], 0, coefficients[ 8]), _mm_set_epi32(0, pDecodedSamples[- 8], 0, pDecodedSamples[- 9])));
+    case  8: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 7], 0, coefficients[ 7]), _mm_set_epi32(0, pDecodedSamples[- 7], 0, pDecodedSamples[- 8])));
+    case  7: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 6], 0, coefficients[ 6]), _mm_set_epi32(0, pDecodedSamples[- 6], 0, pDecodedSamples[- 7])));
+    case  6: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 5], 0, coefficients[ 5]), _mm_set_epi32(0, pDecodedSamples[- 5], 0, pDecodedSamples[- 6])));
+    case  5: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 4], 0, coefficients[ 4]), _mm_set_epi32(0, pDecodedSamples[- 4], 0, pDecodedSamples[- 5])));
+    case  4: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 3], 0, coefficients[ 3]), _mm_set_epi32(0, pDecodedSamples[- 3], 0, pDecodedSamples[- 4])));
+    case  3: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 2], 0, coefficients[ 2]), _mm_set_epi32(0, pDecodedSamples[- 2], 0, pDecodedSamples[- 3])));
+    case  2: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 1], 0, coefficients[ 1]), _mm_set_epi32(0, pDecodedSamples[- 1], 0, pDecodedSamples[- 2])));
+        order = 1;  /* Reached by fall-through for every order >= 2, so both scalar switches below add the coefficients[0] term. */
+    }
+
+    _mm_storeu_si128((__m128i*)predictions, prediction);
+
+    switch (order)
+    {
+    case 1: predictions[0] += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
+    }
+    pDecodedSamples[0] = riceParamParts[0] + (drflac_int32)(predictions[0] >> shift);
+    /* Second sample: its coefficients[0] term uses the sample that was just written to pDecodedSamples[0]. */
+    switch (order)
+    {
+    case 1: predictions[1] += coefficients[ 0] * (drflac_int64)pDecodedSamples[  0];
+    }
+    pDecodedSamples[1] = riceParamParts[1] + (drflac_int32)(predictions[1] >> shift);
+}
+
+
+static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
+{
+    return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));  /* Bitwise NOT of a: comparing zero with zero yields all ones, and XOR with all ones inverts every bit. */
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_slide1_epi32(__m128i a, __m128i b)
+{
+    /* Lane names are written high to low: a = a3a2a1a0, b = b3b2b1b0. Each temporary below is named after the lanes it holds. */
+
+    /* Result = a2a1a0b3, i.e. a moved up by one 32-bit lane with b3 filling the lowest lane. */
+    __m128i b3a3b2a2 = _mm_unpackhi_epi32(a, b);
+    __m128i a2b3a2b3 = _mm_shuffle_epi32(b3a3b2a2, _MM_SHUFFLE(0, 3, 0, 3));
+    __m128i a1a2a0b3 = _mm_unpacklo_epi32(a2b3a2b3, a);
+    __m128i a2a1a0b3 = _mm_shuffle_epi32(a1a2a0b3, _MM_SHUFFLE(2, 3, 1, 0));
+    return a2a1a0b3;
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_slide2_epi32(__m128i a, __m128i b)
+{
+    /* Result = a1a0b3b2 (lanes written high to low): a moved up by two 32-bit lanes with b3b2 filling the two lowest lanes. */
+    __m128i b1b0b3b2 = _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2));
+    __m128i a1b3a0b2 = _mm_unpacklo_epi32(b1b0b3b2, a);
+    __m128i a1a0b3b2 = _mm_shuffle_epi32(a1b3a0b2, _MM_SHUFFLE(3, 1, 2, 0));
+    return a1a0b3b2;
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_slide3_epi32(__m128i a, __m128i b)
+{
+    /* Result = a0b3b2b1 (lanes written high to low): a moved up by three 32-bit lanes with b3b2b1 filling the three lowest lanes. */
+    __m128i b1a1b0a0 = _mm_unpacklo_epi32(a, b);
+    __m128i a0b1a0b1 = _mm_shuffle_epi32(b1a1b0a0, _MM_SHUFFLE(0, 3, 0, 3));
+    __m128i b3a0b2b1 = _mm_unpackhi_epi32(a0b1a0b1, b);
+    __m128i a0b3b2b1 = _mm_shuffle_epi32(b3a0b2b1, _MM_SHUFFLE(2, 3, 1, 0));
+    return a0b3b2b1;
+}
+
+static DRFLAC_INLINE void drflac__calculate_prediction_32_x4__sse41(drflac_uint32 order, drflac_int32 shift, const __m128i* coefficients128, const __m128i riceParamParts128, drflac_int32* pDecodedSamples)
+{
+    drflac_assert(order <= 32);
+    /* Decodes four consecutive samples using 32-bit arithmetic; lane i of "prediction" accumulates the prediction for pDecodedSamples[i]. */
+    /* I don't think this is as efficient as it could be. More work needs to be done on this. */
+    if (order > 0) {
+        drflac_int32 predictions[4];
+        drflac_uint32 riceParamParts[4];
+        /* NOTE(review): these three loads read pDecodedSamples[-12..-1] regardless of "order" - presumably the caller guarantees that much history is readable; confirm. */
+        __m128i s_09_10_11_12 = _mm_loadu_si128((const __m128i*)(pDecodedSamples - 12));
+        __m128i s_05_06_07_08 = _mm_loadu_si128((const __m128i*)(pDecodedSamples -  8));
+        __m128i s_01_02_03_04 = _mm_loadu_si128((const __m128i*)(pDecodedSamples -  4));
+
+        __m128i prediction = _mm_setzero_si128();
+
+        /*
+        The idea with this switch is to do a single jump based on the value of "order" and then fall through every remaining case. In my test library,
+        "order" is never larger than 12, so I have decided to do a less optimal, but simpler solution (one unaligned load per case) in the order > 12 case.
+        */
+        switch (order)
+        {
+        case 32: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[31], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 32))));
+        case 31: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[30], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 31))));
+        case 30: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[29], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 30))));
+        case 29: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[28], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 29))));
+        case 28: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[27], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 28))));
+        case 27: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[26], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 27))));
+        case 26: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[25], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 26))));
+        case 25: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[24], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 25))));
+        case 24: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[23], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 24))));
+        case 23: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[22], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 23))));
+        case 22: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[21], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 22))));
+        case 21: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[20], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 21))));
+        case 20: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[19], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 20))));
+        case 19: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[18], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 19))));
+        case 18: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[17], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 18))));
+        case 17: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[16], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 17))));
+        case 16: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[15], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 16))));
+        case 15: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[14], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 15))));
+        case 14: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[13], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 14))));
+        case 13: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[12], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 13))));
+        /* From here on the samples come from the three vectors loaded above; the slide helpers build the windows that lie between them. */
+        case 12: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[11], s_09_10_11_12));
+        case 11: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[10], drflac__mm_slide3_epi32(s_05_06_07_08, s_09_10_11_12)));
+        case 10: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 9], drflac__mm_slide2_epi32(s_05_06_07_08, s_09_10_11_12)));
+        case  9: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 8], drflac__mm_slide1_epi32(s_05_06_07_08, s_09_10_11_12)));
+        case  8: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 7], s_05_06_07_08));
+        case  7: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 6], drflac__mm_slide3_epi32(s_01_02_03_04, s_05_06_07_08)));
+        case  6: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 5], drflac__mm_slide2_epi32(s_01_02_03_04, s_05_06_07_08)));
+        case  5: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 4], drflac__mm_slide1_epi32(s_01_02_03_04, s_05_06_07_08)));
+        case  4: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 3], s_01_02_03_04)); order = 3;    /* <-- order must be set to 3 here so that all three scalar fix-up switches below run for any order >= 4. */
+        case  3: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 2], drflac__mm_slide3_epi32(_mm_setzero_si128(), s_01_02_03_04)));
+        case  2: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 1], drflac__mm_slide2_epi32(_mm_setzero_si128(), s_01_02_03_04)));
+        case  1: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 0], drflac__mm_slide1_epi32(_mm_setzero_si128(), s_01_02_03_04)));
+        }
+        /* Cases 3..1 used zeros in place of pDecodedSamples[0..2], which are not decoded yet; those terms are added in scalar code below as each sample becomes available. */
+        _mm_storeu_si128((__m128i*)predictions, prediction);
+        _mm_storeu_si128((__m128i*)riceParamParts, riceParamParts128);
+
+        predictions[0] = riceParamParts[0] + (predictions[0] >> shift);
+        /* Lane 0 of coefficients128[k] is read as the scalar coefficient k; presumably every lane holds the same value - confirm against the caller. */
+        switch (order)
+        {
+        case 3: predictions[3] += ((const drflac_int32*)&coefficients128[ 2])[0] * predictions[  0];
+        case 2: predictions[2] += ((const drflac_int32*)&coefficients128[ 1])[0] * predictions[  0];
+        case 1: predictions[1] += ((const drflac_int32*)&coefficients128[ 0])[0] * predictions[  0];
+        }
+        predictions[1] = riceParamParts[1] + (predictions[1] >> shift);
+
+        switch (order)
+        {
+        case 3:
+        case 2: predictions[3] += ((const drflac_int32*)&coefficients128[ 1])[0] * predictions[  1];
+        case 1: predictions[2] += ((const drflac_int32*)&coefficients128[ 0])[0] * predictions[  1];
+        }
+        predictions[2] = riceParamParts[2] + (predictions[2] >> shift);
+
+        switch (order)
+        {
+        case 3:
+        case 2:
+        case 1: predictions[3] += ((const drflac_int32*)&coefficients128[ 0])[0] * predictions[  2];
+        }
+        predictions[3] = riceParamParts[3] + (predictions[3] >> shift);
+
+        pDecodedSamples[0] = predictions[0];
+        pDecodedSamples[1] = predictions[1];
+        pDecodedSamples[2] = predictions[2];
+        pDecodedSamples[3] = predictions[3];
+    } else {
+        _mm_storeu_si128((__m128i*)pDecodedSamples, riceParamParts128);  /* order == 0: nothing to predict, so the four values of riceParamParts128 are stored unchanged. */
+    }
+}
+#endif
+
 #if 0
-// Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
-// sake of readability and should only be used as a reference.
+/*
+Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
+sake of readability and should only be used as a reference.
+*/
 static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
+    drflac_uint32 i;
+
     drflac_assert(bs != NULL);
     drflac_assert(count > 0);
     drflac_assert(pSamplesOut != NULL);
 
-    for (drflac_uint32 i = 0; i < count; ++i) {
+    for (i = 0; i < count; ++i) {
         drflac_uint32 zeroCounter = 0;
         for (;;) {
             drflac_uint8 bit;
@@ -2193,6 +2955,8 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drfla
 static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
 {
     drflac_uint32 zeroCounter = 0;
+    drflac_uint32 decodedRice;
+
     for (;;) {
         drflac_uint8 bit;
         if (!drflac__read_uint8(bs, 1, &bit)) {
@@ -2206,7 +2970,6 @@ static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_ui
         }
     }
 
-    drflac_uint32 decodedRice;
     if (riceParam > 0) {
         if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
             return DRFLAC_FALSE;
@@ -2221,13 +2984,20 @@ static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_ui
 }
 #endif
 
+#if 0
 static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
 {
-    drflac_cache_t riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
-    drflac_cache_t resultHiShift = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParam;
+    drflac_cache_t riceParamMask;
+    drflac_uint32 zeroCounter;
+    drflac_uint32 setBitOffsetPlus1;
+    drflac_uint32 riceParamPart;
+    drflac_uint32 riceLength;
 
+    drflac_assert(riceParam > 0);   /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. */
 
-    drflac_uint32 zeroCounter = 0;
+    riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
+
+    zeroCounter = 0;
     while (bs->cache == 0) {
         zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
         if (!drflac__reload_cache(bs)) {
@@ -2235,64 +3005,409 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac
         }
     }
 
-    drflac_uint32 setBitOffsetPlus1 = drflac__clz(bs->cache);
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
     zeroCounter += setBitOffsetPlus1;
     setBitOffsetPlus1 += 1;
 
-
-    drflac_uint32 riceParamPart;
-    drflac_uint32 riceLength = setBitOffsetPlus1 + riceParam;
+    riceLength = setBitOffsetPlus1 + riceParam;
     if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
-        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> (DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceLength));
+        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength));
 
         bs->consumedBits += riceLength;
         bs->cache <<= riceLength;
     } else {
-        bs->consumedBits += riceLength;
-        if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
-            bs->cache <<= setBitOffsetPlus1;
-        }
+        drflac_uint32 bitCountLo;
+        drflac_cache_t resultHi;
 
-        // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them.
-        drflac_uint32 bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
-        drflac_cache_t resultHi = bs->cache & riceParamMask;    // <-- This mask is OK because all bits after the first bits are always zero.
+        bs->consumedBits += riceLength;
+        bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1);    /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */
+
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
+        bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam);  /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */
 
         if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-        #ifndef DR_FLAC_NO_CRC
+#ifndef DR_FLAC_NO_CRC
             drflac__update_crc16(bs);
-        #endif
+#endif
             bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
             bs->consumedBits = 0;
-        #ifndef DR_FLAC_NO_CRC
+#ifndef DR_FLAC_NO_CRC
             bs->crc16Cache = bs->cache;
-        #endif
+#endif
         } else {
-            // Slow path. We need to fetch more data from the client.
+            /* Slow path. We need to fetch more data from the client. */
             if (!drflac__reload_cache(bs)) {
                 return DRFLAC_FALSE;
             }
         }
 
-        riceParamPart = (drflac_uint32)((resultHi >> resultHiShift) | DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo));
+        riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
 
         bs->consumedBits += bitCountLo;
         bs->cache <<= bitCountLo;
     }
 
-    *pZeroCounterOut = zeroCounter;
-    *pRiceParamPartOut = riceParamPart;
+    pZeroCounterOut[0] = zeroCounter;
+    pRiceParamPartOut[0] = riceParamPart;
+
+    return DRFLAC_TRUE;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    /*drflac_cache_t riceParamPlus1Mask  = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/
+    drflac_uint32  riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1);
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+    /*
+    Reads one Rice-coded value from the bit stream. pZeroCounterOut[0] receives the number of zero bits before the first set bit and
+    pRiceParamPartOut[0] the riceParam+1 bits starting at that set bit. Returns DRFLAC_FALSE if the cache cannot be reloaded. The cache is
+    copied into locals in an attempt to encourage the compiler to keep it in registers; the locals are only copied back to bs on success.
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first set bit (the end of the run of zeros). Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        pZeroCounterOut[0] = lzcount;
+
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+        outside of this function at a higher level.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+            pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            drflac_uint32 riceParamPartHi;
+            drflac_uint32 riceParamPartLo;
+            drflac_uint32 riceParamPartLoBitCount;
+
+            /*
+            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+            line, reload the cache, and then combine it with the head of the next cache line.
+            */
+
+            /* Grab the high part of the rice parameter part. */
+            riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+
+            /* Before reloading the cache we need to grab the size in bits of the low part. */
+            riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            drflac_assert(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+                
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            /* We should now have enough information to construct the rice parameter part. */
+            riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount)));
+            pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache.
+        */
+        drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits);
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            zeroCounter += lzcount;
+
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        pZeroCounterOut[0] = zeroCounter;
+        goto extract_rice_param_part;
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x4(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    /*drflac_cache_t riceParamPlus1Mask  = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/
+    drflac_uint32  riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1);
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+
+    /*
+    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
+    no idea how this will work in practice...
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /*
+    What this is doing is trying to efficiently extract 4 rice parts at a time, the idea being that we can exploit certain properties
+    to our advantage to make things more efficient.
+    */
+    int i;
+    for (i = 0; i < 4; ++i) {
+        /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
+        drflac_uint32  lzcount = drflac__clz(bs_cache);
+        if (lzcount < sizeof(bs_cache)*8) {
+            pZeroCounterOut[i] = lzcount;
+
+            /*
+            It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+            this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+            outside of this function at a higher level.
+            */
+        extract_rice_param_part:
+            bs_cache       <<= lzcount;
+            bs_consumedBits += lzcount;
+
+            if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+                /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+                pRiceParamPartOut[i] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+                bs_cache       <<= riceParamPlus1;
+                bs_consumedBits += riceParamPlus1;
+            } else {
+                drflac_uint32 riceParamPartHi;
+                drflac_uint32 riceParamPartLo;
+                drflac_uint32 riceParamPartLoBitCount;
+
+                /*
+                Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+                line, reload the cache, and then combine it with the head of the next cache line.
+                */
+
+                /* Grab the high part of the rice parameter part. */
+                riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+
+                /* Before reloading the cache we need to grab the size in bits of the low part. */
+                riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+                
+                /* Now reload the cache. */
+                if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+                #ifndef DR_FLAC_NO_CRC
+                    drflac__update_crc16(bs);
+                #endif
+                    bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                    bs_consumedBits = riceParamPartLoBitCount;
+                #ifndef DR_FLAC_NO_CRC
+                    bs->crc16Cache = bs_cache;
+                #endif
+                } else {
+                    /* Slow path. We need to fetch more data from the client. */
+                    if (!drflac__reload_cache(bs)) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    bs_cache = bs->cache;
+                    bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+                }
+
+                /* We should now have enough information to construct the rice parameter part. */
+                riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount)));
+                pRiceParamPartOut[i] = riceParamPartHi | riceParamPartLo;
+
+                bs_cache <<= riceParamPartLoBitCount;
+            }
+        } else {
+            /*
+            Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+            to drflac__clz() and we need to reload the cache.
+            */
+            drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits);
+            for (;;) {
+                if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+                #ifndef DR_FLAC_NO_CRC
+                    drflac__update_crc16(bs);
+                #endif
+                    bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                    bs_consumedBits = 0;
+                #ifndef DR_FLAC_NO_CRC
+                    bs->crc16Cache = bs_cache;
+                #endif
+                } else {
+                    /* Slow path. We need to fetch more data from the client. */
+                    if (!drflac__reload_cache(bs)) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    bs_cache = bs->cache;
+                    bs_consumedBits = bs->consumedBits;
+                }
+
+                lzcount = drflac__clz(bs_cache);
+                zeroCounter += lzcount;
+
+                if (lzcount < sizeof(bs_cache)*8) {
+                    break;
+                }
+            }
+
+            pZeroCounterOut[i] = zeroCounter;
+            goto extract_rice_param_part;
+        }
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;  /* Bits to skip after the zero run: the terminating set bit plus riceParam bits. */
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;  /* Highest consumedBits at which those bits still fit on the current cache line. */
+
+    /*
+    Skips past a single Rice coded value (zero run, terminating set bit and riceParam bits) without producing any output. Returns
+    DRFLAC_FALSE if drflac__reload_cache() fails. Local copies of the cache are used in an attempt to keep them in registers; they are written back to bs before returning DRFLAC_TRUE.
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first set bit. Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When skipping
+        it, the set bit that terminates the unary coded part is skipped along with it (hence riceParam + 1) because it simplifies
+        cache management.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line, so it can be shifted out directly. */
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            /*
+            Getting here means the rice parameter part straddles the cache line. Nothing needs to be extracted when seeking, so we
+            only work out how many of its bits spill into the next cache line, reload the cache, and then skip that many bits.
+            */
+
+            /* Before reloading the cache we need to grab the size in bits of the low part (the bits that land on the next cache line). */
+            drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            drflac_assert(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+                
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {  /* Fast path. An L2 line is still available. */
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;  /* The low part is counted as consumed here and shifted out of the cache below. */
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache. Lines are loaded until one contains a set bit; the zero run length is not accumulated when seeking.
+        */
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {  /* Fast path. An L2 line is still available. */
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        goto extract_rice_param_part;  /* lzcount now refers to the freshly loaded cache line. */
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
     return DRFLAC_TRUE;
 }
 
 
-static drflac_bool32 drflac__decode_samples_with_residual__rice__simple(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
-    drflac_assert(bs != NULL);
-    drflac_assert(count > 0);
-    drflac_assert(pSamplesOut != NULL);
-
-    static drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-
+    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
     drflac_uint32 zeroCountPart0;
     drflac_uint32 zeroCountPart1;
     drflac_uint32 zeroCountPart2;
@@ -2301,57 +3416,100 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__simple(drflac_b
     drflac_uint32 riceParamPart1;
     drflac_uint32 riceParamPart2;
     drflac_uint32 riceParamPart3;
-    drflac_uint32 i4 = 0;
-    drflac_uint32 count4 = count >> 2;
-    while (i4 < count4) {
-        // Rice extraction.
-        if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
-            !drflac__read_rice_parts(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
-            !drflac__read_rice_parts(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
-            !drflac__read_rice_parts(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
-            return DRFLAC_FALSE;
-        }
+    drflac_uint32 riceParamMask;
+    const drflac_int32* pSamplesOutEnd;
+    drflac_uint32 i;
 
-        riceParamPart0 |= (zeroCountPart0 << riceParam);
-        riceParamPart1 |= (zeroCountPart1 << riceParam);
-        riceParamPart2 |= (zeroCountPart2 << riceParam);
-        riceParamPart3 |= (zeroCountPart3 << riceParam);
+    drflac_assert(bs != NULL);
+    drflac_assert(count > 0);
+    drflac_assert(pSamplesOut != NULL);
 
-        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-        riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
-        riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
-        riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+    riceParamMask  = ~((~0UL) << riceParam);
+    pSamplesOutEnd = pSamplesOut + ((count >> 2) << 2);
+
+    if (bitsPerSample >= 24) {
+        while (pSamplesOut < pSamplesOutEnd) {
+            /*
+            Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
+            against an array. Not sure why, but perhaps it's making more efficient use of registers?
+            */
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
 
-        if (bitsPerSample > 16) {
             pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
             pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
             pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
             pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
-        } else {
+
+            pSamplesOut += 4;
+        }
+    } else {
+        while (pSamplesOut < pSamplesOutEnd) {
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+
             pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
             pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
             pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
             pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
-        }
 
-        i4 += 1;
-        pSamplesOut += 4;
+            pSamplesOut += 4;
+        }
     }
 
-    drflac_uint32 i = i4 << 2;
+    i = ((count >> 2) << 2);
     while (i < count) {
-        // Rice extraction.
-        if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
             return DRFLAC_FALSE;
         }
 
-        // Rice reconstruction.
+        /* Rice reconstruction. */
+        riceParamPart0 &= riceParamMask;
         riceParamPart0 |= (zeroCountPart0 << riceParam);
         riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-        //riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);
+        /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
 
-        // Sample reconstruction.
-        if (bitsPerSample > 16) {
+        /* Sample reconstruction. */
+        if (bitsPerSample >= 24) {
             pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
         } else {
             pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
@@ -2360,29 +3518,188 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__simple(drflac_b
         i += 1;
         pSamplesOut += 1;
     }
-
+    
     return DRFLAC_TRUE;
 }
 
+#if defined(DRFLAC_SUPPORT_SSE41)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    static drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};   /* XOR mask indexed by the low bit; used by the scalar tail loop to undo the sign folding. */
+    /* SSE4.1 path: reads count Rice coded residuals from bs and writes the reconstructed samples to pSamplesOut, four at a time followed by a scalar tail. Returns DRFLAC_FALSE if a read fails. */
+    /*drflac_uint32 zeroCountParts[4];*/
+    /*drflac_uint32 riceParamParts[4];*/
+
+    drflac_uint32 zeroCountParts0;
+    drflac_uint32 zeroCountParts1;
+    drflac_uint32 zeroCountParts2;
+    drflac_uint32 zeroCountParts3;
+    drflac_uint32 riceParamParts0;
+    drflac_uint32 riceParamParts1;
+    drflac_uint32 riceParamParts2;
+    drflac_uint32 riceParamParts3;
+    drflac_uint32 riceParamMask;
+    const drflac_int32* pSamplesOutEnd;
+    __m128i riceParamMask128;
+    __m128i one;
+    drflac_uint32 i;
+
+    drflac_assert(bs != NULL);
+    drflac_assert(count > 0);
+    drflac_assert(pSamplesOut != NULL);
+
+    riceParamMask = ~((~0UL) << riceParam);    /* Only the low riceParam bits are set. */
+    riceParamMask128 = _mm_set1_epi32(riceParamMask);
+    one = _mm_set1_epi32(0x01);
+
+    pSamplesOutEnd = pSamplesOut + ((count >> 2) << 2);    /* End of the part of the output that is processed four samples at a time. */
+
+    if (bitsPerSample >= 24) {  /* The 64-bit prediction routines are used from 24 bits per sample upwards. */
+        while (pSamplesOut < pSamplesOutEnd) {
+            __m128i zeroCountPart128;
+            __m128i riceParamPart128;
+            drflac_uint32 riceParamParts[4];
+
+            /* Rice extraction. */
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+                return DRFLAC_FALSE;
+            }
+
+            zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+            riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+
+            riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);   /* Keep only the low riceParam bits. */
+            riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));   /* Merge in the zero count as the high bits. */
+            riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, one), _mm_set1_epi32(0xFFFFFFFF)));   /* Undo the sign folding: (x >> 1) ^ -(x & 1). _mm_mullo_epi32() is only supported from SSE4.1. */
+            /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, one)), one));*/  /* <-- SSE2 compatible */
+
+            _mm_storeu_si128((__m128i*)riceParamParts, riceParamPart128);
+
+        #if defined(DRFLAC_64BIT)
+            /* The scalar implementation seems to be faster on 64-bit in my testing. */
+            drflac__calculate_prediction_64_x4(order, shift, coefficients, riceParamParts, pSamplesOut);
+        #else
+            pSamplesOut[0] = riceParamParts[0] + drflac__calculate_prediction_64__sse41(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamParts[1] + drflac__calculate_prediction_64__sse41(order, shift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamParts[2] + drflac__calculate_prediction_64__sse41(order, shift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamParts[3] + drflac__calculate_prediction_64__sse41(order, shift, coefficients, pSamplesOut + 3);
+        #endif
+
+            pSamplesOut += 4;
+        }
+    } else {
+        drflac_int32 coefficientsUnaligned[32*4 + 4] = {0};    /* Room for 32 coefficients replicated four times, plus slack for the manual alignment below. */
+        drflac_int32* coefficients128 = (drflac_int32*)(((size_t)coefficientsUnaligned + 15) & ~15);    /* Round up to a 16-byte boundary. */
+
+        for (i = 0; i < order; ++i) {   /* Replicate each coefficient four times so that each group of four can be read as one __m128i. */
+            coefficients128[i*4+0] = coefficients[i];
+            coefficients128[i*4+1] = coefficients[i];
+            coefficients128[i*4+2] = coefficients[i];
+            coefficients128[i*4+3] = coefficients[i];
+        }
+
+        while (pSamplesOut < pSamplesOutEnd) {
+            __m128i zeroCountPart128;
+            __m128i riceParamPart128;
+            /*drflac_int32 riceParamParts[4];*/
+
+            /* Rice extraction. */
+#if 1
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+                return DRFLAC_FALSE;
+            }
+
+            zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+            riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+#else
+            if (!drflac__read_rice_parts_x4(bs, riceParam, zeroCountParts, riceParamParts)) {
+                return DRFLAC_FALSE;
+            }
+
+            zeroCountPart128 = _mm_set_epi32(zeroCountParts[3], zeroCountParts[2], zeroCountParts[1], zeroCountParts[0]);
+            riceParamPart128 = _mm_set_epi32(riceParamParts[3], riceParamParts[2], riceParamParts[1], riceParamParts[0]);
+#endif
+
+            riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);   /* Keep only the low riceParam bits. */
+            riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));   /* Merge in the zero count as the high bits. */
+            riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, one), _mm_set1_epi32(0xFFFFFFFF)));   /* Undo the sign folding: (x >> 1) ^ -(x & 1). */
+
+#if 1
+            drflac__calculate_prediction_32_x4__sse41(order, shift, (const __m128i*)coefficients128, riceParamPart128, pSamplesOut);
+#else
+            _mm_storeu_si128((__m128i*)riceParamParts, riceParamPart128);
+
+            pSamplesOut[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamParts[1] + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamParts[2] + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamParts[3] + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
+#endif
+
+            pSamplesOut += 4;
+        }
+    }
+
+
+    i = ((count >> 2) << 2);    /* The remaining (count % 4) samples are handled one at a time. */
+    while (i < count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts0 &= riceParamMask;
+        riceParamParts0 |= (zeroCountParts0 << riceParam);
+        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
+
+        /* Sample reconstruction. */
+        if (bitsPerSample >= 24) {
+            pSamplesOut[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
+        } else {
+            pSamplesOut[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+        }
+
+        i += 1;
+        pSamplesOut += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+#endif
+
 static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
-#if 0
-    return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-#else
-    return drflac__decode_samples_with_residual__rice__simple(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+#if defined(DRFLAC_SUPPORT_SSE41)
+    if (drflac__gIsSSE41Supported) {    /* Flag defined outside this function; the SSE4.1 path is only compiled when DRFLAC_SUPPORT_SSE41 is defined. */
+        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    } else
 #endif
+    {
+        /* Scalar fallback. This is also the only path when DRFLAC_SUPPORT_SSE41 is not defined, since the if/else above is compiled out. */
+    #if 0   /* Change to 1 to use the reference implementation instead of the scalar one. */
+        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    #else
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    #endif
+    }
 }
 
-// Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes.
+/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. */
 static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam)
 {
+    drflac_uint32 i;
+
     drflac_assert(bs != NULL);
     drflac_assert(count > 0);
 
-    for (drflac_uint32 i = 0; i < count; ++i) {
-        drflac_uint32 zeroCountPart;
-        drflac_uint32 riceParamPart;
-        if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart, &riceParamPart)) {
+    for (i = 0; i < count; ++i) {
+        if (!drflac__seek_rice_parts(bs, riceParam)) {
             return DRFLAC_FALSE;
         }
     }
@@ -2392,14 +3709,20 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_
 
 static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
+    drflac_uint32 i;
+
     drflac_assert(bs != NULL);
     drflac_assert(count > 0);
-    drflac_assert(unencodedBitsPerSample > 0 && unencodedBitsPerSample <= 32);
+    drflac_assert(unencodedBitsPerSample <= 31);    /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */
     drflac_assert(pSamplesOut != NULL);
 
-    for (unsigned int i = 0; i < count; ++i) {
-        if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) {
-            return DRFLAC_FALSE;
+    for (i = 0; i < count; ++i) {
+        if (unencodedBitsPerSample > 0) {
+            if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            pSamplesOut[i] = 0;
         }
 
         if (bitsPerSample > 16) {
@@ -2413,60 +3736,66 @@ static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs*
 }
 
 
-// Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
-// when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
-// <blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+/*
+Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
 static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
 {
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
     drflac_assert(bs != NULL);
     drflac_assert(blockSize != 0);
-    drflac_assert(pDecodedSamples != NULL);       // <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode?
+    drflac_assert(pDecodedSamples != NULL);       /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */
 
-    drflac_uint8 residualMethod;
     if (!drflac__read_uint8(bs, 2, &residualMethod)) {
         return DRFLAC_FALSE;
     }
 
     if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-        return DRFLAC_FALSE;    // Unknown or unsupported residual coding method.
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
     }
 
-    // Ignore the first <order> values.
+    /* Ignore the first <order> values. */
     pDecodedSamples += order;
 
-
-    drflac_uint8 partitionOrder;
     if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
         return DRFLAC_FALSE;
     }
 
-    // From the FLAC spec:
-    //   The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
     if (partitionOrder > 8) {
         return DRFLAC_FALSE;
     }
 
-    // Validation check.
+    /* Validation check. */
     if ((blockSize / (1 << partitionOrder)) <= order) {
         return DRFLAC_FALSE;
     }
 
-    drflac_uint32 samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
-    drflac_uint32 partitionsRemaining = (1 << partitionOrder);
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    partitionsRemaining = (1 << partitionOrder);
     for (;;) {
         drflac_uint8 riceParam = 0;
         if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
             if (!drflac__read_uint8(bs, 4, &riceParam)) {
                 return DRFLAC_FALSE;
             }
-            if (riceParam == 16) {
+            if (riceParam == 15) {
                 riceParam = 0xFF;
             }
         } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
             if (!drflac__read_uint8(bs, 5, &riceParam)) {
                 return DRFLAC_FALSE;
             }
-            if (riceParam == 32) {
+            if (riceParam == 31) {
                 riceParam = 0xFF;
             }
         }
@@ -2488,7 +3817,6 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
 
         pDecodedSamples += samplesInPartition;
 
-
         if (partitionsRemaining == 1) {
             break;
         }
@@ -2503,30 +3831,48 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
     return DRFLAC_TRUE;
 }
 
-// Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
-// when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The
-// <blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+/*
+Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
 static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order)
 {
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
     drflac_assert(bs != NULL);
     drflac_assert(blockSize != 0);
 
-    drflac_uint8 residualMethod;
     if (!drflac__read_uint8(bs, 2, &residualMethod)) {
         return DRFLAC_FALSE;
     }
 
     if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-        return DRFLAC_FALSE;    // Unknown or unsupported residual coding method.
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
     }
 
-    drflac_uint8 partitionOrder;
     if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
         return DRFLAC_FALSE;
     }
 
-    drflac_uint32 samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
-    drflac_uint32 partitionsRemaining = (1 << partitionOrder);
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
+    if (partitionOrder > 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Validation check. */
+    if ((blockSize / (1 << partitionOrder)) <= order) {
+        return DRFLAC_FALSE;
+    }
+
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    partitionsRemaining = (1 << partitionOrder);
     for (;;)
     {
         drflac_uint8 riceParam = 0;
@@ -2534,14 +3880,14 @@ static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32
             if (!drflac__read_uint8(bs, 4, &riceParam)) {
                 return DRFLAC_FALSE;
             }
-            if (riceParam == 16) {
+            if (riceParam == 15) {
                 riceParam = 0xFF;
             }
         } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
             if (!drflac__read_uint8(bs, 5, &riceParam)) {
                 return DRFLAC_FALSE;
             }
-            if (riceParam == 32) {
+            if (riceParam == 31) {
                 riceParam = 0xFF;
             }
         }
@@ -2576,15 +3922,19 @@ static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32
 
 static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_int32* pDecodedSamples)
 {
-    // Only a single sample needs to be decoded here.
+    drflac_uint32 i;
+
+    /* Only a single sample needs to be decoded here. */
     drflac_int32 sample;
     if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
         return DRFLAC_FALSE;
     }
 
-    // We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
-    // we'll want to look at a more efficient way.
-    for (drflac_uint32 i = 0; i < blockSize; ++i) {
+    /*
+    We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
+    we'll want to look at a more efficient way.
+    */
+    for (i = 0; i < blockSize; ++i) {
         pDecodedSamples[i] = sample;
     }
 
@@ -2593,7 +3943,9 @@ static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint
 
 static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_int32* pDecodedSamples)
 {
-    for (drflac_uint32 i = 0; i < blockSize; ++i) {
+    drflac_uint32 i;
+
+    for (i = 0; i < blockSize; ++i) {
         drflac_int32 sample;
         if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
             return DRFLAC_FALSE;
@@ -2607,7 +3959,9 @@ static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint
 
 static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
 {
-    drflac_int32 lpcCoefficientsTable[5][4] = {
+    drflac_uint32 i;
+
+    static drflac_int32 lpcCoefficientsTable[5][4] = {
         {0,  0, 0,  0},
         {1,  0, 0,  0},
         {2, -1, 0,  0},
@@ -2615,8 +3969,8 @@ static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32
         {4, -6, 4, -1}
     };
 
-    // Warm up samples and coefficients.
-    for (drflac_uint32 i = 0; i < lpcOrder; ++i) {
+    /* Warm up samples and coefficients. */
+    for (i = 0; i < lpcOrder; ++i) {
         drflac_int32 sample;
         if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
             return DRFLAC_FALSE;
@@ -2625,7 +3979,6 @@ static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32
         pDecodedSamples[i] = sample;
     }
 
-
     if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
         return DRFLAC_FALSE;
     }
@@ -2636,8 +3989,11 @@ static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32
 static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
 {
     drflac_uint8 i;
+    drflac_uint8 lpcPrecision;
+    drflac_int8 lpcShift;
+    drflac_int32 coefficients[32];
 
-    // Warm up samples.
+    /* Warm up samples. */
     for (i = 0; i < lpcOrder; ++i) {
         drflac_int32 sample;
         if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
@@ -2647,23 +4003,19 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl
         pDecodedSamples[i] = sample;
     }
 
-    drflac_uint8 lpcPrecision;
     if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
         return DRFLAC_FALSE;
     }
     if (lpcPrecision == 15) {
-        return DRFLAC_FALSE;    // Invalid.
+        return DRFLAC_FALSE;    /* Invalid. */
     }
     lpcPrecision += 1;
 
-
-    drflac_int8 lpcShift;
     if (!drflac__read_int8(bs, 5, &lpcShift)) {
         return DRFLAC_FALSE;
     }
 
-
-    drflac_int32 coefficients[32];
+    drflac_zero_memory(coefficients, sizeof(coefficients));
     for (i = 0; i < lpcOrder; ++i) {
         if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) {
             return DRFLAC_FALSE;
@@ -2678,71 +4030,82 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl
 }
 
 
-static drflac_bool32 drflac__read_next_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header)
+static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header)
 {
+    const drflac_uint32 sampleRateTable[12]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
+    const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1};   /* -1 = reserved. */
+
     drflac_assert(bs != NULL);
     drflac_assert(header != NULL);
 
-    const drflac_uint32 sampleRateTable[12]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
-    const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1};   // -1 = reserved.
-
-    // Keep looping until we find a valid sync code.
+    /* Keep looping until we find a valid sync code. */
     for (;;) {
+        drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */
+        drflac_uint8 reserved = 0;
+        drflac_uint8 blockingStrategy = 0;
+        drflac_uint8 blockSize = 0;
+        drflac_uint8 sampleRate = 0;
+        drflac_uint8 channelAssignment = 0;
+        drflac_uint8 bitsPerSample = 0;
+        drflac_bool32 isVariableBlockSize;
+
         if (!drflac__find_and_seek_to_next_sync_code(bs)) {
             return DRFLAC_FALSE;
         }
 
-        drflac_uint8 crc8 = 0xCE; // 0xCE = drflac_crc8(0, 0x3FFE, 14);
-
-        drflac_uint8 reserved = 0;
         if (!drflac__read_uint8(bs, 1, &reserved)) {
             return DRFLAC_FALSE;
         }
+        if (reserved == 1) {
+            continue;
+        }
         crc8 = drflac_crc8(crc8, reserved, 1);
 
-
-        drflac_uint8 blockingStrategy = 0;
         if (!drflac__read_uint8(bs, 1, &blockingStrategy)) {
             return DRFLAC_FALSE;
         }
         crc8 = drflac_crc8(crc8, blockingStrategy, 1);
 
-
-        drflac_uint8 blockSize = 0;
         if (!drflac__read_uint8(bs, 4, &blockSize)) {
             return DRFLAC_FALSE;
         }
+        if (blockSize == 0) {
+            continue;
+        }
         crc8 = drflac_crc8(crc8, blockSize, 4);
 
-
-        drflac_uint8 sampleRate = 0;
         if (!drflac__read_uint8(bs, 4, &sampleRate)) {
             return DRFLAC_FALSE;
         }
         crc8 = drflac_crc8(crc8, sampleRate, 4);
 
-
-        drflac_uint8 channelAssignment = 0;
         if (!drflac__read_uint8(bs, 4, &channelAssignment)) {
             return DRFLAC_FALSE;
         }
+        if (channelAssignment > 10) {
+            continue;
+        }
         crc8 = drflac_crc8(crc8, channelAssignment, 4);
 
-
-        drflac_uint8 bitsPerSample = 0;
         if (!drflac__read_uint8(bs, 3, &bitsPerSample)) {
             return DRFLAC_FALSE;
         }
+        if (bitsPerSample == 3 || bitsPerSample == 7) {
+            continue;
+        }
         crc8 = drflac_crc8(crc8, bitsPerSample, 3);
 
 
         if (!drflac__read_uint8(bs, 1, &reserved)) {
             return DRFLAC_FALSE;
         }
+        if (reserved == 1) {
+            continue;
+        }
         crc8 = drflac_crc8(crc8, reserved, 1);
 
 
-        drflac_bool32 isVariableBlockSize = blockingStrategy == 1;
+        isVariableBlockSize = blockingStrategy == 1;
         if (isVariableBlockSize) {
             drflac_uint64 sampleNumber;
             drflac_result result = drflac__read_utf8_coded_number(bs, &sampleNumber, &crc8);
@@ -2765,7 +4128,7 @@ static drflac_bool32 drflac__read_next_frame_header(drflac_bs* bs, drflac_uint8
                     continue;
                 }
             }
-            header->frameNumber  = (drflac_uint32)frameNumber;   // <-- Safe cast.
+            header->frameNumber  = (drflac_uint32)frameNumber;   /* <-- Safe cast. */
             header->sampleNumber = 0;
         }
 
@@ -2811,7 +4174,7 @@ static drflac_bool32 drflac__read_next_frame_header(drflac_bs* bs, drflac_uint8
             crc8 = drflac_crc8(crc8, header->sampleRate, 16);
             header->sampleRate *= 10;
         } else {
-            continue;  // Invalid. Assume an invalid block.
+            continue;  /* Invalid. Assume an invalid block. */
         }
 
 
@@ -2826,11 +4189,11 @@ static drflac_bool32 drflac__read_next_frame_header(drflac_bs* bs, drflac_uint8
             return DRFLAC_FALSE;
         }
 
-    #ifndef DR_FLAC_NO_CRC
+#ifndef DR_FLAC_NO_CRC
         if (header->crc8 != crc8) {
-            continue;    // CRC mismatch. Loop back to the top and find the next sync code.
+            continue;    /* CRC mismatch. Loop back to the top and find the next sync code. */
         }
-    #endif
+#endif
         return DRFLAC_TRUE;
     }
 }
@@ -2838,16 +4201,18 @@ static drflac_bool32 drflac__read_next_frame_header(drflac_bs* bs, drflac_uint8
 static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe)
 {
     drflac_uint8 header;
+    int type;
+
     if (!drflac__read_uint8(bs, 8, &header)) {
         return DRFLAC_FALSE;
     }
 
-    // First bit should always be 0.
+    /* First bit should always be 0. */
     if ((header & 0x80) != 0) {
         return DRFLAC_FALSE;
     }
 
-    int type = (header & 0x7E) >> 1;
+    type = (header & 0x7E) >> 1;
     if (type == 0) {
         pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT;
     } else if (type == 1) {
@@ -2872,7 +4237,7 @@ static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe
         return DRFLAC_FALSE;
     }
 
-    // Wasted bits per sample.
+    /* Wasted bits per sample. */
     pSubframe->wastedBitsPerSample = 0;
     if ((header & 0x01) == 1) {
         unsigned int wastedBitsPerSample;
@@ -2887,15 +4252,17 @@ static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe
 
 static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut)
 {
+    drflac_subframe* pSubframe;
+
     drflac_assert(bs != NULL);
     drflac_assert(frame != NULL);
 
-    drflac_subframe* pSubframe = frame->subframes + subframeIndex;
+    pSubframe = frame->subframes + subframeIndex;
     if (!drflac__read_subframe_header(bs, pSubframe)) {
         return DRFLAC_FALSE;
     }
 
-    // Side channels require an extra bit per sample. Took a while to figure that one out...
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
     pSubframe->bitsPerSample = frame->header.bitsPerSample;
     if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
         pSubframe->bitsPerSample += 1;
@@ -2903,7 +4270,10 @@ static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame,
         pSubframe->bitsPerSample += 1;
     }
 
-    // Need to handle wasted bits per sample.
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= pSubframe->bitsPerSample) {
+        return DRFLAC_FALSE;
+    }
     pSubframe->bitsPerSample -= pSubframe->wastedBitsPerSample;
     pSubframe->pDecodedSamples = pDecodedSamplesOut;
 
@@ -2937,15 +4307,17 @@ static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame,
 
 static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex)
 {
+    drflac_subframe* pSubframe;
+
     drflac_assert(bs != NULL);
     drflac_assert(frame != NULL);
 
-    drflac_subframe* pSubframe = frame->subframes + subframeIndex;
+    pSubframe = frame->subframes + subframeIndex;
     if (!drflac__read_subframe_header(bs, pSubframe)) {
         return DRFLAC_FALSE;
     }
 
-    // Side channels require an extra bit per sample. Took a while to figure that one out...
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
     pSubframe->bitsPerSample = frame->header.bitsPerSample;
     if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
         pSubframe->bitsPerSample += 1;
@@ -2953,7 +4325,10 @@ static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, i
         pSubframe->bitsPerSample += 1;
     }
 
-    // Need to handle wasted bits per sample.
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= pSubframe->bitsPerSample) {
+        return DRFLAC_FALSE;
+    }
     pSubframe->bitsPerSample -= pSubframe->wastedBitsPerSample;
     pSubframe->pDecodedSamples = NULL;
 
@@ -2988,22 +4363,23 @@ static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, i
 
         case DRFLAC_SUBFRAME_LPC:
         {
+            unsigned char lpcPrecision;
+
             unsigned int bitsToSeek = pSubframe->lpcOrder * pSubframe->bitsPerSample;
             if (!drflac__seek_bits(bs, bitsToSeek)) {
                 return DRFLAC_FALSE;
             }
 
-            unsigned char lpcPrecision;
             if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
                 return DRFLAC_FALSE;
             }
             if (lpcPrecision == 15) {
-                return DRFLAC_FALSE;    // Invalid.
+                return DRFLAC_FALSE;    /* Invalid. */
             }
             lpcPrecision += 1;
 
 
-            bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5;    // +5 for shift.
+            bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5;    /* +5 for shift. */
             if (!drflac__seek_bits(bs, bitsToSeek)) {
                 return DRFLAC_FALSE;
             }
@@ -3022,35 +4398,43 @@ static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, i
 
 static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment)
 {
-    drflac_assert(channelAssignment <= 10);
-
     drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
+    /* Maps a frame header channel assignment to a channel count. The table has 11 entries, so valid assignments are 0..10; 8, 9 and 10 all yield 2. */
+    drflac_assert(channelAssignment <= 10);   /* NOTE(review): only the upper bound is checked; if drflac_int8 is signed a negative value would pass - confirm callers never supply one. */
     return lookup[channelAssignment];
 }
 
-static drflac_result drflac__decode_frame(drflac* pFlac)
+static drflac_result drflac__decode_flac_frame(drflac* pFlac)
 {
-    // This function should be called while the stream is sitting on the first byte after the frame header.
+    int channelCount;
+    int i;
+    drflac_uint8 paddingSizeInBits;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+
+    /* This function should be called while the stream is sitting on the first byte after the frame header. */
     drflac_zero_memory(pFlac->currentFrame.subframes, sizeof(pFlac->currentFrame.subframes));
 
-    // The frame block size must never be larger than the maximum block size defined by the FLAC stream.
+    /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */
     if (pFlac->currentFrame.header.blockSize > pFlac->maxBlockSize) {
         return DRFLAC_ERROR;
     }
 
-    // The number of channels in the frame must match the channel count from the STREAMINFO block.
-    int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
+    /* The number of channels in the frame must match the channel count from the STREAMINFO block. */
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
     if (channelCount != (int)pFlac->channels) {
         return DRFLAC_ERROR;
     }
 
-    for (int i = 0; i < channelCount; ++i) {
-        if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFrame, i, pFlac->pDecodedSamples + (pFlac->currentFrame.header.blockSize * i))) {
+    for (i = 0; i < channelCount; ++i) {
+        if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFrame, i, pFlac->pDecodedSamples + ((pFlac->currentFrame.header.blockSize+DRFLAC_LEADING_SAMPLES) * i) + DRFLAC_LEADING_SAMPLES)) {
             return DRFLAC_ERROR;
         }
     }
 
-    drflac_uint8 paddingSizeInBits = DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7;
+    paddingSizeInBits = DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7;
     if (paddingSizeInBits > 0) {
         drflac_uint8 padding = 0;
         if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) {
@@ -3059,16 +4443,15 @@ static drflac_result drflac__decode_frame(drflac* pFlac)
     }
 
 #ifndef DR_FLAC_NO_CRC
-    drflac_uint16 actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
 #endif
-    drflac_uint16 desiredCRC16;
     if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
         return DRFLAC_END_OF_STREAM;
     }
 
 #ifndef DR_FLAC_NO_CRC
     if (actualCRC16 != desiredCRC16) {
-        return DRFLAC_CRC_MISMATCH;    // CRC mismatch.
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
     }
 #endif
 
@@ -3077,51 +4460,59 @@ static drflac_result drflac__decode_frame(drflac* pFlac)
     return DRFLAC_SUCCESS;
 }
 
-static drflac_result drflac__seek_frame(drflac* pFlac)
+static drflac_result drflac__seek_flac_frame(drflac* pFlac)
 {
-    int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
-    for (int i = 0; i < channelCount; ++i) {
+    int channelCount;
+    int i;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+    /* Skip every subframe of the current frame (one per channel). Any subframe that cannot be skipped aborts with DRFLAC_ERROR. */
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
+    for (i = 0; i < channelCount; ++i) {
         if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFrame, i)) {
             return DRFLAC_ERROR;
         }
     }
 
-    // Padding.
+    /* Padding. Skip the leftover bits (remaining L1 cache bits modulo 8) so the 16-bit CRC can be read next. */
     if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) {
         return DRFLAC_ERROR;
     }
 
-    // CRC.
+    /* CRC. The 16-bit value stored in the stream is compared with the one returned by drflac__flush_crc16(); the comparison is compiled out when DR_FLAC_NO_CRC is defined. */
 #ifndef DR_FLAC_NO_CRC
-    drflac_uint16 actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
 #endif
-    drflac_uint16 desiredCRC16;
     if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
         return DRFLAC_END_OF_STREAM;
     }
 
 #ifndef DR_FLAC_NO_CRC
     if (actualCRC16 != desiredCRC16) {
-        return DRFLAC_CRC_MISMATCH;    // CRC mismatch.
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
     }
 #endif
 
     return DRFLAC_SUCCESS;
 }
 
-static drflac_bool32 drflac__read_and_decode_next_frame(drflac* pFlac)
+static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac)
 {
     drflac_assert(pFlac != NULL);
 
     for (;;) {
-        if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+        drflac_result result;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
             return DRFLAC_FALSE;
         }
 
-        drflac_result result = drflac__decode_frame(pFlac);
+        result = drflac__decode_flac_frame(pFlac);
         if (result != DRFLAC_SUCCESS) {
             if (result == DRFLAC_CRC_MISMATCH) {
-                continue;   // CRC mismatch. Skip to the next frame.
+                continue;   /* CRC mismatch. Skip to the next frame. */
             } else {
                 return DRFLAC_FALSE;
             }
@@ -3134,29 +4525,67 @@ static drflac_bool32 drflac__read_and_decode_next_frame(drflac* pFlac)
 
 static void drflac__get_current_frame_sample_range(drflac* pFlac, drflac_uint64* pFirstSampleInFrameOut, drflac_uint64* pLastSampleInFrameOut)
 {
+    unsigned int channelCount;
+    drflac_uint64 firstSampleInFrame;
+    drflac_uint64 lastSampleInFrame;
+    /* Computes the zero-based first and last sample indices (scaled by channel count) covered by the current frame. Either output pointer may be NULL. */
     drflac_assert(pFlac != NULL);
 
-    unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
 
-    drflac_uint64 firstSampleInFrame = pFlac->currentFrame.header.sampleNumber;
+    firstSampleInFrame = pFlac->currentFrame.header.sampleNumber*channelCount;   /* A result of 0 falls back to the frame-number estimate below. NOTE(review): that product may be evaluated in 32 bits before widening - confirm it cannot wrap. */
     if (firstSampleInFrame == 0) {
         firstSampleInFrame = pFlac->currentFrame.header.frameNumber * pFlac->maxBlockSize*channelCount;
     }
 
-    drflac_uint64 lastSampleInFrame = firstSampleInFrame + (pFlac->currentFrame.header.blockSize*channelCount);
+    lastSampleInFrame = firstSampleInFrame + (pFlac->currentFrame.header.blockSize*channelCount);   /* One past the end; made inclusive just below. */
     if (lastSampleInFrame > 0) {
-        lastSampleInFrame -= 1; // Needs to be zero based.
+        lastSampleInFrame -= 1; /* Needs to be zero based. */
     }
 
-    if (pFirstSampleInFrameOut) *pFirstSampleInFrameOut = firstSampleInFrame;
-    if (pLastSampleInFrameOut) *pLastSampleInFrameOut = lastSampleInFrame;
+    if (pFirstSampleInFrameOut) {   /* Outputs are optional; only non-NULL pointers are written. */
+        *pFirstSampleInFrameOut = firstSampleInFrame;
+    }
+    if (pLastSampleInFrameOut) {
+        *pLastSampleInFrameOut = lastSampleInFrame;
+    }
 }
 
+/* This function will be replacing drflac__get_current_frame_sample_range(), but it's not currently used so it is disabled with #if 0 to silence a compiler warning. It reports the zero-based first and last PCM frame covered by the current FLAC frame. */
+#if 0
+static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame)
+{
+    drflac_uint64 firstPCMFrame;
+    drflac_uint64 lastPCMFrame;
+
+    drflac_assert(pFlac != NULL);
+
+    firstPCMFrame = pFlac->currentFrame.header.sampleNumber;
+    if (firstPCMFrame == 0) {   /* A zero sample number falls back to an estimate: frame number times the maximum block size. */
+        firstPCMFrame = pFlac->currentFrame.header.frameNumber * pFlac->maxBlockSize;
+    }
+
+    lastPCMFrame = firstPCMFrame + (pFlac->currentFrame.header.blockSize);   /* One past the end; made inclusive just below. */
+    if (lastPCMFrame > 0) {
+        lastPCMFrame -= 1; /* Needs to be zero based. */
+    }
+
+    if (pFirstPCMFrame) {   /* Outputs are optional; only non-NULL pointers are written. */
+        *pFirstPCMFrame = firstPCMFrame;
+    }
+    if (pLastPCMFrame) {
+        *pLastPCMFrame = lastPCMFrame;
+    }
+}
+#endif
+
 static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac)
 {
+    drflac_bool32 result;
+
     drflac_assert(pFlac != NULL);
 
-    drflac_bool32 result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos);
+    result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos);
 
     drflac_zero_memory(&pFlac->currentFrame, sizeof(pFlac->currentFrame));
     pFlac->currentSample = 0;
@@ -3164,94 +4593,134 @@ static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac)
     return result;
 }
 
-static DRFLAC_INLINE drflac_result drflac__seek_to_next_frame(drflac* pFlac)
+static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac)
 {
-    // This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section.
+    /* Skips the rest of the current frame by delegating to drflac__seek_flac_frame() and returns its result. This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */
     drflac_assert(pFlac != NULL);
-    return drflac__seek_frame(pFlac);
+    return drflac__seek_flac_frame(pFlac);
+}
+
+/* Skips forward by up to samplesToRead samples, decoding the next FLAC frame whenever the current one has none remaining.
+   Adds the number of samples actually skipped to pFlac->currentSample and returns it (less than requested if a frame cannot be read). */
+static drflac_uint64 drflac__seek_forward_by_samples(drflac* pFlac, drflac_uint64 samplesToRead)
+{
+    drflac_uint64 samplesRead = 0;
+    while (samplesToRead > 0) {
+        if (pFlac->currentFrame.samplesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else if (pFlac->currentFrame.samplesRemaining > samplesToRead) {   /* The target lies inside the current frame. */
+            samplesRead   += samplesToRead;
+            pFlac->currentFrame.samplesRemaining -= (drflac_uint32)samplesToRead;   /* <-- Safe cast. samplesToRead is smaller than samplesRemaining on this branch. */
+            samplesToRead  = 0;
+        } else {   /* Consume the rest of the current frame, then loop to decode the next one. */
+            samplesRead   += pFlac->currentFrame.samplesRemaining;
+            samplesToRead -= pFlac->currentFrame.samplesRemaining;
+            pFlac->currentFrame.samplesRemaining = 0;
+        }
+    }
+
+    pFlac->currentSample += samplesRead;
+    return samplesRead;
+}
+
+drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek)   /* Seeks forward by pcmFramesToSeek PCM frames, converted to samples by multiplying by the channel count. */
+{
+    return drflac__seek_forward_by_samples(pFlac, pcmFramesToSeek*pFlac->channels);   /* NOTE(review): the value returned is the sample count from drflac__seek_forward_by_samples(), not a PCM frame count - confirm callers expect that. */
 }
 
 static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_uint64 sampleIndex)
 {
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningSampleCount;
+
     drflac_assert(pFlac != NULL);
 
-    drflac_bool32 isMidFrame = DRFLAC_FALSE;
-
-    // If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file.
-    drflac_uint64 runningSampleCount;
+    /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */
     if (sampleIndex >= pFlac->currentSample) {
-        // Seeking forward. Need to seek from the current position.
+        /* Seeking forward. Need to seek from the current position. */
         runningSampleCount = pFlac->currentSample;
 
-        // The frame header for the first frame may not yet have been read. We need to do that if necessary.
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
         if (pFlac->currentSample == 0 && pFlac->currentFrame.samplesRemaining == 0) {
-            if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
                 return DRFLAC_FALSE;
             }
         } else {
             isMidFrame = DRFLAC_TRUE;
         }
     } else {
-        // Seeking backwards. Need to seek from the start of the file.
+        /* Seeking backwards. Need to seek from the start of the file. */
         runningSampleCount = 0;
 
-        // Move back to the start.
+        /* Move back to the start. */
         if (!drflac__seek_to_first_frame(pFlac)) {
             return DRFLAC_FALSE;
         }
 
-        // Decode the first frame in preparation for sample-exact seeking below.
-        if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+        /* Decode the first frame in preparation for sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
             return DRFLAC_FALSE;
         }
     }
 
-    // We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its
-    // header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame.
+    /*
+    We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its
+    header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame.
+    */
     for (;;) {
+        drflac_uint64 sampleCountInThisFrame;
         drflac_uint64 firstSampleInFrame = 0;
         drflac_uint64 lastSampleInFrame = 0;
+
         drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame);
 
-        drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1;
+        sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1;
         if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) {
-            // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
-            // it never existed and keep iterating.
+            /*
+            The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
             drflac_uint64 samplesToDecode = sampleIndex - runningSampleCount;
 
             if (!isMidFrame) {
-                drflac_result result = drflac__decode_frame(pFlac);
+                drflac_result result = drflac__decode_flac_frame(pFlac);
                 if (result == DRFLAC_SUCCESS) {
-                    // The frame is valid. We just need to skip over some samples to ensure it's sample-exact.
-                    return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode;  // <-- If this fails, something bad has happened (it should never fail).
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
                 } else {
                     if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   // CRC mismatch. Pretend this frame never existed.
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
                     } else {
                         return DRFLAC_FALSE;
                     }
                 }
             } else {
-                // We started seeking mid-frame which means we need to skip the frame decoding part.
-                return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode;
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode;
             }
         } else {
-            // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
-            // frame never existed and leave the running sample count untouched.
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
             if (!isMidFrame) {
-                drflac_result result = drflac__seek_to_next_frame(pFlac);
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
                 if (result == DRFLAC_SUCCESS) {
                     runningSampleCount += sampleCountInThisFrame;
                 } else {
                     if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   // CRC mismatch. Pretend this frame never existed.
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
                     } else {
                         return DRFLAC_FALSE;
                     }
                 }
             } else {
-                // We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
-                // drflac__seek_to_next_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
                 runningSampleCount += pFlac->currentFrame.samplesRemaining;
                 pFlac->currentFrame.samplesRemaining = 0;
                 isMidFrame = DRFLAC_FALSE;
@@ -3259,8 +4728,8 @@ static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_u
         }
 
     next_iteration:
-        // Grab the next frame in preparation for the next iteration.
-        if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
             return DRFLAC_FALSE;
         }
     }
@@ -3269,15 +4738,18 @@ static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_u
 
 static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_uint64 sampleIndex)
 {
+    drflac_uint32 iClosestSeekpoint = 0;
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningSampleCount;
+    drflac_uint32 iSeekpoint;
+
     drflac_assert(pFlac != NULL);
 
     if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) {
         return DRFLAC_FALSE;
     }
 
-
-    drflac_uint32 iClosestSeekpoint = 0;
-    for (drflac_uint32 iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
         if (pFlac->pSeekpoints[iSeekpoint].firstSample*pFlac->channels >= sampleIndex) {
             break;
         }
@@ -3285,82 +4757,87 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui
         iClosestSeekpoint = iSeekpoint;
     }
 
-
-    drflac_bool32 isMidFrame = DRFLAC_FALSE;
-
-    // At this point we should have found the seekpoint closest to our sample. If we are seeking forward and the closest seekpoint is _before_ the current sample, we
-    // just seek forward from where we are. Otherwise we start seeking from the seekpoint's first sample.
-    drflac_uint64 runningSampleCount;
+    /*
+    At this point we should have found the seekpoint closest to our sample. If we are seeking forward and the closest seekpoint is _before_ the current sample, we
+    just seek forward from where we are. Otherwise we start seeking from the seekpoint's first sample.
+    */
     if ((sampleIndex >= pFlac->currentSample) && (pFlac->pSeekpoints[iClosestSeekpoint].firstSample*pFlac->channels <= pFlac->currentSample)) {
-        // Optimized case. Just seek forward from where we are.
+        /* Optimized case. Just seek forward from where we are. */
         runningSampleCount = pFlac->currentSample;
 
-        // The frame header for the first frame may not yet have been read. We need to do that if necessary.
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
         if (pFlac->currentSample == 0 && pFlac->currentFrame.samplesRemaining == 0) {
-            if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
                 return DRFLAC_FALSE;
             }
         } else {
             isMidFrame = DRFLAC_TRUE;
         }
     } else {
-        // Slower case. Seek to the start of the seekpoint and then seek forward from there.
+        /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */
         runningSampleCount = pFlac->pSeekpoints[iClosestSeekpoint].firstSample*pFlac->channels;
 
         if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos + pFlac->pSeekpoints[iClosestSeekpoint].frameOffset)) {
             return DRFLAC_FALSE;
         }
 
-        // Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below.
-        if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+        /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
             return DRFLAC_FALSE;
         }
     }
 
     for (;;) {
+        drflac_uint64 sampleCountInThisFrame;
         drflac_uint64 firstSampleInFrame = 0;
         drflac_uint64 lastSampleInFrame = 0;
         drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame);
 
-        drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1;
+        sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1;
         if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) {
-            // The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend
-            // it never existed and keep iterating.
+            /*
+            The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend
+            it never existed and keep iterating.
+            */
             drflac_uint64 samplesToDecode = sampleIndex - runningSampleCount;
 
             if (!isMidFrame) {
-                drflac_result result = drflac__decode_frame(pFlac);
+                drflac_result result = drflac__decode_flac_frame(pFlac);
                 if (result == DRFLAC_SUCCESS) {
-                    // The frame is valid. We just need to skip over some samples to ensure it's sample-exact.
-                    return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode;  // <-- If this fails, something bad has happened (it should never fail).
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
                 } else {
                     if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   // CRC mismatch. Pretend this frame never existed.
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
                     } else {
                         return DRFLAC_FALSE;
                     }
                 }
             } else {
-                // We started seeking mid-frame which means we need to skip the frame decoding part.
-                return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode;
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode;
             }
         } else {
-            // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
-            // frame never existed and leave the running sample count untouched.
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
             if (!isMidFrame) {
-                drflac_result result = drflac__seek_to_next_frame(pFlac);
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
                 if (result == DRFLAC_SUCCESS) {
                     runningSampleCount += sampleCountInThisFrame;
                 } else {
                     if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   // CRC mismatch. Pretend this frame never existed.
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
                     } else {
                         return DRFLAC_FALSE;
                     }
                 }
             } else {
-                // We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
-                // drflac__seek_to_next_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
                 runningSampleCount += pFlac->currentFrame.samplesRemaining;
                 pFlac->currentFrame.samplesRemaining = 0;
                 isMidFrame = DRFLAC_FALSE;
@@ -3368,8 +4845,8 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui
         }
 
     next_iteration:
-        // Grab the next frame in preparation for the next iteration.
-        if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
             return DRFLAC_FALSE;
         }
     }
@@ -3379,8 +4856,8 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui
 #ifndef DR_FLAC_NO_OGG
 typedef struct
 {
-    drflac_uint8 capturePattern[4];  // Should be "OggS"
-    drflac_uint8 structureVersion;   // Always 0.
+    drflac_uint8 capturePattern[4];  /* Should be "OggS" */
+    drflac_uint8 structureVersion;   /* Always 0. */
     drflac_uint8 headerType;
     drflac_uint64 granulePosition;
     drflac_uint32 serialNumber;
@@ -3407,8 +4884,8 @@ typedef struct
     drflac_uint64 runningFilePos;
     drflac_bool32 hasStreamInfoBlock;
     drflac_bool32 hasMetadataBlocks;
-    drflac_bs bs;                           // <-- A bit streamer is required for loading data during initialization.
-    drflac_frame_header firstFrameHeader;   // <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block.
+    drflac_bs bs;                           /* <-- A bit streamer is required for loading data during initialization. */
+    drflac_frame_header firstFrameHeader;   /* <-- The header of the first frame that was read during relaxed initialization. Only set if there is no STREAMINFO block. */
 
 #ifndef DR_FLAC_NO_OGG
     drflac_uint32 oggSerial;
@@ -3438,26 +4915,27 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_r
 
 drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo)
 {
-    // min/max block size.
     drflac_uint32 blockSizes;
+    drflac_uint64 frameSizes = 0;
+    drflac_uint64 importantProps;
+    drflac_uint8 md5[16];
+
+    /* min/max block size. */
     if (onRead(pUserData, &blockSizes, 4) != 4) {
         return DRFLAC_FALSE;
     }
 
-    // min/max frame size.
-    drflac_uint64 frameSizes = 0;
+    /* min/max frame size. */
     if (onRead(pUserData, &frameSizes, 6) != 6) {
         return DRFLAC_FALSE;
     }
 
-    // Sample rate, channels, bits per sample and total sample count.
-    drflac_uint64 importantProps;
+    /* Sample rate, channels, bits per sample and total sample count. */
     if (onRead(pUserData, &importantProps, 8) != 8) {
         return DRFLAC_FALSE;
     }
 
-    // MD5
-    drflac_uint8 md5[16];
+    /* MD5 */
     if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) {
         return DRFLAC_FALSE;
     }
@@ -3467,13 +4945,13 @@ drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData,
     importantProps = drflac__be2host_64(importantProps);
 
     pStreamInfo->minBlockSize     = (blockSizes & 0xFFFF0000) >> 16;
-    pStreamInfo->maxBlockSize     = blockSizes & 0x0000FFFF;
-    pStreamInfo->minFrameSize     = (drflac_uint32)((frameSizes     & (drflac_uint64)0xFFFFFF0000000000) >> 40);
-    pStreamInfo->maxFrameSize     = (drflac_uint32)((frameSizes     & (drflac_uint64)0x000000FFFFFF0000) >> 16);
-    pStreamInfo->sampleRate       = (drflac_uint32)((importantProps & (drflac_uint64)0xFFFFF00000000000) >> 44);
-    pStreamInfo->channels         = (drflac_uint8 )((importantProps & (drflac_uint64)0x00000E0000000000) >> 41) + 1;
-    pStreamInfo->bitsPerSample    = (drflac_uint8 )((importantProps & (drflac_uint64)0x000001F000000000) >> 36) + 1;
-    pStreamInfo->totalSampleCount = (importantProps & (drflac_uint64)0x0000000FFFFFFFFF) * pStreamInfo->channels;
+    pStreamInfo->maxBlockSize     = (blockSizes & 0x0000FFFF);
+    pStreamInfo->minFrameSize     = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40);
+    pStreamInfo->maxFrameSize     = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) <<  0)) >> 16);
+    pStreamInfo->sampleRate       = (drflac_uint32)((importantProps &  (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44);
+    pStreamInfo->channels         = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1;
+    pStreamInfo->bitsPerSample    = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1;
+    pStreamInfo->totalSampleCount =                ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF))) * pStreamInfo->channels;
     drflac_copy_memory(pStreamInfo->md5, md5, sizeof(md5));
 
     return DRFLAC_TRUE;
@@ -3481,13 +4959,16 @@ drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData,
 
 drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize)
 {
-    // We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
-    // we'll be sitting on byte 42.
+    /*
+    We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
+    we'll be sitting on byte 42.
+    */
     drflac_uint64 runningFilePos = 42;
     drflac_uint64 seektablePos   = 0;
     drflac_uint32 seektableSize  = 0;
 
     for (;;) {
+        drflac_metadata metadata;
         drflac_uint8 isLastBlock = 0;
         drflac_uint8 blockType;
         drflac_uint32 blockSize;
@@ -3496,8 +4977,6 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
         }
         runningFilePos += 4;
 
-
-        drflac_metadata metadata;
         metadata.type = blockType;
         metadata.pRawData = NULL;
         metadata.rawDataSize = 0;
@@ -3506,6 +4985,10 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
         {
             case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION:
             {
+                if (blockSize < 4) {
+                    return DRFLAC_FALSE;
+                }
+
                 if (onMeta) {
                     void* pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
@@ -3534,7 +5017,10 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
                 seektableSize = blockSize;
 
                 if (onMeta) {
-                    void* pRawData = DRFLAC_MALLOC(blockSize);
+                    drflac_uint32 iSeekpoint;
+                    void* pRawData;
+
+                    pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
                         return DRFLAC_FALSE;
                     }
@@ -3549,8 +5035,8 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
                     metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint);
                     metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
 
-                    // Endian swap.
-                    for (drflac_uint32 iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
+                    /* Endian swap. */
+                    for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
                         drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
                         pSeekpoint->firstSample = drflac__be2host_64(pSeekpoint->firstSample);
                         pSeekpoint->frameOffset = drflac__be2host_64(pSeekpoint->frameOffset);
@@ -3565,8 +5051,17 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
 
             case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT:
             {
+                if (blockSize < 8) {
+                    return DRFLAC_FALSE;
+                }
+
                 if (onMeta) {
-                    void* pRawData = DRFLAC_MALLOC(blockSize);
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    drflac_uint32 i;
+
+                    pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
                         return DRFLAC_FALSE;
                     }
@@ -3579,11 +5074,43 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
                     metadata.pRawData = pRawData;
                     metadata.rawDataSize = blockSize;
 
-                    const char* pRunningData = (const char*)pRawData;
-                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.vorbis_comment.vendor       = pRunningData;                                      pRunningData += metadata.data.vorbis_comment.vendorLength;
-                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.vorbis_comment.comments     = pRunningData;
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
+                    if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.pComments    = pRunningData;
+
+                    /* Check that the comments section is valid before passing it to the callback */
+                    for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) {
+                        drflac_uint32 commentLength;
+
+                        if (pRunningDataEnd - pRunningData < 4) {
+                            DRFLAC_FREE(pRawData);
+                            return DRFLAC_FALSE;
+                        }
+
+                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                        if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                            DRFLAC_FREE(pRawData);
+                            return DRFLAC_FALSE;
+                        }
+                        pRunningData += commentLength;
+                    }
+
                     onMeta(pUserDataMD, &metadata);
 
                     DRFLAC_FREE(pRawData);
@@ -3592,8 +5119,18 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
 
             case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET:
             {
+                if (blockSize < 396) {
+                    return DRFLAC_FALSE;
+                }
+
                 if (onMeta) {
-                    void* pRawData = DRFLAC_MALLOC(blockSize);
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    drflac_uint8 iTrack;
+                    drflac_uint8 iIndex;
+
+                    pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
                         return DRFLAC_FALSE;
                     }
@@ -3606,12 +5143,42 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
                     metadata.pRawData = pRawData;
                     metadata.rawDataSize = blockSize;
 
-                    const char* pRunningData = (const char*)pRawData;
-                    drflac_copy_memory(metadata.data.cuesheet.catalog, pRunningData, 128);                        pRunningData += 128;
-                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(drflac_uint64*)pRunningData); pRunningData += 4;
-                    metadata.data.cuesheet.isCD              = ((pRunningData[0] & 0x80) >> 7) != 0;              pRunningData += 259;
-                    metadata.data.cuesheet.trackCount        = pRunningData[0];                                   pRunningData += 1;
-                    metadata.data.cuesheet.pTrackData        = (const drflac_uint8*)pRunningData;
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    drflac_copy_memory(metadata.data.cuesheet.catalog, pRunningData, 128);                              pRunningData += 128;
+                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8;
+                    metadata.data.cuesheet.isCD              = (pRunningData[0] & 0x80) != 0;                           pRunningData += 259;
+                    metadata.data.cuesheet.trackCount        = pRunningData[0];                                         pRunningData += 1;
+                    metadata.data.cuesheet.pTrackData        = pRunningData;
+
+                    /* Check that the cuesheet tracks are valid before passing it to the callback */
+                    for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                        drflac_uint8 indexCount;
+                        drflac_uint32 indexPointSize;
+
+                        if (pRunningDataEnd - pRunningData < 36) {
+                            DRFLAC_FREE(pRawData);
+                            return DRFLAC_FALSE;
+                        }
+
+                        /* Skip to the index point count */
+                        pRunningData += 35;
+                        indexCount = pRunningData[0]; pRunningData += 1;
+                        indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index);
+                        if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
+                            DRFLAC_FREE(pRawData);
+                            return DRFLAC_FALSE;
+                        }
+
+                        /* Endian swap. */
+                        for (iIndex = 0; iIndex < indexCount; ++iIndex) {
+                            drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData;
+                            pRunningData += sizeof(drflac_cuesheet_track_index);
+                            pTrack->offset = drflac__be2host_64(pTrack->offset);
+                        }
+                    }
+
                     onMeta(pUserDataMD, &metadata);
 
                     DRFLAC_FREE(pRawData);
@@ -3620,8 +5187,16 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
 
             case DRFLAC_METADATA_BLOCK_TYPE_PICTURE:
             {
+                if (blockSize < 32) {
+                    return DRFLAC_FALSE;
+                }
+
                 if (onMeta) {
-                    void* pRawData = DRFLAC_MALLOC(blockSize);
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+
+                    pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
                         return DRFLAC_FALSE;
                     }
@@ -3634,18 +5209,39 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
                     metadata.pRawData = pRawData;
                     metadata.rawDataSize = blockSize;
 
-                    const char* pRunningData = (const char*)pRawData;
-                    metadata.data.picture.type              = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.mimeLength        = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.mime              = pRunningData;                                      pRunningData += metadata.data.picture.mimeLength;
-                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.description       = pRunningData;
-                    metadata.data.picture.width             = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.height            = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.colorDepth        = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.indexColorCount   = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pictureDataSize   = drflac__be2host_32(*(drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pPictureData      = (const drflac_uint8*)pRunningData;
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
+                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
+                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
+
+                    /* Need space for the picture after the block */
+                    if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        DRFLAC_FREE(pRawData);
+                        return DRFLAC_FALSE;
+                    }
+
                     onMeta(pUserDataMD, &metadata);
 
                     DRFLAC_FREE(pRawData);
@@ -3657,9 +5253,9 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
                 if (onMeta) {
                     metadata.data.padding.unused = 0;
 
-                    // Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback.
+                    /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */
                     if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
-                        isLastBlock = DRFLAC_TRUE;  // An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop.
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
                     } else {
                         onMeta(pUserDataMD, &metadata);
                     }
@@ -3668,18 +5264,20 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
 
             case DRFLAC_METADATA_BLOCK_TYPE_INVALID:
             {
-                // Invalid chunk. Just skip over this one.
+                /* Invalid chunk. Just skip over this one. */
                 if (onMeta) {
                     if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
-                        isLastBlock = DRFLAC_TRUE;  // An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop.
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
                     }
                 }
             } break;
 
             default:
             {
-                // It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we
-                // can at the very least report the chunk to the application and let it look at the raw data.
+                /*
+                It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we
+                can at the very least report the chunk to the application and let it look at the raw data.
+                */
                 if (onMeta) {
                     void* pRawData = DRFLAC_MALLOC(blockSize);
                     if (pRawData == NULL) {
@@ -3700,7 +5298,7 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
             } break;
         }
 
-        // If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above.
+        /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */
         if (onMeta == NULL && blockSize > 0) {
             if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
                 isLastBlock = DRFLAC_TRUE;
@@ -3722,42 +5320,45 @@ drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_s
 
 drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
 {
-    (void)onSeek;
+    /* Precondition: The bit stream should be sitting just past the 4-byte id header. */
 
-    // Pre: The bit stream should be sitting just past the 4-byte id header.
-
-    pInit->container = drflac_container_native;
-
-    // The first metadata block should be the STREAMINFO block.
     drflac_uint8 isLastBlock;
     drflac_uint8 blockType;
     drflac_uint32 blockSize;
+
+    (void)onSeek;
+
+    pInit->container = drflac_container_native;
+
+    /* The first metadata block should be the STREAMINFO block. */
     if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
         return DRFLAC_FALSE;
     }
 
     if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
         if (!relaxed) {
-            // We're opening in strict mode and the first block is not the STREAMINFO block. Error.
+            /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */
             return DRFLAC_FALSE;
         } else {
-            // Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined
-            // for that frame.
+            /*
+            Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined
+            for that frame.
+            */
             pInit->hasStreamInfoBlock = DRFLAC_FALSE;
             pInit->hasMetadataBlocks  = DRFLAC_FALSE;
 
-            if (!drflac__read_next_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) {
-                return DRFLAC_FALSE;    // Couldn't find a frame.
+            if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) {
+                return DRFLAC_FALSE;    /* Couldn't find a frame. */
             }
 
             if (pInit->firstFrameHeader.bitsPerSample == 0) {
-                return DRFLAC_FALSE;    // Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist.
+                return DRFLAC_FALSE;    /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. */
             }
 
             pInit->sampleRate    = pInit->firstFrameHeader.sampleRate;
             pInit->channels      = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment);
             pInit->bitsPerSample = pInit->firstFrameHeader.bitsPerSample;
-            pInit->maxBlockSize  = 65535;   // <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo
+            pInit->maxBlockSize  = 65535;   /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */
             return DRFLAC_TRUE;
         }
     } else {
@@ -3771,8 +5372,8 @@ drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_
         pInit->channels           = streaminfo.channels;
         pInit->bitsPerSample      = streaminfo.bitsPerSample;
         pInit->totalSampleCount   = streaminfo.totalSampleCount;
-        pInit->maxBlockSize       = streaminfo.maxBlockSize;    // Don't care about the min block size - only the max (used for determining the size of the memory allocation).
-        pInit->hasMetadataBlocks = !isLastBlock;
+        pInit->maxBlockSize       = streaminfo.maxBlockSize;    /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */
+        pInit->hasMetadataBlocks  = !isLastBlock;
 
         if (onMeta) {
             drflac_metadata metadata;
@@ -3789,7 +5390,7 @@ drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_
 
 #ifndef DR_FLAC_NO_OGG
 #define DRFLAC_OGG_MAX_PAGE_SIZE            65307
-#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32    1605413199  // CRC-32 of "OggS".
+#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32    1605413199  /* CRC-32 of "OggS". */
 
 typedef enum
 {
@@ -3797,7 +5398,7 @@ typedef enum
     drflac_ogg_fail_on_crc_mismatch
 } drflac_ogg_crc_mismatch_recovery;
 
-
+#ifndef DR_FLAC_NO_CRC
 static drflac_uint32 drflac__crc32_table[] = {
     0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
     0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
@@ -3864,6 +5465,7 @@ static drflac_uint32 drflac__crc32_table[] = {
     0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
     0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L
 };
+#endif
 
 static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data)
 {
@@ -3895,8 +5497,9 @@ static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drfl
 
 static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize)
 {
-    // This can be optimized.
-    for (drflac_uint32 i = 0; i < dataSize; ++i) {
+    /* This can be optimized. */
+    drflac_uint32 i;
+    for (i = 0; i < dataSize; ++i) {
         crc32 = drflac_crc32_byte(crc32, pData[i]);
     }
     return crc32;
@@ -3916,7 +5519,9 @@ static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_p
 static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader)
 {
     drflac_uint32 pageBodySize = 0;
-    for (int i = 0; i < pHeader->segmentCount; ++i) {
+    int i;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
         pageBodySize += pHeader->segmentTable[i];
     }
 
@@ -3925,9 +5530,11 @@ static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_pag
 
 drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
 {
+    drflac_uint8 data[23];
+    drflac_uint32 i;
+
     drflac_assert(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32);
 
-    drflac_uint8 data[23];
     if (onRead(pUserData, data, 23) != 23) {
         return DRFLAC_END_OF_STREAM;
     }
@@ -3941,13 +5548,12 @@ drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_pro
     drflac_copy_memory(&pHeader->checksum,        &data[18], 4);
     pHeader->segmentCount     = data[22];
 
-    // Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0.
+    /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */
     data[18] = 0;
     data[19] = 0;
     data[20] = 0;
     data[21] = 0;
 
-    drflac_uint32 i;
     for (i = 0; i < 23; ++i) {
         *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]);
     }
@@ -3967,20 +5573,23 @@ drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_pro
 
 drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
 {
+    drflac_uint8 id[4];
+
     *pBytesRead = 0;
 
-    drflac_uint8 id[4];
     if (onRead(pUserData, id, 4) != 4) {
         return DRFLAC_END_OF_STREAM;
     }
     *pBytesRead += 4;
 
-    // We need to read byte-by-byte until we find the OggS capture pattern.
+    /* We need to read byte-by-byte until we find the OggS capture pattern. */
     for (;;) {
         if (drflac_ogg__is_capture_pattern(id)) {
+            drflac_result result;
+
             *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
 
-            drflac_result result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32);
+            result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32);
             if (result == DRFLAC_SUCCESS) {
                 return DRFLAC_SUCCESS;
             } else {
@@ -3991,7 +5600,7 @@ drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserD
                 }
             }
         } else {
-            // The first 4 bytes did not equal the capture pattern. Read the next byte and try again.
+            /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */
             id[0] = id[1];
             id[1] = id[2];
             id[2] = id[3];
@@ -4004,25 +5613,27 @@ drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserD
 }
 
 
-// The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works
-// in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed
-// in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type
-// dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from
-// the physical Ogg bitstream are converted and delivered in native FLAC format.
+/*
+The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works
+in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed
+in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type
+dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from
+the physical Ogg bitstream are converted and delivered in native FLAC format.
+*/
 typedef struct
 {
-    drflac_read_proc onRead;    // The original onRead callback from drflac_open() and family.
-    drflac_seek_proc onSeek;    // The original onSeek callback from drflac_open() and family.
-    void* pUserData;            // The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family.
-    drflac_uint64 currentBytePos;   // The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking.
-    drflac_uint64 firstBytePos;     // The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page.
-    drflac_uint32 serialNumber;     // The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization.
-    drflac_ogg_page_header bosPageHeader;   // Used for seeking.
+    drflac_read_proc onRead;                /* The original onRead callback from drflac_open() and family. */
+    drflac_seek_proc onSeek;                /* The original onSeek callback from drflac_open() and family. */
+    void* pUserData;                        /* The user data passed to onRead and onSeek. This is the user data that was passed to drflac_open() and family. */
+    drflac_uint64 currentBytePos;           /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */
+    drflac_uint64 firstBytePos;             /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */
+    drflac_uint32 serialNumber;             /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */
+    drflac_ogg_page_header bosPageHeader;   /* Used for seeking. */
     drflac_ogg_page_header currentPageHeader;
     drflac_uint32 bytesRemainingInPage;
     drflac_uint32 pageDataSize;
     drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE];
-} drflac_oggbs; // oggbs = Ogg Bitstream
+} drflac_oggbs; /* oggbs = Ogg Bitstream */
 
 static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead)
 {
@@ -4059,7 +5670,7 @@ static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uin
             offset -= 0x7FFFFFFF;
         }
 
-        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    // <-- Safe cast thanks to the loop above.
+        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    /* <-- Safe cast thanks to the loop above. */
             return DRFLAC_FALSE;
         }
         oggbs->currentBytePos += offset;
@@ -4074,18 +5685,23 @@ static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_og
     for (;;) {
         drflac_uint32 crc32 = 0;
         drflac_uint32 bytesRead;
+        drflac_uint32 pageBodySize;
+#ifndef DR_FLAC_NO_CRC
+        drflac_uint32 actualCRC32;
+#endif
+
         if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
             return DRFLAC_FALSE;
         }
         oggbs->currentBytePos += bytesRead;
 
-        drflac_uint32 pageBodySize = drflac_ogg__get_page_body_size(&header);
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
         if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) {
-            continue;   // Invalid page size. Assume it's corrupted and just move to the next page.
+            continue;   /* Invalid page size. Assume it's corrupted and just move to the next page. */
         }
 
         if (header.serialNumber != oggbs->serialNumber) {
-            // It's not a FLAC page. Skip it.
+            /* It's not a FLAC page. Skip it. */
             if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {
                 return DRFLAC_FALSE;
             }
@@ -4093,27 +5709,29 @@ static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_og
         }
 
 
-        // We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page.
+        /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */
         if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) {
             return DRFLAC_FALSE;
         }
         oggbs->pageDataSize = pageBodySize;
 
 #ifndef DR_FLAC_NO_CRC
-        drflac_uint32 actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize);
+        actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize);
         if (actualCRC32 != header.checksum) {
             if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) {
-                continue;   // CRC mismatch. Skip this page.
+                continue;   /* CRC mismatch. Skip this page. */
             } else {
-                // Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we
-                // go to the next valid page to ensure we're in a good state, but return false to let the caller know that the
-                // seek did not fully complete.
+                /*
+                Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we
+                go to the next valid page to ensure we're in a good state, but return false to let the caller know that the
+                seek did not fully complete.
+                */
                 drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch);
                 return DRFLAC_FALSE;
             }
         }
 #else
-        (void)recoveryMethod;   // <-- Silence a warning.
+        (void)recoveryMethod;   /* <-- Silence a warning. */
 #endif
 
         oggbs->currentPageHeader = header;
@@ -4122,7 +5740,7 @@ static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_og
     }
 }
 
-// Function below is unused at the moment, but I might be re-adding it later.
+/* Function below is unused at the moment, but I might be re-adding it later. */
 #if 0
 static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg)
 {
@@ -4145,7 +5763,7 @@ static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs,
 
 static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
 {
-    // The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page.
+    /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. */
     for (;;) {
         drflac_bool32 atEndOfPage = DRFLAC_FALSE;
 
@@ -4166,24 +5784,28 @@ static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
             bytesToEndOfPacketOrPage += segmentSize;
         }
 
-        // At this point we will have found either the packet or the end of the page. If were at the end of the page we'll
-        // want to load the next page and keep searching for the end of the packet.
+        /*
+        At this point we will have found either the packet or the end of the page. If we're at the end of the page we'll
+        want to load the next page and keep searching for the end of the packet.
+        */
         drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current);
         oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage;
 
         if (atEndOfPage) {
-            // We're potentially at the next packet, but we need to check the next page first to be sure because the packet may
-            // straddle pages.
+            /*
+            We're potentially at the next packet, but we need to check the next page first to be sure because the packet may
+            straddle pages.
+            */
             if (!drflac_oggbs__goto_next_page(oggbs)) {
                 return DRFLAC_FALSE;
             }
 
-            // If it's a fresh packet it most likely means we're at the next packet.
+            /* If it's a fresh packet it most likely means we're at the next packet. */
             if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {
                 return DRFLAC_TRUE;
             }
         } else {
-            // We're at the next packet.
+            /* We're at the next packet. */
             return DRFLAC_TRUE;
         }
     }
@@ -4191,9 +5813,9 @@ static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
 
 static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
 {
-    // The bitstream should be sitting on the first byte just after the header of the frame.
+    /* Precondition: the bitstream should be sitting on the first byte just after the header of the frame. */
 
-    // What we're actually doing here is seeking to the start of the next packet.
+    /* What we're actually doing here is seeking to the start of the next packet via drflac_oggbs__seek_to_next_packet(). */
     return drflac_oggbs__seek_to_next_packet(oggbs);
 }
 #endif
@@ -4201,12 +5823,13 @@ static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
 static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead)
 {
     drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
-    drflac_assert(oggbs != NULL);
-
     drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut;
-
-    // Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one.
     size_t bytesRead = 0;
+
+    drflac_assert(oggbs != NULL);
+    drflac_assert(pRunningBufferOut != NULL);
+
+    /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */
     while (bytesRead < bytesToRead) {
         size_t bytesRemainingToRead = bytesToRead - bytesRead;
 
@@ -4217,7 +5840,7 @@ static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytes
             break;
         }
 
-        // If we get here it means some of the requested data is contained in the next pages.
+        /* If we get here it means some of the requested data is contained in the next pages. */
         if (oggbs->bytesRemainingInPage > 0) {
             drflac_copy_memory(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage);
             bytesRead += oggbs->bytesRemainingInPage;
@@ -4227,7 +5850,7 @@ static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytes
 
         drflac_assert(bytesRemainingToRead > 0);
         if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
-            break;  // Failed to go to the next page. Might have simply hit the end of the stream.
+            break;  /* Failed to go to the next page. Might have simply hit the end of the stream. */
         }
     }
 
@@ -4237,10 +5860,12 @@ static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytes
 static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin)
 {
     drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
-    drflac_assert(oggbs != NULL);
-    drflac_assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
+    int bytesSeeked = 0;
 
-    // Seeking is always forward which makes things a lot simpler.
+    drflac_assert(oggbs != NULL);
+    drflac_assert(offset >= 0);  /* <-- Never seek backwards. */
+
+    /* Seeking is always forward which makes things a lot simpler. */
     if (origin == drflac_seek_origin_start) {
         if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) {
             return DRFLAC_FALSE;
@@ -4253,10 +5878,8 @@ static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_see
         return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current);
     }
 
-
     drflac_assert(origin == drflac_seek_origin_current);
 
-    int bytesSeeked = 0;
     while (bytesSeeked < offset) {
         int bytesRemainingToSeek = offset - bytesSeeked;
         drflac_assert(bytesRemainingToSeek >= 0);
@@ -4267,7 +5890,7 @@ static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_see
             break;
         }
 
-        // If we get here it means some of the requested data is contained in the next pages.
+        /* If we get here it means some of the requested data is contained in the next pages. */
         if (oggbs->bytesRemainingInPage > 0) {
             bytesSeeked += (int)oggbs->bytesRemainingInPage;
             oggbs->bytesRemainingInPage = 0;
@@ -4275,7 +5898,7 @@ static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_see
 
         drflac_assert(bytesRemainingToSeek > 0);
         if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
-            // Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch.
+            /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */
             return DRFLAC_FALSE;
         }
     }
@@ -4286,38 +5909,45 @@ static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_see
 drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex)
 {
     drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+    drflac_uint64 originalBytePos;
+    drflac_uint64 runningGranulePosition;
+    drflac_uint64 runningFrameBytePos;
+    drflac_uint64 runningSampleCount;
 
-    drflac_uint64 originalBytePos = oggbs->currentBytePos;   // For recovery.
+    drflac_assert(oggbs != NULL);
 
-    // First seek to the first frame.
+    originalBytePos = oggbs->currentBytePos;   /* For recovery. */
+
+    /* First seek to the first frame. */
     if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFramePos)) {
         return DRFLAC_FALSE;
     }
     oggbs->bytesRemainingInPage = 0;
 
-    drflac_uint64 runningGranulePosition = 0;
-    drflac_uint64 runningFrameBytePos = oggbs->currentBytePos;   // <-- Points to the OggS identifier.
+    runningGranulePosition = 0;
+    runningFrameBytePos = oggbs->currentBytePos;   /* <-- Points to the OggS identifier. */
     for (;;) {
         if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
             drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
-            return DRFLAC_FALSE;   // Never did find that sample...
+            return DRFLAC_FALSE;   /* Never did find that sample... */
         }
 
         runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize;
         if (oggbs->currentPageHeader.granulePosition*pFlac->channels >= sampleIndex) {
-            break; // The sample is somewhere in the previous page.
+            break; /* The sample is somewhere in the previous page. */
         }
 
-
-        // At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we
-        // disregard any pages that do not begin a fresh packet.
-        if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {    // <-- Is it a fresh page?
+        /*
+        At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we
+        disregard any pages that do not begin a fresh packet.
+        */
+        if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {    /* <-- Is it a fresh page? */
             if (oggbs->currentPageHeader.segmentTable[0] >= 2) {
                 drflac_uint8 firstBytesInPage[2];
                 firstBytesInPage[0] = oggbs->pageData[0];
                 firstBytesInPage[1] = oggbs->pageData[1];
 
-                if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {    // <-- Does the page begin with a frame's sync code?
+                if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {    /* <-- Does the page begin with a frame's sync code? */
                     runningGranulePosition = oggbs->currentPageHeader.granulePosition*pFlac->channels;
                 }
 
@@ -4326,11 +5956,12 @@ drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleInde
         }
     }
 
-
-    // We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the
-    // start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of
-    // a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
-    // we find the one containing the target sample.
+    /*
+    We found the page that is closest to the sample, so now we need to find it. The first thing to do is seek to the
+    start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of
+    a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
+    we find the one containing the target sample.
+    */
     if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) {
         return DRFLAC_FALSE;
     }
@@ -4338,66 +5969,75 @@ drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleInde
         return DRFLAC_FALSE;
     }
 
-
-    // At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep
-    // looping over these frames until we find the one containing the sample we're after.
-    drflac_uint64 runningSampleCount = runningGranulePosition;
+    /*
+    At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep
+    looping over these frames until we find the one containing the sample we're after.
+    */
+    runningSampleCount = runningGranulePosition;
     for (;;) {
-        // There are two ways to find the sample and seek past irrelevant frames:
-        //   1) Use the native FLAC decoder.
-        //   2) Use Ogg's framing system.
-        //
-        // Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to
-        // do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code
-        // duplication for the decoding of frame headers.
-        //
-        // Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg
-        // bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the
-        // standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks
-        // the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read
-        // using the native FLAC decoding APIs, such as drflac__read_next_frame_header(), need to be re-implemented so as to
-        // avoid the use of the drflac_bs object.
-        //
-        // Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons:
-        //   1) Seeking is already partially accelerated using Ogg's paging system in the code block above.
-        //   2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon.
-        //   3) Simplicity.
-        if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+        /*
+        There are two ways to find the sample and seek past irrelevant frames:
+          1) Use the native FLAC decoder.
+          2) Use Ogg's framing system.
+        
+        Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to
+        do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code
+        duplication for the decoding of frame headers.
+        
+        Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg
+        bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the
+        standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks
+        the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read
+        using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), would need to be re-implemented so as to
+        avoid the use of the drflac_bs object.
+        
+        Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons:
+          1) Seeking is already partially accelerated using Ogg's paging system in the code block above.
+          2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon.
+          3) Simplicity.
+        */
+        drflac_uint64 firstSampleInFrame = 0;
+        drflac_uint64 lastSampleInFrame = 0;
+        drflac_uint64 sampleCountInThisFrame;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
             return DRFLAC_FALSE;
         }
 
-        drflac_uint64 firstSampleInFrame = 0;
-        drflac_uint64 lastSampleInFrame = 0;
         drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame);
 
-        drflac_uint64 sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1;
+        sampleCountInThisFrame = (lastSampleInFrame - firstSampleInFrame) + 1;
         if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) {
-            // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
-            // it never existed and keep iterating.
-            drflac_result result = drflac__decode_frame(pFlac);
+            /*
+            The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_result result = drflac__decode_flac_frame(pFlac);
             if (result == DRFLAC_SUCCESS) {
-                // The frame is valid. We just need to skip over some samples to ensure it's sample-exact.
-                drflac_uint64 samplesToDecode = (size_t)(sampleIndex - runningSampleCount);    // <-- Safe cast because the maximum number of samples in a frame is 65535.
+                /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                drflac_uint64 samplesToDecode = (size_t)(sampleIndex - runningSampleCount);    /* <-- Safe cast because the maximum number of samples in a frame is 65535. */
                 if (samplesToDecode == 0) {
                     return DRFLAC_TRUE;
                 }
-                return drflac_read_s32(pFlac, samplesToDecode, NULL) != 0;  // <-- If this fails, something bad has happened (it should never fail).
+                return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
             } else {
                 if (result == DRFLAC_CRC_MISMATCH) {
-                    continue;   // CRC mismatch. Pretend this frame never existed.
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
                 } else {
                     return DRFLAC_FALSE;
                 }
             }
         } else {
-            // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
-            // frame never existed and leave the running sample count untouched.
-            drflac_result result = drflac__seek_to_next_frame(pFlac);
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
             if (result == DRFLAC_SUCCESS) {
                 runningSampleCount += sampleCountInThisFrame;
             } else {
                 if (result == DRFLAC_CRC_MISMATCH) {
-                    continue;   // CRC mismatch. Pretend this frame never existed.
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
                 } else {
                     return DRFLAC_FALSE;
                 }
@@ -4409,45 +6049,48 @@ drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleInde
 
 drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
 {
-    // Pre: The bit stream should be sitting just past the 4-byte OggS capture pattern.
+    drflac_ogg_page_header header;
+    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+    drflac_uint32 bytesRead = 0;
+
+    /* Precondition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */
     (void)relaxed;
 
     pInit->container = drflac_container_ogg;
     pInit->oggFirstBytePos = 0;
 
-    // We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the
-    // stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if
-    // any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
-    drflac_ogg_page_header header;
-
-    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
-    drflac_uint32 bytesRead = 0;
+    /*
+    We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the
+    stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if
+    any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
+    */
     if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
         return DRFLAC_FALSE;
     }
     pInit->runningFilePos += bytesRead;
 
     for (;;) {
-        // Break if we're past the beginning of stream page.
+        int pageBodySize;
+
+        /* Break if we're past the beginning of stream page. */
         if ((header.headerType & 0x02) == 0) {
             return DRFLAC_FALSE;
         }
 
-
-        // Check if it's a FLAC header.
-        int pageBodySize = drflac_ogg__get_page_body_size(&header);
-        if (pageBodySize == 51) {   // 51 = the lacing value of the FLAC header packet.
-            // It could be a FLAC page...
+        /* Check if it's a FLAC header. */
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize == 51) {   /* 51 = the lacing value of the FLAC header packet. */
+            /* It could be a FLAC page... */
             drflac_uint32 bytesRemainingInPage = pageBodySize;
-
             drflac_uint8 packetType;
+
             if (onRead(pUserData, &packetType, 1) != 1) {
                 return DRFLAC_FALSE;
             }
 
             bytesRemainingInPage -= 1;
             if (packetType == 0x7F) {
-                // Increasingly more likely to be a FLAC page...
+                /* Increasingly more likely to be a FLAC page... */
                 drflac_uint8 sig[4];
                 if (onRead(pUserData, sig, 4) != 4) {
                     return DRFLAC_FALSE;
@@ -4455,29 +6098,32 @@ drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_pro
 
                 bytesRemainingInPage -= 4;
                 if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') {
-                    // Almost certainly a FLAC page...
+                    /* Almost certainly a FLAC page... */
                     drflac_uint8 mappingVersion[2];
                     if (onRead(pUserData, mappingVersion, 2) != 2) {
                         return DRFLAC_FALSE;
                     }
 
                     if (mappingVersion[0] != 1) {
-                        return DRFLAC_FALSE;   // Only supporting version 1.x of the Ogg mapping.
+                        return DRFLAC_FALSE;   /* Only supporting version 1.x of the Ogg mapping. */
                     }
 
-                    // The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
-                    // be handling it in a generic way based on the serial number and packet types.
+                    /*
+                    The next 2 bytes are the number of non-audio packets, not including this one. We don't care about this because we're going to
+                    be handling it in a generic way based on the serial number and packet types.
+                    */
                     if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
                         return DRFLAC_FALSE;
                     }
 
-                    // Expecting the native FLAC signature "fLaC".
+                    /* Expecting the native FLAC signature "fLaC". */
                     if (onRead(pUserData, sig, 4) != 4) {
                         return DRFLAC_FALSE;
                     }
 
                     if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') {
-                        // The remaining data in the page should be the STREAMINFO block.
+                        /* The remaining data in the page should be the STREAMINFO block. */
+                        drflac_streaminfo streaminfo;
                         drflac_uint8 isLastBlock;
                         drflac_uint8 blockType;
                         drflac_uint32 blockSize;
@@ -4486,12 +6132,11 @@ drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_pro
                         }
 
                         if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
-                            return DRFLAC_FALSE;    // Invalid block type. First block must be the STREAMINFO block.
+                            return DRFLAC_FALSE;    /* Invalid block type. First block must be the STREAMINFO block. */
                         }
 
-                        drflac_streaminfo streaminfo;
                         if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
-                            // Success!
+                            /* Success! */
                             pInit->hasStreamInfoBlock = DRFLAC_TRUE;
                             pInit->sampleRate         = streaminfo.sampleRate;
                             pInit->channels           = streaminfo.channels;
@@ -4510,26 +6155,26 @@ drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_pro
                             }
 
                             pInit->runningFilePos  += pageBodySize;
-                            pInit->oggFirstBytePos  = pInit->runningFilePos - 79;   // Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page.
+                            pInit->oggFirstBytePos  = pInit->runningFilePos - 79;   /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */
                             pInit->oggSerial        = header.serialNumber;
                             pInit->oggBosHeader     = header;
                             break;
                         } else {
-                            // Failed to read STREAMINFO block. Aww, so close...
+                            /* Failed to read STREAMINFO block. Aww, so close... */
                             return DRFLAC_FALSE;
                         }
                     } else {
-                        // Invalid file.
+                        /* Invalid file. */
                         return DRFLAC_FALSE;
                     }
                 } else {
-                    // Not a FLAC header. Skip it.
+                    /* Not a FLAC header. Skip it. */
                     if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
                         return DRFLAC_FALSE;
                     }
                 }
             } else {
-                // Not a FLAC header. Seek past the entire page and move on to the next.
+                /* Not a FLAC header. Seek past the entire page and move on to the next. */
                 if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
                     return DRFLAC_FALSE;
                 }
@@ -4543,24 +6188,28 @@ drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_pro
         pInit->runningFilePos += pageBodySize;
 
 
-        // Read the header of the next page.
+        /* Read the header of the next page. */
         if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
             return DRFLAC_FALSE;
         }
         pInit->runningFilePos += bytesRead;
     }
 
-
-    // If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
-    // packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the
-    // Ogg bistream object.
-    pInit->hasMetadataBlocks = DRFLAC_TRUE;    // <-- Always have at least VORBIS_COMMENT metadata block.
+    /*
+    If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
+    packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the
+    Ogg bitstream object.
+    */
+    pInit->hasMetadataBlocks = DRFLAC_TRUE;    /* <-- Always have at least VORBIS_COMMENT metadata block. */
     return DRFLAC_TRUE;
 }
 #endif
 
 drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
 {
+    drflac_bool32 relaxed;
+    drflac_uint8 id[4];
+
     if (pInit == NULL || onRead == NULL || onSeek == NULL) {
         return DRFLAC_FALSE;
     }
@@ -4579,27 +6228,28 @@ drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onR
     drflac__reset_cache(&pInit->bs);
 
 
-    // If the container is explicitly defined then we can try opening in relaxed mode.
-    drflac_bool32 relaxed = container != drflac_container_unknown;
+    /* If the container is explicitly defined then we can try opening in relaxed mode. */
+    relaxed = container != drflac_container_unknown;
 
-    drflac_uint8 id[4];
-
-    // Skip over any ID3 tags.
+    /* Skip over any ID3 tags. */
     for (;;) {
         if (onRead(pUserData, id, 4) != 4) {
-            return DRFLAC_FALSE;    // Ran out of data.
+            return DRFLAC_FALSE;    /* Ran out of data. */
         }
         pInit->runningFilePos += 4;
 
         if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') {
             drflac_uint8 header[6];
+            drflac_uint8 flags;
+            drflac_uint32 headerSize;
+
             if (onRead(pUserData, header, 6) != 6) {
-                return DRFLAC_FALSE;    // Ran out of data.
+                return DRFLAC_FALSE;    /* Ran out of data. */
             }
             pInit->runningFilePos += 6;
 
-            drflac_uint8 flags = header[1];
-            drflac_uint32 headerSize;
+            flags = header[1];
+
             drflac_copy_memory(&headerSize, header+2, 4);
             headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize));
             if (flags & 0x10) {
@@ -4607,7 +6257,7 @@ drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onR
             }
 
             if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;    // Failed to seek past the tag.
+                return DRFLAC_FALSE;    /* Failed to seek past the tag. */
             }
             pInit->runningFilePos += headerSize;
         } else {
@@ -4624,7 +6274,7 @@ drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onR
     }
 #endif
 
-    // If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable.
+    /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */
     if (relaxed) {
         if (container == drflac_container_native) {
             return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
@@ -4636,7 +6286,7 @@ drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onR
 #endif
     }
 
-    // Unsupported container.
+    /* Unsupported container. */
     return DRFLAC_FALSE;
 }
 
@@ -4646,61 +6296,76 @@ void drflac__init_from_info(drflac* pFlac, drflac_init_info* pInit)
     drflac_assert(pInit != NULL);
 
     drflac_zero_memory(pFlac, sizeof(*pFlac));
-    pFlac->bs               = pInit->bs;
-    pFlac->onMeta           = pInit->onMeta;
-    pFlac->pUserDataMD      = pInit->pUserDataMD;
-    pFlac->maxBlockSize     = pInit->maxBlockSize;
-    pFlac->sampleRate       = pInit->sampleRate;
-    pFlac->channels         = (drflac_uint8)pInit->channels;
-    pFlac->bitsPerSample    = (drflac_uint8)pInit->bitsPerSample;
-    pFlac->totalSampleCount = pInit->totalSampleCount;
-    pFlac->container        = pInit->container;
+    pFlac->bs                 = pInit->bs;
+    pFlac->onMeta             = pInit->onMeta;
+    pFlac->pUserDataMD        = pInit->pUserDataMD;
+    pFlac->maxBlockSize       = pInit->maxBlockSize;
+    pFlac->sampleRate         = pInit->sampleRate;
+    pFlac->channels           = (drflac_uint8)pInit->channels;
+    pFlac->bitsPerSample      = (drflac_uint8)pInit->bitsPerSample;
+    pFlac->totalSampleCount   = pInit->totalSampleCount;
+    pFlac->totalPCMFrameCount = pInit->totalSampleCount / pFlac->channels;
+    pFlac->container          = pInit->container;
 }
 
 drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
 {
+    drflac_init_info init;
+    drflac_uint32 allocationSize;
+    drflac_uint32 wholeSIMDVectorCountPerChannel;
+    drflac_uint32 decodedSamplesAllocationSize;
+#ifndef DR_FLAC_NO_OGG
+    drflac_uint32 oggbsAllocationSize;
+    drflac_oggbs oggbs;
+#endif
+    drflac_uint64 firstFramePos;
+    drflac_uint64 seektablePos;
+    drflac_uint32 seektableSize;
+    drflac* pFlac;
+
 #ifndef DRFLAC_NO_CPUID
-    // CPU support first.
+    /* CPU support first. */
     drflac__init_cpu_caps();
 #endif
 
-    drflac_init_info init;
     if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) {
         return NULL;
     }
 
-    // The size of the allocation for the drflac object needs to be large enough to fit the following:
-    //   1) The main members of the drflac structure
-    //   2) A block of memory large enough to store the decoded samples of the largest frame in the stream
-    //   3) If the container is Ogg, a drflac_oggbs object
-    //
-    // The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration
-    // the different SIMD instruction sets.
-    drflac_uint32 allocationSize = sizeof(drflac);
+    /*
+    The size of the allocation for the drflac object needs to be large enough to fit the following:
+      1) The main members of the drflac structure
+      2) A block of memory large enough to store the decoded samples of the largest frame in the stream
+      3) If the container is Ogg, a drflac_oggbs object
+    
+    The complicated part of the allocation is making sure there's enough room for the decoded samples, taking into consideration
+    the different SIMD instruction sets.
+    */
+    allocationSize = sizeof(drflac);
 
-    // The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
-    // we are supporting.
-    drflac_uint32 wholeSIMDVectorCountPerChannel;
-    if ((init.maxBlockSize % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
-        wholeSIMDVectorCountPerChannel = (init.maxBlockSize / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
+    /*
+    The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
+    we are supporting.
+    */
+    if (((init.maxBlockSize+DRFLAC_LEADING_SAMPLES) % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
+        wholeSIMDVectorCountPerChannel = ((init.maxBlockSize+DRFLAC_LEADING_SAMPLES) / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
     } else {
-        wholeSIMDVectorCountPerChannel = (init.maxBlockSize / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
+        wholeSIMDVectorCountPerChannel = ((init.maxBlockSize+DRFLAC_LEADING_SAMPLES) / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
     }
 
-    drflac_uint32 decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
+    decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
 
     allocationSize += decodedSamplesAllocationSize;
-    allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  // Allocate extra bytes to ensure we have enough for alignment.
+    allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  /* Allocate extra bytes to ensure we have enough for alignment. */
 
 #ifndef DR_FLAC_NO_OGG
-    // There's additional data required for Ogg streams.
-    drflac_uint32 oggbsAllocationSize = 0;
+    /* There's additional data required for Ogg streams. */
+    oggbsAllocationSize = 0;
     if (init.container == drflac_container_ogg) {
         oggbsAllocationSize = sizeof(drflac_oggbs);
         allocationSize += oggbsAllocationSize;
     }
 
-    drflac_oggbs oggbs;
     drflac_zero_memory(&oggbs, sizeof(oggbs));
     if (init.container == drflac_container_ogg) {
         oggbs.onRead = onRead;
@@ -4714,12 +6379,14 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p
     }
 #endif
 
-    // This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to
-    // consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading
-    // and decoding the metadata.
-    drflac_uint64 firstFramePos = 42;   // <-- We know we are at byte 42 at this point.
-    drflac_uint64 seektablePos  = 0;
-    drflac_uint32 seektableSize = 0;
+    /*
+    This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to
+    consist of only a single heap allocation. To do this, the size of the seek table needs to be known, which we determine when reading
+    and decoding the metadata.
+    */
+    firstFramePos = 42;   /* <-- We know we are at byte 42 at this point. */
+    seektablePos  = 0;
+    seektableSize = 0;
     if (init.hasMetadataBlocks) {
         drflac_read_proc onReadOverride = onRead;
         drflac_seek_proc onSeekOverride = onSeek;
@@ -4741,7 +6408,7 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p
     }
 
 
-    drflac* pFlac = (drflac*)DRFLAC_MALLOC(allocationSize);
+    pFlac = (drflac*)DRFLAC_MALLOC(allocationSize);
     drflac__init_from_info(pFlac, &init);
     pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE);
 
@@ -4750,7 +6417,7 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p
         drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
         *pInternalOggbs = oggbs;
 
-        // The Ogg bistream needs to be layered on top of the original bitstream.
+        /* The Ogg bitstream needs to be layered on top of the original bitstream. */
         pFlac->bs.onRead = drflac__on_read_ogg;
         pFlac->bs.onSeek = drflac__on_seek_ogg;
         pFlac->bs.pUserData = (void*)pInternalOggbs;
@@ -4760,7 +6427,7 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p
 
     pFlac->firstFramePos = firstFramePos;
 
-    // NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now.
+    /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */
 #ifndef DR_FLAC_NO_OGG
     if (init.container == drflac_container_ogg)
     {
@@ -4770,32 +6437,34 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p
     else
 #endif
     {
-        // If we have a seektable we need to load it now, making sure we move back to where we were previously.
+        /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */
         if (seektablePos != 0) {
             pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints);
             pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
 
-            // Seek to the seektable, then just read directly into our seektable buffer.
+            /* Seek to the seektable, then just read directly into our seektable buffer. */
             if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
                 if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) {
-                    // Endian swap.
-                    for (drflac_uint32 iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+                    /* Endian swap. */
+                    drflac_uint32 iSeekpoint;
+                    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
                         pFlac->pSeekpoints[iSeekpoint].firstSample = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstSample);
                         pFlac->pSeekpoints[iSeekpoint].frameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].frameOffset);
                         pFlac->pSeekpoints[iSeekpoint].sampleCount = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].sampleCount);
                     }
                 } else {
-                    // Failed to read the seektable. Pretend we don't have one.
+                    /* Failed to read the seektable. Pretend we don't have one. */
                     pFlac->pSeekpoints = NULL;
                     pFlac->seekpointCount = 0;
                 }
 
-                // We need to seek back to where we were. If this fails it's a critical error.
+                /* We need to seek back to where we were. If this fails it's a critical error. */
                 if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFramePos, drflac_seek_origin_start)) {
+                    DRFLAC_FREE(pFlac);
                     return NULL;
                 }
             } else {
-                // Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one.
+                /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */
                 pFlac->pSeekpoints = NULL;
                 pFlac->seekpointCount = 0;
             }
@@ -4803,19 +6472,20 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p
     }
 
     
-
-    // If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
-    // the first frame.
+    /*
+    If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
+    the first frame.
+    */
     if (!init.hasStreamInfoBlock) {
         pFlac->currentFrame.header = init.firstFrameHeader;
         do
         {
-            drflac_result result = drflac__decode_frame(pFlac);
+            drflac_result result = drflac__decode_flac_frame(pFlac);
             if (result == DRFLAC_SUCCESS) {
                 break;
             } else {
                 if (result == DRFLAC_CRC_MISMATCH) {
-                    if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
+                    if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) {
                         DRFLAC_FREE(pFlac);
                         return NULL;
                     }
@@ -4834,9 +6504,6 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p
 
 
 #ifndef DR_FLAC_NO_STDIO
-typedef void* drflac_file;
-
-#if defined(DR_FLAC_NO_WIN32_IO) || !defined(_WIN32)
 #include <stdio.h>
 
 static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
@@ -4846,15 +6513,15 @@ static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t byt
 
 static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
 {
-    drflac_assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
+    drflac_assert(offset >= 0);  /* <-- The offset is never negative: relative seeks only move forward, rewinding is done from drflac_seek_origin_start. */
 
     return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
 }
 
-static drflac_file drflac__open_file_handle(const char* filename)
+static FILE* drflac__fopen(const char* filename)
 {
     FILE* pFile;
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && _MSC_VER >= 1400
     if (fopen_s(&pFile, filename, "rb") != 0) {
         return NULL;
     }
@@ -4865,65 +6532,23 @@ static drflac_file drflac__open_file_handle(const char* filename)
     }
 #endif
 
-    return (drflac_file)pFile;
+    return pFile;
 }
 
-static void drflac__close_file_handle(drflac_file file)
-{
-    fclose((FILE*)file);
-}
-#else
-#include <windows.h>
-
-// This doesn't seem to be defined for VC6.
-#ifndef INVALID_SET_FILE_POINTER
-#define INVALID_SET_FILE_POINTER ((DWORD)-1)
-#endif
-
-static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
-{
-    drflac_assert(bytesToRead < 0xFFFFFFFF);   // dr_flac will never request huge amounts of data at a time. This is a safe assertion.
-
-    DWORD bytesRead;
-    ReadFile((HANDLE)pUserData, bufferOut, (DWORD)bytesToRead, &bytesRead, NULL);
-
-    return (size_t)bytesRead;
-}
-
-static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
-{
-    drflac_assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
-
-    return SetFilePointer((HANDLE)pUserData, offset, NULL, (origin == drflac_seek_origin_current) ? FILE_CURRENT : FILE_BEGIN) != INVALID_SET_FILE_POINTER;
-}
-
-static drflac_file drflac__open_file_handle(const char* filename)
-{
-    HANDLE hFile = CreateFileA(filename, FILE_GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
-    if (hFile == INVALID_HANDLE_VALUE) {
-        return NULL;
-    }
-
-    return (drflac_file)hFile;
-}
-
-static void drflac__close_file_handle(drflac_file file)
-{
-    CloseHandle((HANDLE)file);
-}
-#endif
-
 
 drflac* drflac_open_file(const char* filename)
 {
-    drflac_file file = drflac__open_file_handle(filename);
-    if (file == NULL) {
+    drflac* pFlac;
+    FILE* pFile;
+
+    pFile = drflac__fopen(filename);
+    if (pFile == NULL) {
         return NULL;
     }
 
-    drflac* pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)file);
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile);
     if (pFlac == NULL) {
-        drflac__close_file_handle(file);
+        fclose(pFile);
         return NULL;
     }
 
@@ -4932,28 +6557,33 @@ drflac* drflac_open_file(const char* filename)
 
 drflac* drflac_open_file_with_metadata(const char* filename, drflac_meta_proc onMeta, void* pUserData)
 {
-    drflac_file file = drflac__open_file_handle(filename);
-    if (file == NULL) {
+    drflac* pFlac;
+    FILE* pFile;
+
+    pFile = drflac__fopen(filename);
+    if (pFile == NULL) {
         return NULL;
     }
 
-    drflac* pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)file, pUserData);
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData);
     if (pFlac == NULL) {
-        drflac__close_file_handle(file);
+        fclose(pFile);  /* No decoder was created, so the file has to be closed here before reporting failure. */
         return pFlac;
     }
 
     return pFlac;
 }
-#endif  //DR_FLAC_NO_STDIO
+#endif  /* DR_FLAC_NO_STDIO */
 
 static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
 {
     drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+    size_t bytesRemaining;
+
     drflac_assert(memoryStream != NULL);
     drflac_assert(memoryStream->dataSize >= memoryStream->currentReadPos);
 
-    size_t bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos;
+    bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos;
     if (bytesToRead > bytesRemaining) {
         bytesToRead = bytesRemaining;
     }
@@ -4969,21 +6599,25 @@ static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t by
 static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
 {
     drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+
     drflac_assert(memoryStream != NULL);
-    drflac_assert(offset > 0 || (offset == 0 && origin == drflac_seek_origin_start));
-    drflac_assert(offset <= (drflac_int64)memoryStream->dataSize);
+    drflac_assert(offset >= 0); /* <-- Never seek backwards. */
+
+    if (offset > (drflac_int64)memoryStream->dataSize) {
+        return DRFLAC_FALSE;
+    }
 
     if (origin == drflac_seek_origin_current) {
         if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) {
             memoryStream->currentReadPos += offset;
         } else {
-            memoryStream->currentReadPos = memoryStream->dataSize;  // Trying to seek too far forward.
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
         }
     } else {
         if ((drflac_uint32)offset <= memoryStream->dataSize) {
             memoryStream->currentReadPos = offset;
         } else {
-            memoryStream->currentReadPos = memoryStream->dataSize;  // Trying to seek too far forward.
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
         }
     }
 
@@ -4993,17 +6627,19 @@ static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_
 drflac* drflac_open_memory(const void* data, size_t dataSize)
 {
     drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
     memoryStream.data = (const unsigned char*)data;
     memoryStream.dataSize = dataSize;
     memoryStream.currentReadPos = 0;
-    drflac* pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream);
+    pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream);
     if (pFlac == NULL) {
         return NULL;
     }
 
     pFlac->memoryStream = memoryStream;
 
-    // This is an awful hack...
+    /* This is an awful hack... */
 #ifndef DR_FLAC_NO_OGG
     if (pFlac->container == drflac_container_ogg)
     {
@@ -5022,17 +6658,19 @@ drflac* drflac_open_memory(const void* data, size_t dataSize)
 drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drflac_meta_proc onMeta, void* pUserData)
 {
     drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
     memoryStream.data = (const unsigned char*)data;
     memoryStream.dataSize = dataSize;
     memoryStream.currentReadPos = 0;
-    drflac* pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData);
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData);
     if (pFlac == NULL) {
         return NULL;
     }
 
     pFlac->memoryStream = memoryStream;
 
-    // This is an awful hack...
+    /* This is an awful hack... */
 #ifndef DR_FLAC_NO_OGG
     if (pFlac->container == drflac_container_ogg)
     {
@@ -5075,19 +6713,22 @@ void drflac_close(drflac* pFlac)
     }
 
 #ifndef DR_FLAC_NO_STDIO
-    // If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file()
-    // was used by looking at the callbacks.
+    /*
+    If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file()
+    was used by looking at the callbacks.
+    */
     if (pFlac->bs.onRead == drflac__on_read_stdio) {
-        drflac__close_file_handle((drflac_file)pFlac->bs.pUserData);
+        fclose((FILE*)pFlac->bs.pUserData);
     }
 
 #ifndef DR_FLAC_NO_OGG
-    // Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained.
+    /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */
     if (pFlac->container == drflac_container_ogg) {
-        drflac_assert(pFlac->bs.onRead == drflac__on_read_ogg);
         drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        drflac_assert(pFlac->bs.onRead == drflac__on_read_ogg);
+
         if (oggbs->onRead == drflac__on_read_stdio) {
-            drflac__close_file_handle((drflac_file)oggbs->pUserData);
+            fclose((FILE*)oggbs->pUserData);
         }
     }
 #endif
@@ -5099,30 +6740,29 @@ void drflac_close(drflac* pFlac)
 drflac_uint64 drflac__read_s32__misaligned(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* bufferOut)
 {
     unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
+    drflac_uint64 samplesRead;
 
-    // We should never be calling this when the number of samples to read is >= the sample count.
+    /* We should never be calling this when the number of samples to read is >= the sample count. */
     drflac_assert(samplesToRead < channelCount);
     drflac_assert(pFlac->currentFrame.samplesRemaining > 0 && samplesToRead <= pFlac->currentFrame.samplesRemaining);
 
-
-    drflac_uint64 samplesRead = 0;
+    samplesRead = 0;
     while (samplesToRead > 0) {
         drflac_uint64 totalSamplesInFrame = pFlac->currentFrame.header.blockSize * channelCount;
         drflac_uint64 samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining;
         drflac_uint64 channelIndex = samplesReadFromFrameSoFar % channelCount;
-
         drflac_uint64 nextSampleInFrame = samplesReadFromFrameSoFar / channelCount;
-
         int decodedSample = 0;
+
         switch (pFlac->currentFrame.header.channelAssignment)
         {
             case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
             {
                 if (channelIndex == 0) {
-                    decodedSample = pFlac->currentFrame.subframes[channelIndex].pDecodedSamples[nextSampleInFrame];
+                    decodedSample = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample;
                 } else {
-                    int side = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame];
-                    int left = pFlac->currentFrame.subframes[channelIndex - 1].pDecodedSamples[nextSampleInFrame];
+                    int side = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample;
+                    int left = pFlac->currentFrame.subframes[channelIndex - 1].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex - 1].wastedBitsPerSample;
                     decodedSample = left - side;
                 }
             } break;
@@ -5130,11 +6770,11 @@ drflac_uint64 drflac__read_s32__misaligned(drflac* pFlac, drflac_uint64 samplesT
             case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
             {
                 if (channelIndex == 0) {
-                    int side  = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame];
-                    int right = pFlac->currentFrame.subframes[channelIndex + 1].pDecodedSamples[nextSampleInFrame];
+                    int side  = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample;
+                    int right = pFlac->currentFrame.subframes[channelIndex + 1].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 1].wastedBitsPerSample;
                     decodedSample = side + right;
                 } else {
-                    decodedSample = pFlac->currentFrame.subframes[channelIndex].pDecodedSamples[nextSampleInFrame];
+                    decodedSample = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample;
                 }
             } break;
 
@@ -5143,14 +6783,14 @@ drflac_uint64 drflac__read_s32__misaligned(drflac* pFlac, drflac_uint64 samplesT
                 int mid;
                 int side;
                 if (channelIndex == 0) {
-                    mid  = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame];
-                    side = pFlac->currentFrame.subframes[channelIndex + 1].pDecodedSamples[nextSampleInFrame];
+                    mid  = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample;
+                    side = pFlac->currentFrame.subframes[channelIndex + 1].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 1].wastedBitsPerSample;
 
                     mid = (((unsigned int)mid) << 1) | (side & 0x01);
                     decodedSample = (mid + side) >> 1;
                 } else {
-                    mid  = pFlac->currentFrame.subframes[channelIndex - 1].pDecodedSamples[nextSampleInFrame];
-                    side = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame];
+                    mid  = pFlac->currentFrame.subframes[channelIndex - 1].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex - 1].wastedBitsPerSample;
+                    side = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample;
 
                     mid = (((unsigned int)mid) << 1) | (side & 0x01);
                     decodedSample = (mid - side) >> 1;
@@ -5160,12 +6800,11 @@ drflac_uint64 drflac__read_s32__misaligned(drflac* pFlac, drflac_uint64 samplesT
             case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
             default:
             {
-                decodedSample = pFlac->currentFrame.subframes[channelIndex].pDecodedSamples[nextSampleInFrame];
+                decodedSample = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame] << pFlac->currentFrame.subframes[channelIndex + 0].wastedBitsPerSample;
             } break;
         }
 
-
-        decodedSample <<= ((32 - pFlac->bitsPerSample) + pFlac->currentFrame.subframes[channelIndex].wastedBitsPerSample);
+        decodedSample <<= (32 - pFlac->bitsPerSample);
 
         if (bufferOut) {
             *bufferOut++ = decodedSample;
@@ -5179,34 +6818,11 @@ drflac_uint64 drflac__read_s32__misaligned(drflac* pFlac, drflac_uint64 samplesT
     return samplesRead;
 }
 
-drflac_uint64 drflac__seek_forward_by_samples(drflac* pFlac, drflac_uint64 samplesToRead)
-{
-    drflac_uint64 samplesRead = 0;
-    while (samplesToRead > 0) {
-        if (pFlac->currentFrame.samplesRemaining == 0) {
-            if (!drflac__read_and_decode_next_frame(pFlac)) {
-                break;  // Couldn't read the next frame, so just break from the loop and return.
-            }
-        } else {
-            if (pFlac->currentFrame.samplesRemaining > samplesToRead) {
-                samplesRead   += samplesToRead;
-                pFlac->currentFrame.samplesRemaining -= (drflac_uint32)samplesToRead;   // <-- Safe cast. Will always be < currentFrame.samplesRemaining < 65536.
-                samplesToRead  = 0;
-            } else {
-                samplesRead   += pFlac->currentFrame.samplesRemaining;
-                samplesToRead -= pFlac->currentFrame.samplesRemaining;
-                pFlac->currentFrame.samplesRemaining = 0;
-            }
-        }
-    }
-
-    pFlac->currentSample += samplesRead;
-    return samplesRead;
-}
-
 drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* bufferOut)
 {
-    // Note that <bufferOut> is allowed to be null, in which case this will act like a seek.
+    drflac_uint64 samplesRead;
+
+    /* Note that <bufferOut> is allowed to be null, in which case this will act like a seek. */
     if (pFlac == NULL || samplesToRead == 0) {
         return 0;
     }
@@ -5215,22 +6831,24 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac
         return drflac__seek_forward_by_samples(pFlac, samplesToRead);
     }
 
-
-    drflac_uint64 samplesRead = 0;
+    samplesRead = 0;
     while (samplesToRead > 0) {
-        // If we've run out of samples in this frame, go to the next.
+        /* If we've run out of samples in this frame, go to the next. */
         if (pFlac->currentFrame.samplesRemaining == 0) {
-            if (!drflac__read_and_decode_next_frame(pFlac)) {
-                break;  // Couldn't read the next frame, so just break from the loop and return.
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
             }
         } else {
-            // Here is where we grab the samples and interleave them.
-
+            /* Here is where we grab the samples and interleave them. */
             unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
             drflac_uint64 totalSamplesInFrame = pFlac->currentFrame.header.blockSize * channelCount;
             drflac_uint64 samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining;
-
             drflac_uint64 misalignedSampleCount = samplesReadFromFrameSoFar % channelCount;
+            drflac_uint64 alignedSampleCountPerChannel;
+            drflac_uint64 firstAlignedSampleInFrame;
+            unsigned int unusedBitsPerSample;
+            drflac_uint64 alignedSamplesRead;
+
             if (misalignedSampleCount > 0) {
                 drflac_uint64 misalignedSamplesRead = drflac__read_s32__misaligned(pFlac, misalignedSampleCount, bufferOut);
                 samplesRead               += misalignedSamplesRead;
@@ -5241,79 +6859,87 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac
             }
 
 
-            drflac_uint64 alignedSampleCountPerChannel = samplesToRead / channelCount;
+            alignedSampleCountPerChannel = samplesToRead / channelCount;
             if (alignedSampleCountPerChannel > pFlac->currentFrame.samplesRemaining / channelCount) {
                 alignedSampleCountPerChannel = pFlac->currentFrame.samplesRemaining / channelCount;
             }
 
-            drflac_uint64 firstAlignedSampleInFrame = samplesReadFromFrameSoFar / channelCount;
-            unsigned int unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+            firstAlignedSampleInFrame = samplesReadFromFrameSoFar / channelCount;
+            unusedBitsPerSample = 32 - pFlac->bitsPerSample;
 
             switch (pFlac->currentFrame.header.channelAssignment)
             {
                 case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
                 {
+                    drflac_uint64 i;
                     const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
                     const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
 
-                    for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
-                        int left  = pDecodedSamples0[i];
-                        int side  = pDecodedSamples1[i];
+                    for (i = 0; i < alignedSampleCountPerChannel; ++i) {
+                        int left  = pDecodedSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
+                        int side  = pDecodedSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
                         int right = left - side;
 
-                        bufferOut[i*2+0] = left  << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
-                        bufferOut[i*2+1] = right << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
+                        bufferOut[i*2+0] = left;
+                        bufferOut[i*2+1] = right;
                     }
                 } break;
 
                 case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
                 {
+                    drflac_uint64 i;
                     const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
                     const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
 
-                    for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
-                        int side  = pDecodedSamples0[i];
-                        int right = pDecodedSamples1[i];
+                    for (i = 0; i < alignedSampleCountPerChannel; ++i) {
+                        int side  = pDecodedSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
+                        int right = pDecodedSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
                         int left  = right + side;
 
-                        bufferOut[i*2+0] = left  << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
-                        bufferOut[i*2+1] = right << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
+                        bufferOut[i*2+0] = left;
+                        bufferOut[i*2+1] = right;
                     }
                 } break;
 
                 case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
                 {
+                    drflac_uint64 i;
                     const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
                     const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
 
-                    for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
-                        int side = pDecodedSamples1[i];
-                        int mid  = (((drflac_uint32)pDecodedSamples0[i]) << 1) | (side & 0x01);
+                    for (i = 0; i < alignedSampleCountPerChannel; ++i) {
+                        int mid  = pDecodedSamples0[i] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+                        int side = pDecodedSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+                        
+                        mid = (((drflac_uint32)mid) << 1) | (side & 0x01);
 
-                        bufferOut[i*2+0] = ((mid + side) >> 1) << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
-                        bufferOut[i*2+1] = ((mid - side) >> 1) << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
+                        bufferOut[i*2+0] = ((mid + side) >> 1) << (unusedBitsPerSample);
+                        bufferOut[i*2+1] = ((mid - side) >> 1) << (unusedBitsPerSample);
                     }
                 } break;
 
                 case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
                 default:
                 {
-                    if (pFlac->currentFrame.header.channelAssignment == 1) // 1 = Stereo
+                    if (pFlac->currentFrame.header.channelAssignment == 1) /* 1 = Stereo */
                     {
-                        // Stereo optimized inner loop unroll.
+                        /* Stereo optimized inner loop unroll. */
+                        drflac_uint64 i;
                         const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame;
                         const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame;
 
-                        for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
+                        for (i = 0; i < alignedSampleCountPerChannel; ++i) {
                             bufferOut[i*2+0] = pDecodedSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
                             bufferOut[i*2+1] = pDecodedSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
                         }
                     }
                     else
                     {
-                        // Generic interleaving.
-                        for (drflac_uint64 i = 0; i < alignedSampleCountPerChannel; ++i) {
-                            for (unsigned int j = 0; j < channelCount; ++j) {
+                        /* Generic interleaving. */
+                        drflac_uint64 i;
+                        for (i = 0; i < alignedSampleCountPerChannel; ++i) {
+                            unsigned int j;
+                            for (j = 0; j < channelCount; ++j) {
                                 bufferOut[(i*channelCount)+j] = (pFlac->currentFrame.subframes[j].pDecodedSamples[firstAlignedSampleInFrame + i]) << (unusedBitsPerSample + pFlac->currentFrame.subframes[j].wastedBitsPerSample);
                             }
                         }
@@ -5321,7 +6947,7 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac
                 } break;
             }
 
-            drflac_uint64 alignedSamplesRead = alignedSampleCountPerChannel * channelCount;
+            alignedSamplesRead = alignedSampleCountPerChannel * channelCount;
             samplesRead               += alignedSamplesRead;
             samplesReadFromFrameSoFar += alignedSamplesRead;
             bufferOut                 += alignedSamplesRead;
@@ -5330,7 +6956,7 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac
             pFlac->currentFrame.samplesRemaining -= (unsigned int)alignedSamplesRead;
 
 
-            // At this point we may still have some excess samples left to read.
+            /* At this point we may still have some excess samples left to read. */
             if (samplesToRead > 0 && pFlac->currentFrame.samplesRemaining > 0) {
                 drflac_uint64 excessSamplesRead = 0;
                 if (samplesToRead < pFlac->currentFrame.samplesRemaining) {
@@ -5351,20 +6977,47 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac
     return samplesRead;
 }
 
+/* Reads up to framesToRead PCM frames as interleaved signed 32-bit samples; returns the number of whole frames read (0 when pFlac is NULL). */
+drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut)
+{
+#if defined(_MSC_VER) && !defined(__clang__)
+    #pragma warning(push)
+    #pragma warning(disable:4996)   /* was declared deprecated */
+#elif defined(__GNUC__) || defined(__clang__)
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+    return (pFlac == NULL) ? 0 : drflac_read_s32(pFlac, framesToRead*pFlac->channels, pBufferOut) / pFlac->channels;  /* Check for NULL before touching pFlac->channels, matching drflac_read_s32(). */
+#if defined(_MSC_VER) && !defined(__clang__)
+    #pragma warning(pop)
+#elif defined(__GNUC__) || defined(__clang__)
+    #pragma GCC diagnostic pop
+#endif
+}
+
 drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut)
 {
-    // This reads samples in 2 passes and can probably be optimized.
+    /* This reads samples in 2 passes and can probably be optimized. */
     drflac_uint64 totalSamplesRead = 0;
 
+#if defined(_MSC_VER) && !defined(__clang__)
+    #pragma warning(push)
+    #pragma warning(disable:4996)   /* was declared deprecated */
+#elif defined(__GNUC__) || defined(__clang__)
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
     while (samplesToRead > 0) {
+        drflac_uint64 i;
         drflac_int32 samples32[4096];
         drflac_uint64 samplesJustRead = drflac_read_s32(pFlac, (samplesToRead > 4096) ? 4096 : samplesToRead, samples32);
         if (samplesJustRead == 0) {
-            break;  // Reached the end.
+            break;  /* Reached the end. */
         }
 
-        // s32 -> s16
-        for (drflac_uint64 i = 0; i < samplesJustRead; ++i) {
+        /* s32 -> s16 */
+        for (i = 0; i < samplesJustRead; ++i) {
             pBufferOut[i] = (drflac_int16)(samples32[i] >> 16);
         }
 
@@ -5373,23 +7026,69 @@ drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac
         pBufferOut       += samplesJustRead;
     }
 
+#if defined(_MSC_VER) && !defined(__clang__)
+    #pragma warning(pop)
+#elif defined(__GNUC__) || defined(__clang__)
+    #pragma GCC diagnostic pop
+#endif
+
     return totalSamplesRead;
 }
 
+/* Reads up to framesToRead PCM frames as interleaved signed 16-bit samples (the upper 16 bits of each decoded 32-bit sample); returns the frames read, 0 when pFlac is NULL. */
+drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut)
+{
+    /* This reads samples in 2 passes and can probably be optimized. */
+    drflac_uint64 totalPCMFramesRead = 0;
+
+    while (pFlac != NULL && framesToRead > 0) {   /* The NULL check keeps pFlac->channels below from being read through a NULL decoder. */
+        drflac_uint64 iFrame;
+        drflac_int32 samples32[4096];   /* Scratch buffer; each pass asks for at most 4096/channels frames so the result always fits. */
+        drflac_uint64 framesJustRead = drflac_read_pcm_frames_s32(pFlac, (framesToRead > 4096/pFlac->channels) ? 4096/pFlac->channels : framesToRead, samples32);
+        if (framesJustRead == 0) {
+            break;  /* Reached the end. */
+        }
+
+        /* s32 -> s16 */
+        for (iFrame = 0; iFrame < framesJustRead; ++iFrame) {
+            drflac_uint32 iChannel;
+            for (iChannel = 0; iChannel < pFlac->channels; ++iChannel) {
+                drflac_uint64 iSample = iFrame*pFlac->channels + iChannel;
+                pBufferOut[iSample] = (drflac_int16)(samples32[iSample] >> 16);
+            }
+        }
+
+        totalPCMFramesRead += framesJustRead;
+        framesToRead       -= framesJustRead;
+        pBufferOut         += framesJustRead * pFlac->channels;
+    }
+
+    return totalPCMFramesRead;
+}
+
 drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut)
 {
-    // This reads samples in 2 passes and can probably be optimized.
+    /* This reads samples in 2 passes and can probably be optimized. */
     drflac_uint64 totalSamplesRead = 0;
 
+#if defined(_MSC_VER) && !defined(__clang__)
+    #pragma warning(push)
+    #pragma warning(disable:4996)   /* was declared deprecated */
+#elif defined(__GNUC__) || defined(__clang__)
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
     while (samplesToRead > 0) {
+        drflac_uint64 i;
         drflac_int32 samples32[4096];
         drflac_uint64 samplesJustRead = drflac_read_s32(pFlac, (samplesToRead > 4096) ? 4096 : samplesToRead, samples32);
         if (samplesJustRead == 0) {
-            break;  // Reached the end.
+            break;  /* Reached the end. */
         }
 
-        // s32 -> f32
-        for (drflac_uint64 i = 0; i < samplesJustRead; ++i) {
+        /* s32 -> f32 */
+        for (i = 0; i < samplesJustRead; ++i) {
             pBufferOut[i] = (float)(samples32[i] / 2147483648.0);
         }
 
@@ -5398,17 +7097,717 @@ drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float*
         pBufferOut       += samplesJustRead;
     }
 
+#if defined(_MSC_VER) && !defined(__clang__)
+    #pragma warning(pop)
+#elif defined(__GNUC__) || defined(__clang__)
+    #pragma GCC diagnostic pop
+#endif
+
     return totalSamplesRead;
 }
 
+#if 0   /* Compiled out: one-frame-per-iteration version of the left/side -> interleaved f32 decode. */
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        int left  = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
+        int side  = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
+        int right = left - side;
+        /* Divide by 2147483648 (2^31) and write the pair interleaved: left first, then right. */
+        pOutputSamples[i*2+0] = (float)(left / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)(right / 2147483648.0);
+    }
+}
+#endif
+
+/*
+Left/side -> interleaved f32 (left, right per frame): pInputSamples0 is left, pInputSamples1 is side, right = left - side. Four frames per pass, then a 0-3 frame tail. The shifts and the subtraction run in drflac_uint32 because left-shifting a negative int, or overflowing one, is undefined behaviour in C; values are reinterpreted as drflac_int32 only for the float conversion.
+*/
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    float factor = 1 / 2147483648.0;    /* 2^-31 */
+    drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+    drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = (drflac_uint32)pInputSamples0[i*4+0] << shift0;
+        drflac_uint32 left1 = (drflac_uint32)pInputSamples0[i*4+1] << shift0;
+        drflac_uint32 left2 = (drflac_uint32)pInputSamples0[i*4+2] << shift0;
+        drflac_uint32 left3 = (drflac_uint32)pInputSamples0[i*4+3] << shift0;
+
+        drflac_uint32 side0 = (drflac_uint32)pInputSamples1[i*4+0] << shift1;
+        drflac_uint32 side1 = (drflac_uint32)pInputSamples1[i*4+1] << shift1;
+        drflac_uint32 side2 = (drflac_uint32)pInputSamples1[i*4+2] << shift1;
+        drflac_uint32 side3 = (drflac_uint32)pInputSamples1[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << shift0;
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << shift1;
+        drflac_uint32 right = left - side;
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  * factor);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right * factor);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+/*
+SSE2 version of the left/side -> interleaved f32 decode (pInputSamples0 is left, pInputSamples1 is side); asserts bitsPerSample <= 24.
+Samples are shifted 8 bits less than in the scalar version and scaled by 1/8388608 (2^-23) instead of 2^-31. Each group of four frames is
+interleaved with unpack + two unaligned stores. The 0-3 frame tail shifts in drflac_uint32 because "<<" on a negative int is undefined in C.
+*/
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 frameCount4;
+    __m128 factor;
+    int shift0;
+    int shift1;
+    drflac_uint64 i;
+
+    drflac_assert(pFlac->bitsPerSample <= 24);
+
+    frameCount4 = frameCount >> 2;
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+    shift0 = (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample) - 8;
+    shift1 = (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i);
+        __m128i inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i);
+
+        __m128i left  = _mm_slli_epi32(inputSample0, shift0);
+        __m128i side  = _mm_slli_epi32(inputSample1, shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        /* unpacklo gives L0 R0 L1 R1, unpackhi gives L2 R2 L3 R3. */
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << shift0;
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 8388608.0f);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 8388608.0f);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    /* Take the SSE2 kernel when DRFLAC_SUPPORT_SSE2 is defined, drflac__gIsSSE2Supported is set and the stream is at most 24 bits per sample. */
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+        return;
+    }
+#endif
+    /* Everything else goes through the scalar code; flip the #if below to use the reference version instead. */
+#if 0
+    drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+    drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+}
+
+
+#if 0   /* Compiled out: one-frame-per-iteration version of the right/side -> interleaved f32 decode. */
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        int side  = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
+        int right = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
+        int left  = right + side;
+        /* Divide by 2147483648 (2^31) and write the pair interleaved: left first, then right. */
+        pOutputSamples[i*2+0] = (float)(left / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)(right / 2147483648.0);
+    }
+}
+#endif
+
+/*
+Right/side -> interleaved f32 (left, right per frame): pInputSamples0 is side, pInputSamples1 is right, left = right + side. Four frames per pass, then a 0-3 frame tail. The shifts and the addition run in drflac_uint32 because left-shifting a negative int, or overflowing one, is undefined behaviour in C; values are reinterpreted as drflac_int32 only for the float conversion.
+*/
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    float factor = 1 / 2147483648.0;    /* 2^-31 */
+    drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+    drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = (drflac_uint32)pInputSamples0[i*4+0] << shift0;
+        drflac_uint32 side1  = (drflac_uint32)pInputSamples0[i*4+1] << shift0;
+        drflac_uint32 side2  = (drflac_uint32)pInputSamples0[i*4+2] << shift0;
+        drflac_uint32 side3  = (drflac_uint32)pInputSamples0[i*4+3] << shift0;
+
+        drflac_uint32 right0 = (drflac_uint32)pInputSamples1[i*4+0] << shift1;
+        drflac_uint32 right1 = (drflac_uint32)pInputSamples1[i*4+1] << shift1;
+        drflac_uint32 right2 = (drflac_uint32)pInputSamples1[i*4+2] << shift1;
+        drflac_uint32 right3 = (drflac_uint32)pInputSamples1[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << shift0;
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << shift1;
+        drflac_uint32 left  = right + side;
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  * factor);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right * factor);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+/*
+SSE2 version of the right/side -> interleaved f32 decode (pInputSamples0 is side, pInputSamples1 is right); asserts bitsPerSample <= 24.
+Samples are shifted 8 bits less than in the scalar version and scaled by 1/8388608 (2^-23) instead of 2^-31. Each group of four frames is
+interleaved with unpack + two unaligned stores. The 0-3 frame tail shifts in drflac_uint32 because "<<" on a negative int is undefined in C.
+*/
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 frameCount4;
+    __m128 factor;
+    int shift0;
+    int shift1;
+    drflac_uint64 i;
+
+    drflac_assert(pFlac->bitsPerSample <= 24);
+
+    frameCount4 = frameCount >> 2;
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+    shift0 = (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample) - 8;
+    shift1 = (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i);
+        __m128i inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i);
+
+        __m128i side  = _mm_slli_epi32(inputSample0, shift0);
+        __m128i right = _mm_slli_epi32(inputSample1, shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        /* unpacklo gives L0 R0 L1 R1, unpackhi gives L2 R2 L3 R3. */
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << shift0;
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 8388608.0f);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 8388608.0f);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    /* Take the SSE2 kernel when DRFLAC_SUPPORT_SSE2 is defined, drflac__gIsSSE2Supported is set and the stream is at most 24 bits per sample. */
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+        return;
+    }
+#endif
+    /* Everything else goes through the scalar code; flip the #if below to use the reference version instead. */
+#if 0
+    drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+    drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+}
+
+
+#if 0   /* Compiled out: one-frame-per-iteration version of the mid/side -> interleaved f32 decode. */
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;    /* Declared up front so the block also builds as C89 once enabled. */
+    for (i = 0; i < frameCount; ++i) {
+        int mid  = pInputSamples0[i] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+        int side = pInputSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+        /* Double mid and take its low bit from side, so that mid + side and mid - side are both even before the ">> 1". */
+        mid = (((drflac_uint32)mid) << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+    }
+}
+#endif
+
+/*
+Mid/side -> interleaved f32 (left, right per frame): pInputSamples0 is mid, pInputSamples1 is side. Four frames per pass, then a 0-3 frame tail.
+mid is doubled and given side's low bit, after which left = (mid + side) / 2 and right = (mid - side) / 2, scaled up by unusedBitsPerSample.
+Left shifts, sums and differences run in drflac_uint32 because left-shifting a negative int, or overflowing one, is undefined behaviour in C;
+results are reinterpreted as drflac_int32 for the halving shift and for the float conversion.
+*/
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    float factor = 1 / 2147483648.0;    /* 2^-31 */
+    int shift = unusedBitsPerSample;
+
+    if (shift > 0) {
+        /* mid + side and mid - side are always even here, so ">> 1" followed by "<< shift" collapses into one "<< (shift - 1)". */
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_int32 temp0L, temp1L, temp2L, temp3L;
+            drflac_int32 temp0R, temp1R, temp2R, temp3R;
+
+            drflac_uint32 mid0  = (drflac_uint32)pInputSamples0[i*4+0] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = (drflac_uint32)pInputSamples0[i*4+1] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = (drflac_uint32)pInputSamples0[i*4+2] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = (drflac_uint32)pInputSamples0[i*4+3] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = (drflac_uint32)pInputSamples1[i*4+0] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = (drflac_uint32)pInputSamples1[i*4+1] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = (drflac_uint32)pInputSamples1[i*4+2] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = (drflac_uint32)pInputSamples1[i*4+3] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+
+            /* Double mid and take its low bit from side. */
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            /* Left = mid + side, right = mid - side, each shifted up by (unusedBitsPerSample - 1). */
+            temp0L = (drflac_int32)((mid0 + side0) << shift);
+            temp1L = (drflac_int32)((mid1 + side1) << shift);
+            temp2L = (drflac_int32)((mid2 + side2) << shift);
+            temp3L = (drflac_int32)((mid3 + side3) << shift);
+
+            temp0R = (drflac_int32)((mid0 - side0) << shift);
+            temp1R = (drflac_int32)((mid1 - side1) << shift);
+            temp2R = (drflac_int32)((mid2 - side2) << shift);
+            temp3R = (drflac_int32)((mid3 - side3) << shift);
+
+            pOutputSamples[i*8+0] = (float)(temp0L * factor);
+            pOutputSamples[i*8+1] = (float)(temp0R * factor);
+            pOutputSamples[i*8+2] = (float)(temp1L * factor);
+            pOutputSamples[i*8+3] = (float)(temp1R * factor);
+            pOutputSamples[i*8+4] = (float)(temp2L * factor);
+            pOutputSamples[i*8+5] = (float)(temp2R * factor);
+            pOutputSamples[i*8+6] = (float)(temp3L * factor);
+            pOutputSamples[i*8+7] = (float)(temp3R * factor);
+        }
+    } else {
+        /* unusedBitsPerSample is not positive: only the halving ">> 1" is applied. */
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_int32 temp0L, temp1L, temp2L, temp3L;
+            drflac_int32 temp0R, temp1R, temp2R, temp3R;
+
+            drflac_uint32 mid0  = (drflac_uint32)pInputSamples0[i*4+0] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = (drflac_uint32)pInputSamples0[i*4+1] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = (drflac_uint32)pInputSamples0[i*4+2] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = (drflac_uint32)pInputSamples0[i*4+3] << pFlac->currentFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = (drflac_uint32)pInputSamples1[i*4+0] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = (drflac_uint32)pInputSamples1[i*4+1] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = (drflac_uint32)pInputSamples1[i*4+2] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = (drflac_uint32)pInputSamples1[i*4+3] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+
+            /* Double mid and take its low bit from side. */
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            /* Reinterpret as signed first so the ">> 1" is applied to a signed value, as before. */
+            temp0L = (drflac_int32)(mid0 + side0) >> 1;
+            temp1L = (drflac_int32)(mid1 + side1) >> 1;
+            temp2L = (drflac_int32)(mid2 + side2) >> 1;
+            temp3L = (drflac_int32)(mid3 + side3) >> 1;
+
+            temp0R = (drflac_int32)(mid0 - side0) >> 1;
+            temp1R = (drflac_int32)(mid1 - side1) >> 1;
+            temp2R = (drflac_int32)(mid2 - side2) >> 1;
+            temp3R = (drflac_int32)(mid3 - side3) >> 1;
+
+            pOutputSamples[i*8+0] = (float)(temp0L * factor);
+            pOutputSamples[i*8+1] = (float)(temp0R * factor);
+            pOutputSamples[i*8+2] = (float)(temp1L * factor);
+            pOutputSamples[i*8+3] = (float)(temp1R * factor);
+            pOutputSamples[i*8+4] = (float)(temp2L * factor);
+            pOutputSamples[i*8+5] = (float)(temp2R * factor);
+            pOutputSamples[i*8+6] = (float)(temp3L * factor);
+            pOutputSamples[i*8+7] = (float)(temp3R * factor);
+        }
+    }
+
+    /* Tail: the frames left over after the groups of four, using the unadjusted unusedBitsPerSample. */
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+        drflac_uint32 mid   = (((drflac_uint32)pInputSamples0[i] << pFlac->currentFrame.subframes[0].wastedBitsPerSample) << 1) | (side & 0x01);
+        drflac_int32  left  = (drflac_int32)(mid + side) >> 1;
+        drflac_int32  right = (drflac_int32)(mid - side) >> 1;
+
+        pOutputSamples[i*2+0] = (float)((drflac_int32)((drflac_uint32)left  << unusedBitsPerSample) * factor);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)((drflac_uint32)right << unusedBitsPerSample) * factor);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+/*
+SSE2 version of the mid/side -> interleaved f32 decode (pInputSamples0 is mid, pInputSamples1 is side); asserts bitsPerSample <= 24.
+Values are built at 24-bit scale (shift = unusedBitsPerSample - 8) and multiplied by 1/8388608 (2^-23). Halving uses _mm_srai_epi32 so the
+sign is kept whatever the following shift is, and each group of four frames is interleaved with unpack + two unaligned stores. The 0-3
+frame tails do their left shifts, sums and differences in drflac_uint32 because "<<" on a negative int is undefined behaviour in C.
+*/
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4;
+    float factor;
+    int shift;
+    __m128 factor128;
+
+    drflac_assert(pFlac->bitsPerSample <= 24);
+
+    frameCount4 = frameCount >> 2;
+
+    factor = 1.0f / 8388608.0f;
+    factor128 = _mm_set1_ps(1.0f / 8388608.0f);
+
+    shift = unusedBitsPerSample - 8;
+    if (shift == 0) {
+        /* No extra scaling needed: only the halving shift is applied. */
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i tempL;
+            __m128i tempR;
+            __m128  leftf;
+            __m128  rightf;
+
+            __m128i inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i);
+            __m128i inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i);
+
+            __m128i mid  = _mm_slli_epi32(inputSample0, pFlac->currentFrame.subframes[0].wastedBitsPerSample);
+            __m128i side = _mm_slli_epi32(inputSample1, pFlac->currentFrame.subframes[1].wastedBitsPerSample);
+
+            /* Double mid and take its low bit from side. */
+            mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL = _mm_add_epi32(mid, side);
+            tempR = _mm_sub_epi32(mid, side);
+
+            /* Signed bit shift. */
+            tempL = _mm_srai_epi32(tempL, 1);
+            tempR = _mm_srai_epi32(tempR, 1);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            /* unpacklo gives L0 R0 L1 R1, unpackhi gives L2 R2 L3 R3. */
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        /* Tail: the frames left over after the groups of four. */
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 mid  = (((drflac_uint32)pInputSamples0[i] << pFlac->currentFrame.subframes[0].wastedBitsPerSample) << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (float)(((drflac_int32)(mid + side) >> 1) * factor);
+            pOutputSamples[i*2+1] = (float)(((drflac_int32)(mid - side) >> 1) * factor);
+        }
+    } else {
+        /* Halve, then shift left by the non-zero (unusedBitsPerSample - 8). */
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i inputSample0;
+            __m128i inputSample1;
+            __m128i mid;
+            __m128i side;
+            __m128i tempL;
+            __m128i tempR;
+            __m128 leftf;
+            __m128 rightf;
+
+            inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i);
+            inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i);
+
+            mid  = _mm_slli_epi32(inputSample0, pFlac->currentFrame.subframes[0].wastedBitsPerSample);
+            side = _mm_slli_epi32(inputSample1, pFlac->currentFrame.subframes[1].wastedBitsPerSample);
+
+            /* Double mid and take its low bit from side. */
+            mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL = _mm_slli_epi32(_mm_srai_epi32(_mm_add_epi32(mid, side), 1), shift);
+            tempR = _mm_slli_epi32(_mm_srai_epi32(_mm_sub_epi32(mid, side), 1), shift);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            /* unpacklo gives L0 R0 L1 R1, unpackhi gives L2 R2 L3 R3. */
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        /* Tail: the frames left over after the groups of four. */
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 mid   = (((drflac_uint32)pInputSamples0[i] << pFlac->currentFrame.subframes[0].wastedBitsPerSample) << 1) | (side & 0x01);
+            drflac_int32  left  = (drflac_int32)(mid + side) >> 1;
+            drflac_int32  right = (drflac_int32)(mid - side) >> 1;
+
+            pOutputSamples[i*2+0] = (float)((drflac_int32)((drflac_uint32)left  << shift) * factor);
+            pOutputSamples[i*2+1] = (float)((drflac_int32)((drflac_uint32)right << shift) * factor);
+        }
+    }
+}
+#endif
+
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    /* Take the SSE2 kernel when DRFLAC_SUPPORT_SSE2 is defined, drflac__gIsSSE2Supported is set and the stream is at most 24 bits per sample. */
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+        return;
+    }
+#endif
+    /* Everything else goes through the scalar code; flip the #if below to use the reference version instead. */
+#if 0
+    drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+    drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+}
+
+#if 0   /* Compiled out: one-frame-per-iteration version of the independent-stereo -> interleaved f32 decode. */
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {    /* NOTE(review): counter declared in the for statement; C89 compilers reject this, hoist it before enabling the block. */
+        pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0);   /* pInputSamples0 -> first slot of the frame */
+        pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0);   /* pInputSamples1 -> second slot of the frame */
+    }
+}
+#endif
+
+/*
+Independent stereo -> interleaved f32: pInputSamples0 fills the first slot of each output frame and pInputSamples1 the second. Four frames per pass, then a 0-3 frame tail. The left shifts run in drflac_uint32 because left-shifting a negative int is undefined behaviour in C; the result is reinterpreted as drflac_int32 for the float conversion.
+*/
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    float factor = 1 / 2147483648.0;    /* 2^-31 */
+    int shift0 = (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample);
+    int shift1 = (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample);
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_int32 tempL0 = (drflac_int32)((drflac_uint32)pInputSamples0[i*4+0] << shift0);
+        drflac_int32 tempL1 = (drflac_int32)((drflac_uint32)pInputSamples0[i*4+1] << shift0);
+        drflac_int32 tempL2 = (drflac_int32)((drflac_uint32)pInputSamples0[i*4+2] << shift0);
+        drflac_int32 tempL3 = (drflac_int32)((drflac_uint32)pInputSamples0[i*4+3] << shift0);
+
+        drflac_int32 tempR0 = (drflac_int32)((drflac_uint32)pInputSamples1[i*4+0] << shift1);
+        drflac_int32 tempR1 = (drflac_int32)((drflac_uint32)pInputSamples1[i*4+1] << shift1);
+        drflac_int32 tempR2 = (drflac_int32)((drflac_uint32)pInputSamples1[i*4+2] << shift1);
+        drflac_int32 tempR3 = (drflac_int32)((drflac_uint32)pInputSamples1[i*4+3] << shift1);
+
+        pOutputSamples[i*8+0] = (float)(tempL0 * factor);
+        pOutputSamples[i*8+1] = (float)(tempR0 * factor);
+        pOutputSamples[i*8+2] = (float)(tempL1 * factor);
+        pOutputSamples[i*8+3] = (float)(tempR1 * factor);
+        pOutputSamples[i*8+4] = (float)(tempL2 * factor);
+        pOutputSamples[i*8+5] = (float)(tempR2 * factor);
+        pOutputSamples[i*8+6] = (float)(tempL3 * factor);
+        pOutputSamples[i*8+7] = (float)(tempR3 * factor);
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (float)((drflac_int32)((drflac_uint32)pInputSamples0[i] << shift0) * factor);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)((drflac_uint32)pInputSamples1[i] << shift1) * factor);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+/*
+SSE2 version of the independent-stereo -> interleaved f32 decode; asserts bitsPerSample <= 24 like the other SSE2 kernels, since the "- 8" below would otherwise yield a negative shift. Samples are scaled by 1/8388608 (2^-23); each group of four frames is interleaved with unpack + two unaligned stores. The tail shifts in drflac_uint32 because "<<" on a negative int is undefined in C.
+*/
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+
+    float factor = 1.0f / 8388608.0f;
+    __m128 factor128 = _mm_set1_ps(1.0f / 8388608.0f);
+
+    int shift0 = (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample) - 8;
+    int shift1 = (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    drflac_assert(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i);
+        __m128i inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i);
+
+        __m128i i32L = _mm_slli_epi32(inputSample0, shift0);
+        __m128i i32R = _mm_slli_epi32(inputSample1, shift1);
+
+        __m128 f32L = _mm_mul_ps(_mm_cvtepi32_ps(i32L), factor128);
+        __m128 f32R = _mm_mul_ps(_mm_cvtepi32_ps(i32R), factor128);
+
+        /* unpacklo gives L0 R0 L1 R1, unpackhi gives L2 R2 L3 R3. */
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(f32L, f32R));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(f32L, f32R));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (float)((drflac_int32)((drflac_uint32)pInputSamples0[i] << shift0) * factor);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)((drflac_uint32)pInputSamples1[i] << shift1) * factor);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    /* Take the SSE2 kernel when DRFLAC_SUPPORT_SSE2 is defined, drflac__gIsSSE2Supported is set and the stream is at most 24 bits per sample. */
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+        return;
+    }
+#endif
+    /* Everything else goes through the scalar code; flip the #if below to use the reference version instead. */
+#if 0
+    drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+    drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+}
+
+/* Reads up to framesToRead PCM frames into pBufferOut as interleaved f32 and returns the number of frames produced; a NULL pBufferOut seeks forward by that many frames instead. */
+drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut)
+{
+    drflac_uint64 framesRead;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFrame.samplesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
+            drflac_uint64 totalFramesInPacket = pFlac->currentFrame.header.blockSize;
+            drflac_uint64 framesAvailable = pFlac->currentFrame.samplesRemaining / channelCount;
+            drflac_uint64 iFirstPCMFrame = totalFramesInPacket - framesAvailable;
+            drflac_int32 unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+            drflac_uint64 samplesReadThisIteration;
+
+            if (framesAvailable == 0) {
+                /* Less than one whole PCM frame is left (drflac_seek_to_sample() can leave such a remainder). Drop it; otherwise nothing is consumed and this loop never ends. */
+                pFlac->currentSample += pFlac->currentFrame.samplesRemaining;
+                pFlac->currentFrame.samplesRemaining = 0;
+                continue;
+            }
+
+            if (frameCountThisIteration > framesAvailable) {
+                frameCountThisIteration = framesAvailable;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + iFirstPCMFrame;
+
+                switch (pFlac->currentFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                        drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                        break;
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                        drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                        break;
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                        drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                        break;
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                        drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                        break;
+                }
+            } else {
+                /* Generic interleaving. The shift is done in drflac_uint32 because "<<" on a negative int is undefined behaviour in C. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        pBufferOut[(i*channelCount)+j] = (float)((drflac_int32)((drflac_uint32)pFlac->currentFrame.subframes[j].pDecodedSamples[iFirstPCMFrame + i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[j].wastedBitsPerSample)) / 2147483648.0);
+                    }
+                }
+            }
+
+            samplesReadThisIteration = frameCountThisIteration * channelCount;
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += samplesReadThisIteration;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentSample      += samplesReadThisIteration;
+            pFlac->currentFrame.samplesRemaining -= (unsigned int)samplesReadThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
 drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex)
 {
     if (pFlac == NULL) {
         return DRFLAC_FALSE;
     }
 
-    // If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
-    // when the decoder was opened.
+    /*
+    If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
+    when the decoder was opened.
+    */
     if (pFlac->firstFramePos == 0) {
         return DRFLAC_FALSE;
     }
@@ -5419,14 +7818,14 @@ drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex)
     } else {
         drflac_bool32 wasSuccessful = DRFLAC_FALSE;
 
-        // Clamp the sample to the end.
+        /* Clamp the sample to the end. */
         if (sampleIndex >= pFlac->totalSampleCount) {
             sampleIndex  = pFlac->totalSampleCount - 1;
         }
 
-        // If the target sample and the current sample are in the same frame we just move the position forward.
+        /* If the target sample and the current sample are in the same frame we just move the position forward. */
         if (sampleIndex > pFlac->currentSample) {
-            // Forward.
+            /* Forward. */
             drflac_uint32 offset = (drflac_uint32)(sampleIndex - pFlac->currentSample);
             if (pFlac->currentFrame.samplesRemaining >  offset) {
                 pFlac->currentFrame.samplesRemaining -= offset;
@@ -5434,7 +7833,7 @@ drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex)
                 return DRFLAC_TRUE;
             }
         } else {
-            // Backward.
+            /* Backward. */
             drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentSample - sampleIndex);
             drflac_uint32 currentFrameSampleCount = pFlac->currentFrame.header.blockSize * drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
             drflac_uint32 currentFrameSamplesConsumed = (drflac_uint32)(currentFrameSampleCount - pFlac->currentFrame.samplesRemaining);
@@ -5445,17 +7844,19 @@ drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex)
             }
         }
 
-        // Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so
-        // we'll instead use Ogg's natural seeking facility.
-    #ifndef DR_FLAC_NO_OGG
+        /*
+        Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so
+        we'll instead use Ogg's natural seeking facility.
+        */
+#ifndef DR_FLAC_NO_OGG
         if (pFlac->container == drflac_container_ogg)
         {
             wasSuccessful = drflac_ogg__seek_to_sample(pFlac, sampleIndex);
         }
         else
-    #endif
+#endif
         {
-            // First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower.
+            /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */
             wasSuccessful = drflac__seek_to_sample__seek_table(pFlac, sampleIndex);
             if (!wasSuccessful) {
                 wasSuccessful = drflac__seek_to_sample__brute_force(pFlac, sampleIndex);
@@ -5467,42 +7868,118 @@ drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex)
     }
 }
 
+/* Moves the decoder to PCM frame pcmFrameIndex. Returns DRFLAC_TRUE on success, or DRFLAC_FALSE when pFlac is NULL, when the position
+   of the first FLAC frame is unknown, or when the underlying seek fails. */
+drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    if (pFlac == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    /* If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
+       when the decoder was opened. */
+    if (pFlac->firstFramePos == 0) {
+        return DRFLAC_FALSE;
+    }
+
+    if (pcmFrameIndex == 0) {
+        pFlac->currentSample = 0;
+        return drflac__seek_to_first_frame(pFlac);
+    } else {
+        drflac_bool32 wasSuccessful = DRFLAC_FALSE;
+
+        /* Clamp the frame index to the end. A total of 0 (length not known up front) must skip the clamp: 0 - 1 would wrap around. */
+        if (pFlac->totalPCMFrameCount > 0 && pcmFrameIndex >= pFlac->totalPCMFrameCount) {
+            pcmFrameIndex  = pFlac->totalPCMFrameCount - 1;
+        }
+
+        /* If the target and the current position are in the same frame we just move the position. Offsets stay 64-bit so a large jump cannot be truncated into a small one. */
+        if (pcmFrameIndex*pFlac->channels > pFlac->currentSample) {
+            /* Forward. */
+            drflac_uint64 offset = pcmFrameIndex*pFlac->channels - pFlac->currentSample;
+            if (pFlac->currentFrame.samplesRemaining >  offset) {
+                pFlac->currentFrame.samplesRemaining -= (drflac_uint32)offset;
+                pFlac->currentSample = pcmFrameIndex*pFlac->channels;
+                return DRFLAC_TRUE;
+            }
+        } else {
+            /* Backward. */
+            drflac_uint64 offsetAbs = pFlac->currentSample - pcmFrameIndex*pFlac->channels;
+            drflac_uint32 currentFrameSampleCount = pFlac->currentFrame.header.blockSize * drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment);
+            drflac_uint32 currentFrameSamplesConsumed = (drflac_uint32)(currentFrameSampleCount - pFlac->currentFrame.samplesRemaining);
+            if (currentFrameSamplesConsumed > offsetAbs) {
+                pFlac->currentFrame.samplesRemaining += (drflac_uint32)offsetAbs;
+                pFlac->currentSample = pcmFrameIndex*pFlac->channels;
+                return DRFLAC_TRUE;
+            }
+        }
+
+        /*
+        Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so
+        we'll instead use Ogg's natural seeking facility.
+        */
+#ifndef DR_FLAC_NO_OGG
+        if (pFlac->container == drflac_container_ogg)
+        {
+            wasSuccessful = drflac_ogg__seek_to_sample(pFlac, pcmFrameIndex*pFlac->channels);
+        }
+        else
+#endif
+        {
+            /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */
+            wasSuccessful = drflac__seek_to_sample__seek_table(pFlac, pcmFrameIndex*pFlac->channels);
+            if (!wasSuccessful) {
+                wasSuccessful = drflac__seek_to_sample__brute_force(pFlac, pcmFrameIndex*pFlac->channels);
+            }
+        }
+
+        pFlac->currentSample = pcmFrameIndex*pFlac->channels;
+        return wasSuccessful;
+    }
+}
 
 
-//// High Level APIs ////
 
-// I couldn't figure out where SIZE_MAX was defined for VC6. If anybody knows, let me know.
-#if defined(_MSC_VER) && _MSC_VER <= 1200
-#ifdef DRFLAC_64BIT
-#define SIZE_MAX    ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
+/* High Level APIs */
+
+#if defined(SIZE_MAX)  /* Prefer the toolchain's SIZE_MAX when it is defined. */
+    #define DRFLAC_SIZE_MAX  SIZE_MAX
 #else
-#define SIZE_MAX    0xFFFFFFFF
-#endif
+    #if defined(DRFLAC_64BIT)  /* Otherwise pick a 64- or 32-bit maximum based on DRFLAC_64BIT. */
+        #define DRFLAC_SIZE_MAX  ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define DRFLAC_SIZE_MAX  0xFFFFFFFF
+    #endif
 #endif
 
-// Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me.
-#define DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(extension, type) \
-static type* drflac__full_decode_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)\
+
+/* Using a macro as the definition of the drflac__full_read_and_close_*() API family. Sue me. */
+#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \
+static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\
 {                                                                                                                                                                   \
+    type* pSampleData = NULL;                                                                                                                                       \
+    drflac_uint64 totalPCMFrameCount;                                                                                                                               \
+                                                                                                                                                                    \
     drflac_assert(pFlac != NULL);                                                                                                                                   \
                                                                                                                                                                     \
-    type* pSampleData = NULL;                                                                                                                                       \
-    drflac_uint64 totalSampleCount = pFlac->totalSampleCount;                                                                                                       \
+    totalPCMFrameCount = pFlac->totalPCMFrameCount;                                                                                                                 \
                                                                                                                                                                     \
-    if (totalSampleCount == 0) {                                                                                                                                    \
+    if (totalPCMFrameCount == 0) {                                                                                                                                  \
         type buffer[4096];                                                                                                                                          \
-                                                                                                                                                                    \
+        drflac_uint64 pcmFramesRead;                                                                                                                                \
         size_t sampleDataBufferSize = sizeof(buffer);                                                                                                               \
+                                                                                                                                                                    \
         pSampleData = (type*)DRFLAC_MALLOC(sampleDataBufferSize);                                                                                                   \
         if (pSampleData == NULL) {                                                                                                                                  \
             goto on_error;                                                                                                                                          \
         }                                                                                                                                                           \
                                                                                                                                                                     \
-        drflac_uint64 samplesRead;                                                                                                                                  \
-        while ((samplesRead = (drflac_uint64)drflac_read_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0]), buffer)) > 0) {                                       \
-            if (((totalSampleCount + samplesRead) * sizeof(type)) > sampleDataBufferSize) {                                                                         \
+        while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) {          \
+            if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) {                                                   \
+                type* pNewSampleData;                                                                                                                               \
+                                                                                                                                                                    \
                 sampleDataBufferSize *= 2;                                                                                                                          \
-                type* pNewSampleData = (type*)DRFLAC_REALLOC(pSampleData, sampleDataBufferSize);                                                                    \
+                pNewSampleData = (type*)DRFLAC_REALLOC(pSampleData, sampleDataBufferSize);                                                                          \
                 if (pNewSampleData == NULL) {                                                                                                                       \
                     DRFLAC_FREE(pSampleData);                                                                                                                       \
                     goto on_error;                                                                                                                                  \
@@ -5511,16 +7988,16 @@ static type* drflac__full_decode_and_close_ ## extension (drflac* pFlac, unsigne
                 pSampleData = pNewSampleData;                                                                                                                       \
             }                                                                                                                                                       \
                                                                                                                                                                     \
-            drflac_copy_memory(pSampleData + totalSampleCount, buffer, (size_t)(samplesRead*sizeof(type)));                                                         \
-            totalSampleCount += samplesRead;                                                                                                                        \
+            drflac_copy_memory(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type)));                   \
+            totalPCMFrameCount += pcmFramesRead;                                                                                                                    \
         }                                                                                                                                                           \
                                                                                                                                                                     \
         /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to                                       \
            protect those ears from random noise! */                                                                                                                 \
-        drflac_zero_memory(pSampleData + totalSampleCount, (size_t)(sampleDataBufferSize - totalSampleCount*sizeof(type)));                                         \
+        drflac_zero_memory(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
     } else {                                                                                                                                                        \
-        drflac_uint64 dataSize = totalSampleCount * sizeof(type);                                                                                                   \
-        if (dataSize > SIZE_MAX) {                                                                                                                                  \
+        drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
+        if (dataSize > DRFLAC_SIZE_MAX) {                                                                                                                           \
             goto on_error;  /* The decoded data is too big. */                                                                                                      \
         }                                                                                                                                                           \
                                                                                                                                                                     \
@@ -5529,12 +8006,12 @@ static type* drflac__full_decode_and_close_ ## extension (drflac* pFlac, unsigne
             goto on_error;                                                                                                                                          \
         }                                                                                                                                                           \
                                                                                                                                                                     \
-        totalSampleCount = drflac_read_##extension(pFlac, pFlac->totalSampleCount, pSampleData);                                                                    \
+        totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData);                                                     \
     }                                                                                                                                                               \
                                                                                                                                                                     \
     if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;                                                                                                          \
     if (channelsOut) *channelsOut = pFlac->channels;                                                                                                                \
-    if (totalSampleCountOut) *totalSampleCountOut = totalSampleCount;                                                                                               \
+    if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount;                                                                                         \
                                                                                                                                                                     \
     drflac_close(pFlac);                                                                                                                                            \
     return pSampleData;                                                                                                                                             \
@@ -5544,141 +8021,533 @@ on_error:
     return NULL;                                                                                                                                                    \
 }
 
-DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(s32, drflac_int32)
-DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(s16, drflac_int16)
-DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(f32, float)
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32)  /* defines drflac__full_read_and_close_s32() */
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16)  /* defines drflac__full_read_and_close_s16() */
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float)  /* defines drflac__full_read_and_close_f32() */
 
-drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)  /* Opens a decoder from the callbacks and reads the whole stream as drflac_int32 samples. */
 {
-    // Safety.
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    drflac* pFlac;
 
-    drflac* pFlac = drflac_open(onRead, onSeek, pUserData);
+    if (channelsOut) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData);
     if (pFlac == NULL) {
         return NULL;
     }
 
-    return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount);
+    return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);  /* The helper closes pFlac before returning the buffer. */
 }
 
-drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)  /* Sample-count flavour of drflac_open_and_read_pcm_frames_s32(): same buffer, total reported in samples. */
 {
-    // Safety.
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int32* pResult;
 
-    drflac* pFlac = drflac_open(onRead, onSeek, pUserData);
-    if (pFlac == NULL) {
+    if (channelsOut) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = 0;
+    }
+
+    pResult = drflac_open_and_read_pcm_frames_s32(onRead, onSeek, pUserData, &channels, &sampleRate, &totalPCMFrameCount);  /* Locals receive the results so NULL output pointers stay safe. */
+    if (pResult == NULL) {
         return NULL;
     }
 
-    return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount);
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = totalPCMFrameCount * channels;  /* PCM frames -> total samples. */
+    }
+
+    return pResult;
 }
 
-float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
-{
-    // Safety.
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
 
-    drflac* pFlac = drflac_open(onRead, onSeek, pUserData);
+
+drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)  /* Opens a decoder from the callbacks and reads the whole stream as drflac_int16 samples. */
+{
+    drflac* pFlac;
+
+    if (channelsOut) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData);
     if (pFlac == NULL) {
         return NULL;
     }
 
-    return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount);
+    return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);  /* The helper closes pFlac before returning the buffer. */
+}
+
+drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)  /* Sample-count flavour of drflac_open_and_read_pcm_frames_s16(): same buffer, total reported in samples. */
+{
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int16* pResult;
+
+    if (channelsOut) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = 0;
+    }
+
+    pResult = drflac_open_and_read_pcm_frames_s16(onRead, onSeek, pUserData, &channels, &sampleRate, &totalPCMFrameCount);  /* Locals receive the results so NULL output pointers stay safe. */
+    if (pResult == NULL) {
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = totalPCMFrameCount * channels;  /* PCM frames -> total samples. */
+    }
+
+    return pResult;
+}
+
+
+float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)  /* Opens a decoder from the callbacks and reads the whole stream as float samples. */
+{
+    drflac* pFlac;
+
+    if (channelsOut) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);  /* The helper closes pFlac before returning the buffer. */
+}
+
+float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)  /* Sample-count flavour of drflac_open_and_read_pcm_frames_f32(): same buffer, total reported in samples. */
+{
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    float* pResult;
+
+    if (channelsOut) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = 0;
+    }
+
+    pResult = drflac_open_and_read_pcm_frames_f32(onRead, onSeek, pUserData, &channels, &sampleRate, &totalPCMFrameCount);  /* Locals receive the results so NULL output pointers stay safe. */
+    if (pResult == NULL) {
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = totalPCMFrameCount * channels;  /* PCM frames -> total samples. */
+    }
+
+    return pResult;
 }
 
 #ifndef DR_FLAC_NO_STDIO
-drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount)  /* Opens the named file and reads the whole stream as drflac_int32 samples. */
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    drflac* pFlac;
 
-    drflac* pFlac = drflac_open_file(filename);
+    if (sampleRate) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename);
     if (pFlac == NULL) {
         return NULL;
     }
 
-    return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount);
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);  /* The helper closes pFlac before returning the buffer. */
 }
 
-drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)  /* Sample-count flavour of drflac_open_file_and_read_pcm_frames_s32(): same buffer, total reported in samples. */
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int32* pResult;
 
-    drflac* pFlac = drflac_open_file(filename);
-    if (pFlac == NULL) {
+    if (channelsOut) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = 0;
+    }
+
+    pResult = drflac_open_file_and_read_pcm_frames_s32(filename, &channels, &sampleRate, &totalPCMFrameCount);  /* Locals receive the results so NULL output pointers stay safe. */
+    if (pResult == NULL) {
         return NULL;
     }
 
-    return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount);
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = totalPCMFrameCount * channels;  /* PCM frames -> total samples. */
+    }
+
+    return pResult;
 }
 
-float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
-{
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
 
-    drflac* pFlac = drflac_open_file(filename);
+drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount)  /* Opens the named file and reads the whole stream as drflac_int16 samples. */
+{
+    drflac* pFlac;
+
+    if (sampleRate) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename);
     if (pFlac == NULL) {
         return NULL;
     }
 
-    return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount);
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);  /* The helper closes pFlac before returning the buffer. */
+}
+
+drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)  /* Sample-count flavour of drflac_open_file_and_read_pcm_frames_s16(): same buffer, total reported in samples. */
+{
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int16* pResult;
+
+    if (channelsOut) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = 0;
+    }
+    
+    pResult = drflac_open_file_and_read_pcm_frames_s16(filename, &channels, &sampleRate, &totalPCMFrameCount);  /* Locals receive the results so NULL output pointers stay safe. */
+    if (pResult == NULL) {
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = totalPCMFrameCount * channels;  /* PCM frames -> total samples. */
+    }
+
+    return pResult;
+}
+
+
+float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount)  /* Opens the named file and reads the whole stream as float samples. */
+{
+    drflac* pFlac;
+
+    if (sampleRate) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);  /* The helper closes pFlac before returning the buffer. */
+}
+
+float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)  /* Sample-count flavour of drflac_open_file_and_read_pcm_frames_f32(): same buffer, total reported in samples. */
+{
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    float* pResult;
+
+    if (channelsOut) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = 0;
+    }
+
+    pResult = drflac_open_file_and_read_pcm_frames_f32(filename, &channels, &sampleRate, &totalPCMFrameCount);  /* Locals receive the results so NULL output pointers stay safe. */
+    if (pResult == NULL) {
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = totalPCMFrameCount * channels;  /* PCM frames -> total samples. */
+    }
+
+    return pResult;
 }
 #endif
 
-drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount)  /* Opens a decoder over the memory block and reads the whole stream as drflac_int32 samples. */
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    drflac* pFlac;
 
-    drflac* pFlac = drflac_open_memory(data, dataSize);
+    if (sampleRate) {  /* Clear the outputs first so they are zero if we return NULL. */
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize);
     if (pFlac == NULL) {
         return NULL;
     }
 
-    return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount);
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);  /* The helper closes pFlac before returning the buffer. */
 }
 
-drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int32* pResult;
 
-    drflac* pFlac = drflac_open_memory(data, dataSize);
+    if (channelsOut) {   /* Outputs are optional; the supplied ones start at zero so they are defined on failure. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = 0;
+    }
+    /* Sample-count flavour of drflac_open_memory_and_read_pcm_frames_s32(): same call, but the total is reported in samples. */
+    pResult = drflac_open_memory_and_read_pcm_frames_s32(data, dataSize, &channels, &sampleRate, &totalPCMFrameCount);
+    if (pResult == NULL) {   /* Failure: outputs keep the zeros written above. */
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = totalPCMFrameCount * channels;   /* frames times channels = individual samples */
+    }
+
+    return pResult;
+}
+
+
+drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount)
+{
+    drflac* pFlac;
+    /* Opens the memory block as a FLAC stream and returns whatever drflac__full_read_and_close_s16() yields for it. */
+    if (sampleRate) {   /* Outputs are optional; the supplied ones start at zero so they are defined on failure. */
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize);   /* A NULL result is returned to the caller as NULL. */
     if (pFlac == NULL) {
         return NULL;
     }
 
-    return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount);
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
 }
 
-float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount)
+drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int16* pResult;
 
-    drflac* pFlac = drflac_open_memory(data, dataSize);
+    if (channelsOut) {   /* Outputs are optional; the supplied ones start at zero so they are defined on failure. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = 0;
+    }
+    /* Sample-count flavour of drflac_open_memory_and_read_pcm_frames_s16(): same call, but the total is reported in samples. */
+    pResult = drflac_open_memory_and_read_pcm_frames_s16(data, dataSize, &channels, &sampleRate, &totalPCMFrameCount);
+    if (pResult == NULL) {   /* Failure: outputs keep the zeros written above. */
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = totalPCMFrameCount * channels;   /* frames times channels = individual samples */
+    }
+
+    return pResult;
+}
+
+
+float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount)
+{
+    drflac* pFlac;
+    /* Opens the memory block as a FLAC stream and returns whatever drflac__full_read_and_close_f32() yields for it. */
+    if (sampleRate) {   /* Outputs are optional; the supplied ones start at zero so they are defined on failure. */
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize);   /* A NULL result is returned to the caller as NULL. */
     if (pFlac == NULL) {
         return NULL;
     }
 
-    return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount);
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
 }
 
+float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)
+{
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    float* pResult;
+    /* Sample-count flavour of drflac_open_memory_and_read_pcm_frames_f32(): same call, but the total is reported in samples. */
+    if (channelsOut) {   /* Outputs are optional; the supplied ones start at zero so they are defined on failure. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = 0;
+    }
+
+    pResult = drflac_open_memory_and_read_pcm_frames_f32(data, dataSize, &channels, &sampleRate, &totalPCMFrameCount);
+    if (pResult == NULL) {   /* Failure: outputs keep the zeros written above. */
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalSampleCountOut) {
+        *totalSampleCountOut = totalPCMFrameCount * channels;   /* frames times channels = individual samples */
+    }
+
+    return pResult;
+}
+
+
 void drflac_free(void* pSampleDataReturnedByOpenAndDecode)
 {
     DRFLAC_FREE(pSampleDataReturnedByOpenAndDecode);
@@ -5687,208 +8556,347 @@ void drflac_free(void* pSampleDataReturnedByOpenAndDecode)
 
 
 
-void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const char* pComments)
+void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments)
 {
     if (pIter == NULL) {
         return;
     }
 
     pIter->countRemaining = commentCount;
-    pIter->pRunningData   = pComments;
+    pIter->pRunningData   = (const char*)pComments;   /* Byte cursor that drflac_next_vorbis_comment() advances. */
 }
 
 const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut)
 {
-    // Safety.
-    if (pCommentLengthOut) *pCommentLengthOut = 0;
+    drflac_uint32 length;   /* Unsigned, like the 32-bit length prefix it is read from and like *pCommentLengthOut. */
+    const char* pComment;
+    /* Returns the next comment's bytes and, if requested, its length; NULL when pIter is NULL, exhausted or has no data. */
+    /* Safety. */
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = 0;
+    }
 
     if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
         return NULL;
     }
 
-    drflac_uint32 length = drflac__le2host_32(*(drflac_uint32*)pIter->pRunningData);
+    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);   /* 4-byte length prefix precedes the text. */
     pIter->pRunningData += 4;
 
-    const char* pComment = pIter->pRunningData;
+    pComment = pIter->pRunningData;   /* Points into the iterator's data; nothing is copied. */
     pIter->pRunningData += length;
     pIter->countRemaining -= 1;
 
-    if (pCommentLengthOut) *pCommentLengthOut = length;
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = length;
+    }
+
     return pComment;
 }
-#endif  //DR_FLAC_IMPLEMENTATION
 
 
-// REVISION HISTORY
-//
-// v0.9.7 - 2018-07-05
-//   - Fix a warning.
-//
-// v0.9.6 - 2018-06-29
-//   - Fix some typos.
-//
-// v0.9.5 - 2018-06-23
-//   - Fix some warnings.
-//
-// v0.9.4 - 2018-06-14
-//   - Optimizations to seeking.
-//   - Clean up.
-//
-// v0.9.3 - 2018-05-22
-//   - Bug fix.
-//
-// v0.9.2 - 2018-05-12
-//   - Fix a compilation error due to a missing break statement.
-//
-// v0.9.1 - 2018-04-29
-//   - Fix compilation error with Clang.
-//
-// v0.9 - 2018-04-24
-//   - Fix Clang build.
-//   - Start using major.minor.revision versioning.
-//
-// v0.8g - 2018-04-19
-//   - Fix build on non-x86/x64 architectures.
-//
-// v0.8f - 2018-02-02
-//   - Stop pretending to support changing rate/channels mid stream.
-//
-// v0.8e - 2018-02-01
-//   - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream.
-//   - Fix a crash the the Rice partition order is invalid.
-//
-// v0.8d - 2017-09-22
-//   - Add support for decoding streams with ID3 tags. ID3 tags are just skipped.
-//
-// v0.8c - 2017-09-07
-//   - Fix warning on non-x86/x64 architectures.
-//
-// v0.8b - 2017-08-19
-//   - Fix build on non-x86/x64 architectures.
-//
-// v0.8a - 2017-08-13
-//   - A small optimization for the Clang build.
-//
-// v0.8 - 2017-08-12
-//   - API CHANGE: Rename dr_* types to drflac_*.
-//   - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation.
-//   - Add support for custom implementations of malloc(), realloc(), etc.
-//   - Add CRC checking to Ogg encapsulated streams.
-//   - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported.
-//   - Bug fixes.
-//
-// v0.7 - 2017-07-23
-//   - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed().
-//
-// v0.6 - 2017-07-22
-//   - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they
-//     never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame.
-//
-// v0.5 - 2017-07-16
-//   - Fix typos.
-//   - Change drflac_bool* types to unsigned.
-//   - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC.
-//
-// v0.4f - 2017-03-10
-//   - Fix a couple of bugs with the bitstreaming code.
-//
-// v0.4e - 2017-02-17
-//   - Fix some warnings.
-//
-// v0.4d - 2016-12-26
-//   - Add support for 32-bit floating-point PCM decoding.
-//   - Use drflac_int*/drflac_uint* sized types to improve compiler support.
-//   - Minor improvements to documentation.
-//
-// v0.4c - 2016-12-26
-//   - Add support for signed 16-bit integer PCM decoding.
-//
-// v0.4b - 2016-10-23
-//   - A minor change to drflac_bool8 and drflac_bool32 types.
-//
-// v0.4a - 2016-10-11
-//   - Rename drBool32 to drflac_bool32 for styling consistency.
-//
-// v0.4 - 2016-09-29
-//   - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type.
-//   - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32().
-//   - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to
-//     keep it consistent with drflac_audio.
-//
-// v0.3f - 2016-09-21
-//   - Fix a warning with GCC.
-//
-// v0.3e - 2016-09-18
-//   - Fixed a bug where GCC 4.3+ was not getting properly identified.
-//   - Fixed a few typos.
-//   - Changed date formats to ISO 8601 (YYYY-MM-DD).
-//
-// v0.3d - 2016-06-11
-//   - Minor clean up.
-//
-// v0.3c - 2016-05-28
-//   - Fixed compilation error.
-//
-// v0.3b - 2016-05-16
-//   - Fixed Linux/GCC build.
-//   - Updated documentation.
-//
-// v0.3a - 2016-05-15
-//   - Minor fixes to documentation.
-//
-// v0.3 - 2016-05-11
-//   - Optimizations. Now at about parity with the reference implementation on 32-bit builds.
-//   - Lots of clean up.
-//
-// v0.2b - 2016-05-10
-//   - Bug fixes.
-//
-// v0.2a - 2016-05-10
-//   - Made drflac_open_and_decode() more robust.
-//   - Removed an unused debugging variable
-//
-// v0.2 - 2016-05-09
-//   - Added support for Ogg encapsulation.
-//   - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek
-//     should be relative to the start or the current position. Also changes the seeking rules such that
-//     seeking offsets will never be negative.
-//   - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count.
-//
-// v0.1b - 2016-05-07
-//   - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize.
-//   - Removed a stale comment.
-//
-// v0.1a - 2016-05-05
-//   - Minor formatting changes.
-//   - Fixed a warning on the GCC build.
-//
-// v0.1 - 2016-05-03
-//   - Initial versioned release.
+
+
+void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData)
+{
+    if (pIter == NULL) {   /* Nothing to initialise. */
+        return;
+    }
+
+    pIter->countRemaining = trackCount;                /* Tracks still to be returned by drflac_next_cuesheet_track(). */
+    pIter->pRunningData   = (const char*)pTrackData;   /* Byte cursor that drflac_next_cuesheet_track() advances. */
+}
+
+drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack)
+{
+    drflac_cuesheet_track cuesheetTrack;
+    const char* pRunningData;
+    drflac_uint64 offsetHi;
+    drflac_uint64 offsetLo;
+    /* Decodes one track record at the iterator's cursor into *pCuesheetTrack (when non-NULL) and advances past it. */
+    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    pRunningData = pIter->pRunningData;
+    /* NOTE(review): the 32-bit loads below go through a cast char pointer - confirm the track data is suitably aligned. */
+    offsetHi                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    offsetLo                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    cuesheetTrack.offset       = offsetLo | (offsetHi << 32);
+    cuesheetTrack.trackNumber  = pRunningData[0];                                         pRunningData += 1;
+    drflac_copy_memory(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC));     pRunningData += 12;
+    cuesheetTrack.isAudio      = (pRunningData[0] & 0x80) != 0;
+    cuesheetTrack.preEmphasis  = (pRunningData[0] & 0x40) != 0;                           pRunningData += 14;
+    cuesheetTrack.indexCount   = pRunningData[0];                                         pRunningData += 1;
+    cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData;        pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index);
+
+    pIter->pRunningData = pRunningData;   /* Cursor now sits just past this track's index points. */
+    pIter->countRemaining -= 1;
+
+    if (pCuesheetTrack) {   /* The output is optional; the iterator advances either way. */
+        *pCuesheetTrack = cuesheetTrack;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(__GNUC__)
+    #pragma GCC diagnostic pop
+#endif
+#endif  /* DR_FLAC_IMPLEMENTATION */
 
 
 /*
+REVISION HISTORY
+================
+v0.11.7 - 2019-05-06
+  - C89 fixes.
+
+v0.11.6 - 2019-05-05
+  - Add support for C89.
+  - Fix a compiler warning when CRC is disabled.
+  - Change license to choice of public domain or MIT-0.
+
+v0.11.5 - 2019-04-19
+  - Fix a compiler error with GCC. 
+
+v0.11.4 - 2019-04-17
+  - Fix some warnings with GCC when compiling with -std=c99.
+
+v0.11.3 - 2019-04-07
+  - Silence warnings with GCC.
+
+v0.11.2 - 2019-03-10
+  - Fix a warning.
+
+v0.11.1 - 2019-02-17
+  - Fix a potential bug with seeking.
+
+v0.11.0 - 2018-12-16
+  - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with 
+    drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take
+    and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by
+    dividing it by the channel count, and then do the same with the return value.
+  - API CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as
+    the changes to drflac_read_*() apply.
+  - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as
+    the changes to drflac_read_*() apply.
+  - Optimizations.
+
+v0.10.0 - 2018-09-11
+  - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you
+    need to do it yourself via the callback API.
+  - Fix the clang build.
+  - Fix undefined behavior.
+  - Fix errors with CUESHEET metadata blocks.
+  - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the
+    Vorbis comment API.
+  - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams.
+  - Minor optimizations.
+
+v0.9.11 - 2018-08-29
+  - Fix a bug with sample reconstruction.
+
+v0.9.10 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.9.9 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+
+v0.9.8 - 2018-07-24
+  - Fix compilation errors.
+
+v0.9.7 - 2018-07-05
+  - Fix a warning.
+
+v0.9.6 - 2018-06-29
+  - Fix some typos.
+
+v0.9.5 - 2018-06-23
+  - Fix some warnings.
+
+v0.9.4 - 2018-06-14
+  - Optimizations to seeking.
+  - Clean up.
+
+v0.9.3 - 2018-05-22
+  - Bug fix.
+
+v0.9.2 - 2018-05-12
+  - Fix a compilation error due to a missing break statement.
+
+v0.9.1 - 2018-04-29
+  - Fix compilation error with Clang.
+
+v0.9 - 2018-04-24
+  - Fix Clang build.
+  - Start using major.minor.revision versioning.
+
+v0.8g - 2018-04-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8f - 2018-02-02
+  - Stop pretending to support changing rate/channels mid stream.
+
+v0.8e - 2018-02-01
+  - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream.
+  - Fix a crash when the Rice partition order is invalid.
+
+v0.8d - 2017-09-22
+  - Add support for decoding streams with ID3 tags. ID3 tags are just skipped.
+
+v0.8c - 2017-09-07
+  - Fix warning on non-x86/x64 architectures.
+
+v0.8b - 2017-08-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8a - 2017-08-13
+  - A small optimization for the Clang build.
+
+v0.8 - 2017-08-12
+  - API CHANGE: Rename dr_* types to drflac_*.
+  - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation.
+  - Add support for custom implementations of malloc(), realloc(), etc.
+  - Add CRC checking to Ogg encapsulated streams.
+  - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported.
+  - Bug fixes.
+
+v0.7 - 2017-07-23
+  - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed().
+
+v0.6 - 2017-07-22
+  - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they
+    never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame.
+
+v0.5 - 2017-07-16
+  - Fix typos.
+  - Change drflac_bool* types to unsigned.
+  - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC.
+
+v0.4f - 2017-03-10
+  - Fix a couple of bugs with the bitstreaming code.
+
+v0.4e - 2017-02-17
+  - Fix some warnings.
+
+v0.4d - 2016-12-26
+  - Add support for 32-bit floating-point PCM decoding.
+  - Use drflac_int* and drflac_uint* sized types to improve compiler support.
+  - Minor improvements to documentation.
+
+v0.4c - 2016-12-26
+  - Add support for signed 16-bit integer PCM decoding.
+
+v0.4b - 2016-10-23
+  - A minor change to drflac_bool8 and drflac_bool32 types.
+
+v0.4a - 2016-10-11
+  - Rename drBool32 to drflac_bool32 for styling consistency.
+
+v0.4 - 2016-09-29
+  - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type.
+  - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32().
+  - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to
+    keep it consistent with drflac_audio.
+
+v0.3f - 2016-09-21
+  - Fix a warning with GCC.
+
+v0.3e - 2016-09-18
+  - Fixed a bug where GCC 4.3+ was not getting properly identified.
+  - Fixed a few typos.
+  - Changed date formats to ISO 8601 (YYYY-MM-DD).
+
+v0.3d - 2016-06-11
+  - Minor clean up.
+
+v0.3c - 2016-05-28
+  - Fixed compilation error.
+
+v0.3b - 2016-05-16
+  - Fixed Linux/GCC build.
+  - Updated documentation.
+
+v0.3a - 2016-05-15
+  - Minor fixes to documentation.
+
+v0.3 - 2016-05-11
+  - Optimizations. Now at about parity with the reference implementation on 32-bit builds.
+  - Lots of clean up.
+
+v0.2b - 2016-05-10
+  - Bug fixes.
+
+v0.2a - 2016-05-10
+  - Made drflac_open_and_decode() more robust.
+  - Removed an unused debugging variable
+
+v0.2 - 2016-05-09
+  - Added support for Ogg encapsulation.
+  - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek
+    should be relative to the start or the current position. Also changes the seeking rules such that
+    seeking offsets will never be negative.
+  - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count.
+
+v0.1b - 2016-05-07
+  - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize.
+  - Removed a stale comment.
+
+v0.1a - 2016-05-05
+  - Minor formatting changes.
+  - Fixed a warning on the GCC build.
+
+v0.1 - 2016-05-03
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
 This is free and unencumbered software released into the public domain.
 
-Anyone is free to copy, modify, publish, use, compile, sell, or
-distribute this software, either in source code form or as a compiled
-binary, for any purpose, commercial or non-commercial, and by any
-means.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
 
-In jurisdictions that recognize copyright laws, the author or authors
-of this software dedicate any and all copyright interest in the
-software to the public domain. We make this dedication for the benefit
-of the public at large and to the detriment of our heirs and
-successors. We intend this dedication to be an overt act of
-relinquishment in perpetuity of all present and future rights to this
-software under copyright law.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2018 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
 */
diff --git a/src/external/dr_mp3.h b/src/external/dr_mp3.h
index 070f0c15..26aeec56 100644
--- a/src/external/dr_mp3.h
+++ b/src/external/dr_mp3.h
@@ -1,57 +1,61 @@
-// MP3 audio decoder. Public domain. See "unlicense" statement at the end of this file.
-// dr_mp3 - v0.4.0 - 2018-xx-xx
-//
-// David Reid - mackron@gmail.com
-//
-// Based off minimp3 (https://github.com/lieff/minimp3) which is where the real work was done. See the bottom of this file for
-// differences between minimp3 and dr_mp3.
+/*
+MP3 audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_mp3 - v0.4.4 - 2019-05-06
 
-// USAGE
-// =====
-// dr_mp3 is a single-file library. To use it, do something like the following in one .c file.
-//     #define DR_MP3_IMPLEMENTATION
-//     #include "dr_mp3.h"
-//
-// You can then #include this file in other parts of the program as you would with any other header file. To decode audio data,
-// do something like the following:
-//
-//     drmp3 mp3;
-//     if (!drmp3_init_file(&mp3, "MySong.mp3", NULL)) {
-//         // Failed to open file
-//     }
-//
-//     ...
-//
-//     drmp3_uint64 framesRead = drmp3_read_f32(pMP3, framesToRead, pFrames);
-//
-// The drmp3 object is transparent so you can get access to the channel count and sample rate like so:
-//
-//     drmp3_uint32 channels = mp3.channels;
-//     drmp3_uint32 sampleRate = mp3.sampleRate;
-//
-// The third parameter of drmp3_init_file() in the example above allows you to control the output channel count and sample rate. It
-// is a pointer to a drmp3_config object. Setting any of the variables of this object to 0 will cause dr_mp3 to use defaults.
-//
-// The example above initializes a decoder from a file, but you can also initialize it from a block of memory and read and seek
-// callbacks with drmp3_init_memory() and drmp3_init() respectively.
-//
-// You do need to do any annoying memory management when reading PCM frames - this is all managed internally. You can request
-// any number of PCM frames in each call to drmp3_read_f32() and it will return as many PCM frames as it can, up to the requested
-// amount.
-//
-// You can also decode an entire file in one go with drmp3_open_and_decode_f32(), drmp3_open_and_decode_memory_f32() and
-// drmp3_open_and_decode_file_f32().
-//
-//
-// OPTIONS
-// =======
-// #define these options before including this file.
-//
-// #define DR_MP3_NO_STDIO
-//   Disable drmp3_init_file(), etc.
-//
-// #define DR_MP3_NO_SIMD
-//   Disable SIMD optimizations.
+David Reid - mackron@gmail.com
+
+Based off minimp3 (https://github.com/lieff/minimp3) which is where the real work was done. See the bottom of this file for
+differences between minimp3 and dr_mp3.
+*/
+
+/*
+USAGE
+=====
+dr_mp3 is a single-file library. To use it, do something like the following in one .c file.
+    #define DR_MP3_IMPLEMENTATION
+    #include "dr_mp3.h"
+
+You can then #include this file in other parts of the program as you would with any other header file. To decode audio data,
+do something like the following:
+
+    drmp3 mp3;
+    if (!drmp3_init_file(&mp3, "MySong.mp3", NULL)) {
+        // Failed to open file
+    }
+
+    ...
+
+    drmp3_uint64 framesRead = drmp3_read_pcm_frames_f32(&mp3, framesToRead, pFrames);
+
+The drmp3 object is transparent so you can get access to the channel count and sample rate like so:
+
+    drmp3_uint32 channels = mp3.channels;
+    drmp3_uint32 sampleRate = mp3.sampleRate;
+
+The third parameter of drmp3_init_file() in the example above allows you to control the output channel count and sample rate. It
+is a pointer to a drmp3_config object. Setting any of the variables of this object to 0 will cause dr_mp3 to use defaults.
+
+The example above initializes a decoder from a file, but you can also initialize it from a block of memory and read and seek
+callbacks with drmp3_init_memory() and drmp3_init() respectively.
+
+You do not need to do any annoying memory management when reading PCM frames - this is all managed internally. You can request
+any number of PCM frames in each call to drmp3_read_pcm_frames_f32() and it will return as many PCM frames as it can, up to the
+requested amount.
+
+You can also decode an entire file in one go with drmp3_open_and_read_f32(), drmp3_open_memory_and_read_f32() and
+drmp3_open_file_and_read_f32().
+
+
+OPTIONS
+=======
+#define these options before including this file.
+
+#define DR_MP3_NO_STDIO
+  Disable drmp3_init_file(), etc.
+
+#define DR_MP3_NO_SIMD
+  Disable SIMD optimizations.
+*/
 
 #ifndef dr_mp3_h
 #define dr_mp3_h
@@ -90,9 +94,20 @@ typedef drmp3_uint32     drmp3_bool32;
 #define DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME  1152
 #define DRMP3_MAX_SAMPLES_PER_FRAME         (DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME*2)
 
+#ifdef _MSC_VER
+#define DRMP3_INLINE __forceinline
+#else
+#ifdef __GNUC__
+#define DRMP3_INLINE __inline__ __attribute__((always_inline))
+#else
+#define DRMP3_INLINE
+#endif
+#endif
 
-// Low Level Push API
-// ==================
+/*
+Low Level Push API
+==================
+*/
 typedef struct
 {
     int frame_bytes, channels, hz, layer, bitrate_kbps;
@@ -105,23 +120,30 @@ typedef struct
     unsigned char header[4], reserv_buf[511];
 } drmp3dec;
 
-// Initializes a low level decoder.
+/* Initializes a low level decoder. */
 void drmp3dec_init(drmp3dec *dec);
 
-// Reads a frame from a low level decoder.
+/* Reads a frame from a low level decoder. */
 int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes, void *pcm, drmp3dec_frame_info *info);
 
-// Helper for converting between f32 and s16.
+/* Helper for converting between f32 and s16. */
 void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, int num_samples);
 
 
 
-
-// Main API (Pull API)
-// ===================
+/*
+Main API (Pull API)
+===================
+*/
+#ifndef DR_MP3_DEFAULT_CHANNELS
+#define DR_MP3_DEFAULT_CHANNELS         2
+#endif
+#ifndef DR_MP3_DEFAULT_SAMPLE_RATE
+#define DR_MP3_DEFAULT_SAMPLE_RATE      44100
+#endif
 
 typedef struct drmp3_src drmp3_src;
-typedef drmp3_uint64 (* drmp3_src_read_proc)(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, void* pUserData); // Returns the number of frames that were read.
+typedef drmp3_uint64 (* drmp3_src_read_proc)(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, void* pUserData); /* Returns the number of frames that were read. */
 
 typedef enum
 {
@@ -144,7 +166,7 @@ typedef struct
     drmp3_uint32 sampleRateOut;
     drmp3_uint32 channels;
     drmp3_src_algorithm algorithm;
-    drmp3_uint32 cacheSizeInFrames;  // The number of frames to read from the client at a time.
+    drmp3_uint32 cacheSizeInFrames;  /* The number of frames to read from the client at a time. */
 } drmp3_src_config;
 
 struct drmp3_src
@@ -153,12 +175,12 @@ struct drmp3_src
     drmp3_src_read_proc onRead;
     void* pUserData;
     float bin[256];
-    drmp3_src_cache cache;    // <-- For simplifying and optimizing client -> memory reading.
+    drmp3_src_cache cache;    /* <-- For simplifying and optimizing client -> memory reading. */
     union
     {
         struct
         {
-            float alpha;
+            double alpha;
             drmp3_bool32 isPrevFramesLoaded : 1;
             drmp3_bool32 isNextFramesLoaded : 1;
         } linear;
@@ -171,28 +193,40 @@ typedef enum
     drmp3_seek_origin_current
 } drmp3_seek_origin;
 
-// Callback for when data is read. Return value is the number of bytes actually read.
-//
-// pUserData   [in]  The user data that was passed to drmp3_init(), drmp3_open() and family.
-// pBufferOut  [out] The output buffer.
-// bytesToRead [in]  The number of bytes to read.
-//
-// Returns the number of bytes actually read.
-//
-// A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
-// either the entire bytesToRead is filled or you have reached the end of the stream.
+typedef struct
+{
+    drmp3_uint64 seekPosInBytes;        /* Points to the first byte of an MP3 frame. */
+    drmp3_uint64 pcmFrameIndex;         /* The index of the PCM frame this seek point targets. */
+    drmp3_uint16 mp3FramesToDiscard;    /* The number of whole MP3 frames to be discarded before pcmFramesToDiscard. */
+    drmp3_uint16 pcmFramesToDiscard;    /* The number of leading samples to read and discard. These are discarded after mp3FramesToDiscard. */
+} drmp3_seek_point;
+
+/*
+Callback for when data is read. Return value is the number of bytes actually read.
+
+pUserData   [in]  The user data that was passed to drmp3_init(), drmp3_open() and family.
+pBufferOut  [out] The output buffer.
+bytesToRead [in]  The number of bytes to read.
+
+Returns the number of bytes actually read.
+
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
+either the entire bytesToRead is filled or you have reached the end of the stream.
+*/
 typedef size_t (* drmp3_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
 
-// Callback for when data needs to be seeked.
-//
-// pUserData [in] The user data that was passed to drmp3_init(), drmp3_open() and family.
-// offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
-// origin    [in] The origin of the seek - the current position or the start of the stream.
-//
-// Returns whether or not the seek was successful.
-//
-// Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which
-// will be either drmp3_seek_origin_start or drmp3_seek_origin_current.
+/*
+Callback for when data needs to be seeked.
+
+pUserData [in] The user data that was passed to drmp3_init(), drmp3_open() and family.
+offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
+origin    [in] The origin of the seek - the current position or the start of the stream.
+
+Returns whether or not the seek was successful.
+
+Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which
+will be either drmp3_seek_origin_start or drmp3_seek_origin_current.
+*/
 typedef drmp3_bool32 (* drmp3_seek_proc)(void* pUserData, int offset, drmp3_seek_origin origin);
 
 typedef struct
@@ -210,13 +244,16 @@ typedef struct
     drmp3_read_proc onRead;
     drmp3_seek_proc onSeek;
     void* pUserData;
-    drmp3_uint32 mp3FrameChannels;      // The number of channels in the currently loaded MP3 frame. Internal use only.
-    drmp3_uint32 mp3FrameSampleRate;    // The sample rate of the currently loaded MP3 frame. Internal use only.
+    drmp3_uint32 mp3FrameChannels;      /* The number of channels in the currently loaded MP3 frame. Internal use only. */
+    drmp3_uint32 mp3FrameSampleRate;    /* The sample rate of the currently loaded MP3 frame. Internal use only. */
     drmp3_uint32 pcmFramesConsumedInMP3Frame;
     drmp3_uint32 pcmFramesRemainingInMP3Frame;
-    drmp3_uint8 pcmFrames[sizeof(float)*DRMP3_MAX_SAMPLES_PER_FRAME];  // <-- Multipled by sizeof(float) to ensure there's enough room for DR_MP3_FLOAT_OUTPUT.
-    drmp3_uint64 currentPCMFrame;       // The current PCM frame, globally, based on the output sample rate. Mainly used for seeking.
+    drmp3_uint8 pcmFrames[sizeof(float)*DRMP3_MAX_SAMPLES_PER_FRAME];  /* <-- Multiplied by sizeof(float) to ensure there's enough room for DR_MP3_FLOAT_OUTPUT. */
+    drmp3_uint64 currentPCMFrame;       /* The current PCM frame, globally, based on the output sample rate. Mainly used for seeking. */
+    drmp3_uint64 streamCursor;          /* The current byte the decoder is sitting on in the raw stream. */
     drmp3_src src;
+    drmp3_seek_point* pSeekPoints;      /* NULL by default. Set with drmp3_bind_seek_table(). Memory is owned by the client. dr_mp3 will never attempt to free this pointer. */
+    drmp3_uint32 seekPointCount;        /* The number of items in pSeekPoints. When set to 0, no seek table is assumed. Defaults to zero. */
     size_t dataSize;
     size_t dataCapacity;
     drmp3_uint8* pData;
@@ -226,94 +263,155 @@ typedef struct
         const drmp3_uint8* pData;
         size_t dataSize;
         size_t currentReadPos;
-    } memory;   // Only used for decoders that were opened against a block of memory.
+    } memory;   /* Only used for decoders that were opened against a block of memory. */
 } drmp3;
 
-// Initializes an MP3 decoder.
-//
-// onRead    [in]           The function to call when data needs to be read from the client.
-// onSeek    [in]           The function to call when the read position of the client data needs to move.
-// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
-//
-// Returns true if successful; false otherwise.
-//
-// Close the loader with drmp3_uninit().
-//
-// See also: drmp3_init_file(), drmp3_init_memory(), drmp3_uninit()
+/*
+Initializes an MP3 decoder.
+
+onRead    [in]           The function to call when data needs to be read from the client.
+onSeek    [in]           The function to call when the read position of the client data needs to move.
+pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+
+Returns true if successful; false otherwise.
+
+Close the loader with drmp3_uninit().
+
+See also: drmp3_init_file(), drmp3_init_memory(), drmp3_uninit()
+*/
 drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_config* pConfig);
 
-// Initializes an MP3 decoder from a block of memory.
-//
-// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
-// the lifetime of the drmp3 object.
-//
-// The buffer should contain the contents of the entire MP3 file.
+/*
+Initializes an MP3 decoder from a block of memory.
+
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+the lifetime of the drmp3 object.
+
+The buffer should contain the contents of the entire MP3 file.
+*/
 drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_config* pConfig);
 
 #ifndef DR_MP3_NO_STDIO
-// Initializes an MP3 decoder from a file.
-//
-// This holds the internal FILE object until drmp3_uninit() is called. Keep this in mind if you're caching drmp3
-// objects because the operating system may restrict the number of file handles an application can have open at
-// any given time.
+/*
+Initializes an MP3 decoder from a file.
+
+This holds the internal FILE object until drmp3_uninit() is called. Keep this in mind if you're caching drmp3
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
 drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* filePath, const drmp3_config* pConfig);
 #endif
 
-// Uninitializes an MP3 decoder.
+/*
+Uninitializes an MP3 decoder.
+*/
 void drmp3_uninit(drmp3* pMP3);
 
-// Reads PCM frames as interleaved 32-bit IEEE floating point PCM.
-//
-// Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames.
+/*
+Reads PCM frames as interleaved 32-bit IEEE floating point PCM.
+
+Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames.
+*/
 drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut);
 
-// Seeks to a specific frame.
-//
-// Note that this is _not_ an MP3 frame, but rather a PCM frame.
+/*
+Reads PCM frames as interleaved signed 16-bit integer PCM.
+
+Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames.
+*/
+drmp3_uint64 drmp3_read_pcm_frames_s16(drmp3* pMP3, drmp3_uint64 framesToRead, drmp3_int16* pBufferOut);
+
+/*
+Seeks to a specific frame.
+
+Note that this is _not_ an MP3 frame, but rather a PCM frame.
+*/
 drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex);
 
-// Calculates the total number of PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet
-// radio. Runs in linear time. Returns 0 on error.
+/*
+Calculates the total number of PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet
+radio. Runs in linear time. Returns 0 on error.
+*/
 drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3);
 
-// Calculates the total number of MP3 frames in the MP3 stream. Cannot be used for infinite streams such as internet
-// radio. Runs in linear time. Returns 0 on error.
+/*
+Calculates the total number of MP3 frames in the MP3 stream. Cannot be used for infinite streams such as internet
+radio. Runs in linear time. Returns 0 on error.
+*/
 drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3);
 
+/*
+Calculates the total number of MP3 and PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet
+radio. Runs in linear time. Returns 0 on error.
+
+This is equivalent to calling drmp3_get_mp3_frame_count() and drmp3_get_pcm_frame_count() except that it's more efficient.
+*/
+drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint64* pMP3FrameCount, drmp3_uint64* pPCMFrameCount);
+
+/*
+Calculates the seekpoints based on PCM frames. This is slow.
+
+pSeekPointCount is a pointer to a uint32 containing the seek point count. On input it contains the desired count.
+On output it contains the actual count. The reason for this design is that the client may request too many
+seekpoints, in which case dr_mp3 will return a corrected count.
+
+Note that seektable seeking is not quite sample exact when the MP3 stream contains inconsistent sample rates.
+*/
+drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints);
+
+/*
+Binds a seek table to the decoder.
+
+This does _not_ make a copy of pSeekPoints - it only references it. It is up to the application to ensure this
+remains valid while it is bound to the decoder.
+
+Use drmp3_calculate_seek_points() to calculate the seek points.
+*/
+drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints);
 
 
-// Opens an decodes an entire MP3 stream as a single operation.
-//
-// pConfig is both an input and output. On input it contains what you want. On output it contains what you got.
-//
-// Free the returned pointer with drmp3_free().
+/*
+Opens and decodes an entire MP3 stream as a single operation.
+
+pConfig is both an input and output. On input it contains what you want. On output it contains what you got.
+
+Free the returned pointer with drmp3_free().
+*/
 float* drmp3_open_and_read_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount);
+drmp3_int16* drmp3_open_and_read_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount);
+
 float* drmp3_open_memory_and_read_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount);
+drmp3_int16* drmp3_open_memory_and_read_s16(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount);
+
 #ifndef DR_MP3_NO_STDIO
 float* drmp3_open_file_and_read_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount);
+drmp3_int16* drmp3_open_file_and_read_s16(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount);
 #endif
 
-// Frees any memory that was allocated by a public drmp3 API.
+/*
+Frees any memory that was allocated by a public drmp3 API.
+*/
 void drmp3_free(void* p);
 
 #ifdef __cplusplus
 }
 #endif
-#endif  // dr_mp3_h
+#endif  /* dr_mp3_h */
 
 
-/////////////////////////////////////////////////////
-//
-// IMPLEMENTATION
-//
-/////////////////////////////////////////////////////
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
 #ifdef DR_MP3_IMPLEMENTATION
 #include <stdlib.h>
 #include <string.h>
-#include <stdint.h>
-#include <limits.h> // For INT_MAX
+#include <limits.h> /* For INT_MAX */
 
-// Disable SIMD when compiling with TCC for now.
+/* Disable SIMD when compiling with TCC for now. */
 #if defined(__TINYC__)
 #define DR_MP3_NO_SIMD
 #endif
@@ -365,7 +463,7 @@ void drmp3_free(void* p);
 #define DR_MP3_ONLY_SIMD
 #endif
 
-#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
+#if ((defined(_MSC_VER) && _MSC_VER >= 1400) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
 #if defined(_MSC_VER)
 #include <intrin.h>
 #endif
@@ -780,8 +878,8 @@ static int drmp3_L3_read_side_info(drmp3_bs *bs, drmp3_L3_gr_info *gr, const drm
 
     unsigned tables, scfsi = 0;
     int main_data_begin, part_23_sum = 0;
-    int sr_idx = DRMP3_HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
     int gr_count = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2;
+    int sr_idx = DRMP3_HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
 
     if (DRMP3_HDR_TEST_MPEG1(hdr))
     {
@@ -1070,7 +1168,7 @@ static void drmp3_L3_huffman(float *dst, drmp3_bs *bs, const drmp3_L3_gr_info *g
                         lsb += DRMP3_PEEK_BITS(linbits);
                         DRMP3_FLUSH_BITS(linbits);
                         DRMP3_CHECK_BITS;
-                        *dst = one*drmp3_L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1);
+                        *dst = one*drmp3_L3_pow_43(lsb)*((drmp3_int32)bs_cache < 0 ? -1: 1);
                     } else
                     {
                         *dst = g_drmp3_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
@@ -1654,9 +1752,10 @@ typedef drmp3_int16 drmp3d_sample_t;
 
 static drmp3_int16 drmp3d_scale_pcm(float sample)
 {
+    drmp3_int16 s;
     if (sample >=  32766.5) return (drmp3_int16) 32767;
     if (sample <= -32767.5) return (drmp3_int16)-32768;
-    drmp3_int16 s = (drmp3_int16)(sample + .5f);
+    s = (drmp3_int16)(sample + .5f);
     s -= (s < 0);   /* away from zero, to be compliant */
     return (drmp3_int16)s;
 }
@@ -1964,11 +2063,6 @@ int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes
     info->layer = 4 - DRMP3_HDR_GET_LAYER(hdr);
     info->bitrate_kbps = drmp3_hdr_bitrate_kbps(hdr);
 
-    if (!pcm)
-    {
-        return drmp3_hdr_frame_samples(hdr);
-    }
-
     drmp3_bs_init(bs_frame, hdr + DRMP3_HDR_SIZE, frame_size - DRMP3_HDR_SIZE);
     if (DRMP3_HDR_IS_CRC(hdr))
     {
@@ -1984,7 +2078,7 @@ int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes
             return 0;
         }
         success = drmp3_L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
-        if (success)
+        if (success && pcm != NULL)
         {
             for (igr = 0; igr < (DRMP3_HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*576*info->channels))
             {
@@ -2000,6 +2094,11 @@ int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes
         return 0;
 #else
         drmp3_L12_scale_info sci[1];
+
+        if (pcm == NULL) {
+            return drmp3_hdr_frame_samples(hdr);
+        }
+
         drmp3_L12_read_scale_info(hdr, bs_frame, sci);
 
         memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
@@ -2021,6 +2120,7 @@ int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes
         }
 #endif
     }
+
     return success*drmp3_hdr_frame_samples(dec->header);
 }
 
@@ -2085,11 +2185,11 @@ void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, int num_samples)
 
 
 
-///////////////////////////////////////////////////////////////////////////////
-//
-// Main Public API
-//
-///////////////////////////////////////////////////////////////////////////////
+/************************************************************************************************************************************************************
+
+ Main Public API
+
+ ************************************************************************************************************************************************************/
 
 #if defined(SIZE_MAX)
     #define DRMP3_SIZE_MAX  SIZE_MAX
@@ -2101,16 +2201,13 @@ void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, int num_samples)
     #endif
 #endif
 
-// Options.
-#ifndef DR_MP3_DEFAULT_CHANNELS
-#define DR_MP3_DEFAULT_CHANNELS      2
-#endif
-#ifndef DR_MP3_DEFAULT_SAMPLE_RATE
-#define DR_MP3_DEFAULT_SAMPLE_RATE   44100
+/* Options. */
+#ifndef DRMP3_SEEK_LEADING_MP3_FRAMES
+#define DRMP3_SEEK_LEADING_MP3_FRAMES   2
 #endif
 
 
-// Standard library stuff.
+/* Standard library stuff. */
 #ifndef DRMP3_ASSERT
 #include <assert.h>
 #define DRMP3_ASSERT(expression)           assert(expression)
@@ -2143,16 +2240,17 @@ void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, int num_samples)
 #define drmp3_max(x, y)   (((x) > (y)) ? (x) : (y))
 #define drmp3_min(x, y)   (((x) < (y)) ? (x) : (y))
 
-#define DRMP3_DATA_CHUNK_SIZE  16384    // The size in bytes of each chunk of data to read from the MP3 stream. minimp3 recommends 16K.
+#define DRMP3_DATA_CHUNK_SIZE  16384    /* The size in bytes of each chunk of data to read from the MP3 stream. minimp3 recommends 16K. */
 
-static inline float drmp3_mix_f32(float x, float y, float a)
+static DRMP3_INLINE float drmp3_mix_f32(float x, float y, float a)
 {
     return x*(1-a) + y*a;
 }
 
 static void drmp3_blend_f32(float* pOut, float* pInA, float* pInB, float factor, drmp3_uint32 channels)
 {
-    for (drmp3_uint32 i = 0; i < channels; ++i) {
+    drmp3_uint32 i;
+    for (i = 0; i < channels; ++i) {
         pOut[i] = drmp3_mix_f32(pInA[i], pInB[i], factor);
     }
 }
@@ -2169,17 +2267,20 @@ void drmp3_src_cache_init(drmp3_src* pSRC, drmp3_src_cache* pCache)
 
 drmp3_uint64 drmp3_src_cache_read_frames(drmp3_src_cache* pCache, drmp3_uint64 frameCount, float* pFramesOut)
 {
+    drmp3_uint32 channels;
+    drmp3_uint64 totalFramesRead = 0;
+
     drmp3_assert(pCache != NULL);
     drmp3_assert(pCache->pSRC != NULL);
     drmp3_assert(pCache->pSRC->onRead != NULL);
     drmp3_assert(frameCount > 0);
     drmp3_assert(pFramesOut != NULL);
 
-    drmp3_uint32 channels = pCache->pSRC->config.channels;
+    channels = pCache->pSRC->config.channels;
 
-    drmp3_uint64 totalFramesRead = 0;
     while (frameCount > 0) {
-        // If there's anything in memory go ahead and copy that over first.
+        /* If there's anything in memory go ahead and copy that over first. */
+        drmp3_uint32 framesToReadFromClient;
         drmp3_uint64 framesRemainingInMemory = pCache->cachedFrameCount - pCache->iNextFrame;
         drmp3_uint64 framesToReadFromMemory = frameCount;
         if (framesToReadFromMemory > framesRemainingInMemory) {
@@ -2196,14 +2297,14 @@ drmp3_uint64 drmp3_src_cache_read_frames(drmp3_src_cache* pCache, drmp3_uint64 f
         }
 
 
-        // At this point there are still more frames to read from the client, so we'll need to reload the cache with fresh data.
+        /* At this point there are still more frames to read from the client, so we'll need to reload the cache with fresh data. */
         drmp3_assert(frameCount > 0);
         pFramesOut += framesToReadFromMemory * channels;
 
         pCache->iNextFrame = 0;
         pCache->cachedFrameCount = 0;
 
-        drmp3_uint32 framesToReadFromClient = drmp3_countof(pCache->pCachedFrames) / pCache->pSRC->config.channels;
+        framesToReadFromClient = drmp3_countof(pCache->pCachedFrames) / pCache->pSRC->config.channels;
         if (framesToReadFromClient > pCache->pSRC->config.cacheSizeInFrames) {
             framesToReadFromClient = pCache->pSRC->config.cacheSizeInFrames;
         }
@@ -2211,7 +2312,7 @@ drmp3_uint64 drmp3_src_cache_read_frames(drmp3_src_cache* pCache, drmp3_uint64 f
         pCache->cachedFrameCount = (drmp3_uint32)pCache->pSRC->onRead(pCache->pSRC, framesToReadFromClient, pCache->pCachedFrames, pCache->pSRC->pUserData);
 
 
-        // Get out of this loop if nothing was able to be retrieved.
+        /* Get out of this loop if nothing was able to be retrieved. */
         if (pCache->cachedFrameCount == 0) {
             break;
         }
@@ -2226,11 +2327,19 @@ drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCou
 
 drmp3_bool32 drmp3_src_init(const drmp3_src_config* pConfig, drmp3_src_read_proc onRead, void* pUserData, drmp3_src* pSRC)
 {
-    if (pSRC == NULL) return DRMP3_FALSE;
+    if (pSRC == NULL) {
+        return DRMP3_FALSE;
+    }
+
     drmp3_zero_object(pSRC);
 
-    if (pConfig == NULL || onRead == NULL) return DRMP3_FALSE;
-    if (pConfig->channels == 0 || pConfig->channels > 2) return DRMP3_FALSE;
+    if (pConfig == NULL || onRead == NULL) {
+        return DRMP3_FALSE;
+    }
+
+    if (pConfig->channels == 0 || pConfig->channels > 2) {
+        return DRMP3_FALSE;
+    }
 
     pSRC->config = *pConfig;
     pSRC->onRead = onRead;
@@ -2246,9 +2355,11 @@ drmp3_bool32 drmp3_src_init(const drmp3_src_config* pConfig, drmp3_src_read_proc
 
 drmp3_bool32 drmp3_src_set_input_sample_rate(drmp3_src* pSRC, drmp3_uint32 sampleRateIn)
 {
-    if (pSRC == NULL) return DRMP3_FALSE;
+    if (pSRC == NULL) {
+        return DRMP3_FALSE;
+    }
 
-    // Must have a sample rate of > 0.
+    /* Must have a sample rate of > 0. */
     if (sampleRateIn == 0) {
         return DRMP3_FALSE;
     }
@@ -2259,9 +2370,11 @@ drmp3_bool32 drmp3_src_set_input_sample_rate(drmp3_src* pSRC, drmp3_uint32 sampl
 
 drmp3_bool32 drmp3_src_set_output_sample_rate(drmp3_src* pSRC, drmp3_uint32 sampleRateOut)
 {
-    if (pSRC == NULL) return DRMP3_FALSE;
+    if (pSRC == NULL) {
+        return DRMP3_FALSE;
+    }
 
-    // Must have a sample rate of > 0.
+    /* Must have a sample rate of > 0. */
     if (sampleRateOut == 0) {
         return DRMP3_FALSE;
     }
@@ -2272,16 +2385,20 @@ drmp3_bool32 drmp3_src_set_output_sample_rate(drmp3_src* pSRC, drmp3_uint32 samp
 
 drmp3_uint64 drmp3_src_read_frames_ex(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, drmp3_bool32 flush)
 {
-    if (pSRC == NULL || frameCount == 0 || pFramesOut == NULL) return 0;
+    drmp3_src_algorithm algorithm;
 
-    drmp3_src_algorithm algorithm = pSRC->config.algorithm;
+    if (pSRC == NULL || frameCount == 0 || pFramesOut == NULL) {
+        return 0;
+    }
 
-    // Always use passthrough if the sample rates are the same.
+    algorithm = pSRC->config.algorithm;
+
+    /* Always use passthrough if the sample rates are the same. */
     if (pSRC->config.sampleRateIn == pSRC->config.sampleRateOut) {
         algorithm = drmp3_src_algorithm_none;
     }
 
-    // Could just use a function pointer instead of a switch for this...
+    /* Could just use a function pointer instead of a switch for this... */
     switch (algorithm)
     {
         case drmp3_src_algorithm_none:   return drmp3_src_read_frames_passthrough(pSRC, frameCount, pFramesOut, flush);
@@ -2301,19 +2418,22 @@ drmp3_uint64 drmp3_src_read_frames_passthrough(drmp3_src* pSRC, drmp3_uint64 fra
     drmp3_assert(frameCount > 0);
     drmp3_assert(pFramesOut != NULL);
 
-    (void)flush;    // Passthrough need not care about flushing.
+    (void)flush;    /* Passthrough need not care about flushing. */
     return pSRC->onRead(pSRC, frameCount, pFramesOut, pSRC->pUserData);
 }
 
 drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, drmp3_bool32 flush)
 {
+    double factor;
+    drmp3_uint64 totalFramesRead;
+
     drmp3_assert(pSRC != NULL);
     drmp3_assert(frameCount > 0);
     drmp3_assert(pFramesOut != NULL);
 
-    // For linear SRC, the bin is only 2 frames: 1 prior, 1 future.
+    /* For linear SRC, the bin is only 2 frames: 1 prior, 1 future. */
 
-    // Load the bin if necessary.
+    /* Load the bin if necessary. */
     if (!pSRC->algo.linear.isPrevFramesLoaded) {
         drmp3_uint64 framesRead = drmp3_src_cache_read_frames(&pSRC->cache, 1, pSRC->bin);
         if (framesRead == 0) {
@@ -2329,31 +2449,38 @@ drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCou
         pSRC->algo.linear.isNextFramesLoaded = DRMP3_TRUE;
     }
 
-    float factor = (float)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut;
+    factor = (double)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut;
 
-    drmp3_uint64 totalFramesRead = 0;
+    totalFramesRead = 0;
     while (frameCount > 0) {
-        // The bin is where the previous and next frames are located.
+        drmp3_uint32 i;
+        drmp3_uint32 framesToReadFromClient;
+
+        /* The bin is where the previous and next frames are located. */
         float* pPrevFrame = pSRC->bin;
         float* pNextFrame = pSRC->bin + pSRC->config.channels;
 
-        drmp3_blend_f32((float*)pFramesOut, pPrevFrame, pNextFrame, pSRC->algo.linear.alpha, pSRC->config.channels);
+        drmp3_blend_f32((float*)pFramesOut, pPrevFrame, pNextFrame, (float)pSRC->algo.linear.alpha, pSRC->config.channels);
 
         pSRC->algo.linear.alpha += factor;
 
-        // The new alpha value is how we determine whether or not we need to read fresh frames.
-        drmp3_uint32 framesToReadFromClient = (drmp3_uint32)pSRC->algo.linear.alpha;
+        /* The new alpha value is how we determine whether or not we need to read fresh frames. */
+        framesToReadFromClient = (drmp3_uint32)pSRC->algo.linear.alpha;
         pSRC->algo.linear.alpha = pSRC->algo.linear.alpha - framesToReadFromClient;
 
-        for (drmp3_uint32 i = 0; i < framesToReadFromClient; ++i) {
-            for (drmp3_uint32 j = 0; j < pSRC->config.channels; ++j) {
+        for (i = 0; i < framesToReadFromClient; ++i) {
+            drmp3_uint64 framesRead;
+            drmp3_uint32 j;
+
+            for (j = 0; j < pSRC->config.channels; ++j) {
                 pPrevFrame[j] = pNextFrame[j];
             }
 
-            drmp3_uint64 framesRead = drmp3_src_cache_read_frames(&pSRC->cache, 1, pNextFrame);
+            framesRead = drmp3_src_cache_read_frames(&pSRC->cache, 1, pNextFrame);
             if (framesRead == 0) {
-                for (drmp3_uint32 j = 0; j < pSRC->config.channels; ++j) {
-                    pNextFrame[j] = 0;
+                drmp3_uint32 k;
+                for (k = 0; k < pSRC->config.channels; ++k) {
+                    pNextFrame[k] = 0;
                 }
 
                 if (pSRC->algo.linear.isNextFramesLoaded) {
@@ -2372,7 +2499,7 @@ drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCou
         frameCount -= 1;
         totalFramesRead += 1;
 
-        // If there's no frames available we need to get out of this loop.
+        /* If there's no frames available we need to get out of this loop. */
         if (!pSRC->algo.linear.isNextFramesLoaded && (!flush || !pSRC->algo.linear.isPrevFramesLoaded)) {
             break;
         }
@@ -2384,152 +2511,93 @@ drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCou
 
 static size_t drmp3__on_read(drmp3* pMP3, void* pBufferOut, size_t bytesToRead)
 {
-    return pMP3->onRead(pMP3->pUserData, pBufferOut, bytesToRead);
+    size_t bytesRead = pMP3->onRead(pMP3->pUserData, pBufferOut, bytesToRead);
+    pMP3->streamCursor += bytesRead;
+    return bytesRead;
 }
 
 static drmp3_bool32 drmp3__on_seek(drmp3* pMP3, int offset, drmp3_seek_origin origin)
 {
     drmp3_assert(offset >= 0);
-    return pMP3->onSeek(pMP3->pUserData, offset, origin);
-}
 
-
-static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames)
-{
-    drmp3_assert(pMP3 != NULL);
-    drmp3_assert(pMP3->onRead != NULL);
-
-    if (pMP3->atEnd) {
-        return 0;
+    if (!pMP3->onSeek(pMP3->pUserData, offset, origin)) {
+        return DRMP3_FALSE;
     }
 
-    drmp3_uint32 pcmFramesRead = 0;
-    do {
-        // minimp3 recommends doing data submission in 16K chunks. If we don't have at least 16K bytes available, get more.
-        if (pMP3->dataSize < DRMP3_DATA_CHUNK_SIZE) {
-            if (pMP3->dataCapacity < DRMP3_DATA_CHUNK_SIZE) {
-                pMP3->dataCapacity = DRMP3_DATA_CHUNK_SIZE;
-                drmp3_uint8* pNewData = (drmp3_uint8*)drmp3_realloc(pMP3->pData, pMP3->dataCapacity);
-                if (pNewData == NULL) {
-                    return 0; // Out of memory.
-                }
+    if (origin == drmp3_seek_origin_start) {
+        pMP3->streamCursor = (drmp3_uint64)offset;
+    } else {
+        pMP3->streamCursor += offset;
+    }
 
-                pMP3->pData = pNewData;
+    return DRMP3_TRUE;
+}
+
+static drmp3_bool32 drmp3__on_seek_64(drmp3* pMP3, drmp3_uint64 offset, drmp3_seek_origin origin)
+{
+    if (offset <= 0x7FFFFFFF) {
+        return drmp3__on_seek(pMP3, (int)offset, origin);
+    }
+
+
+    /* Getting here "offset" is too large for a 32-bit integer. We just keep seeking forward until we hit the offset. */
+    if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_start)) {
+        return DRMP3_FALSE;
+    }
+
+    offset -= 0x7FFFFFFF;
+    while (offset > 0) {
+        if (offset <= 0x7FFFFFFF) {
+            if (!drmp3__on_seek(pMP3, (int)offset, drmp3_seek_origin_current)) {
+                return DRMP3_FALSE;
             }
-
-            size_t bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
-            if (bytesRead == 0) {
-                if (pMP3->dataSize == 0) {
-                    pMP3->atEnd = DRMP3_TRUE;
-                    return 0; // No data.
-                }
-            }
-
-            pMP3->dataSize += bytesRead;
-        }
-
-        if (pMP3->dataSize > INT_MAX) {
-            pMP3->atEnd = DRMP3_TRUE;
-            return 0; // File too big.
-        }
-
-        drmp3dec_frame_info info;
-        pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->pData, (int)pMP3->dataSize, pPCMFrames, &info);    // <-- Safe size_t -> int conversion thanks to the check above.
-        if (pcmFramesRead != 0) {
-            size_t leftoverDataSize = (pMP3->dataSize - (size_t)info.frame_bytes);
-            for (size_t i = 0; i < leftoverDataSize; ++i) {
-                pMP3->pData[i] = pMP3->pData[i + (size_t)info.frame_bytes];
-            }
-                
-            pMP3->dataSize = leftoverDataSize;
-            pMP3->pcmFramesConsumedInMP3Frame = 0;
-            pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
-            pMP3->mp3FrameChannels = info.channels;
-            pMP3->mp3FrameSampleRate = info.hz;
-            drmp3_src_set_input_sample_rate(&pMP3->src, pMP3->mp3FrameSampleRate);
-            break;
+            offset = 0;
         } else {
-            // Need more data. minimp3 recommends doing data submission in 16K chunks.
-            if (pMP3->dataCapacity == pMP3->dataSize) {
-                // No room. Expand.
-                pMP3->dataCapacity += DRMP3_DATA_CHUNK_SIZE;
-                drmp3_uint8* pNewData = (drmp3_uint8*)drmp3_realloc(pMP3->pData, pMP3->dataCapacity);
-                if (pNewData == NULL) {
-                    return 0; // Out of memory.
-                }
-
-                pMP3->pData = pNewData;
+            if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_current)) {
+                return DRMP3_FALSE;
             }
-
-            // Fill in a chunk.
-            size_t bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
-            if (bytesRead == 0) {
-                pMP3->atEnd = DRMP3_TRUE;
-                return 0; // Error reading more data.
-            }
-
-            pMP3->dataSize += bytesRead;
+            offset -= 0x7FFFFFFF;
         }
-    } while (DRMP3_TRUE);
-
-    return pcmFramesRead;
-}
-
-static drmp3_uint32 drmp3_decode_next_frame(drmp3* pMP3)
-{
-    drmp3_assert(pMP3 != NULL);
-    return drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames);
-}
-
-static drmp3_uint32 drmp3_seek_next_frame(drmp3* pMP3)
-{
-    drmp3_assert(pMP3 != NULL);
-
-    drmp3_uint32 pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, NULL);
-    if (pcmFrameCount == 0) {
-        return 0;
     }
 
-    // We have essentially just skipped past the frame, so just set the remaining samples to 0.
-    pMP3->currentPCMFrame             += pcmFrameCount;
-    pMP3->pcmFramesConsumedInMP3Frame  = pcmFrameCount;
-    pMP3->pcmFramesRemainingInMP3Frame = 0;
-
-    return pcmFrameCount;
+    return DRMP3_TRUE;
 }
 
+static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3_bool32 discard);
+static drmp3_uint32 drmp3_decode_next_frame(drmp3* pMP3);
+
 static drmp3_uint64 drmp3_read_src(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, void* pUserData)
 {
     drmp3* pMP3 = (drmp3*)pUserData;
-    drmp3_assert(pMP3 != NULL);
-    drmp3_assert(pMP3->onRead != NULL);
-
     float* pFramesOutF = (float*)pFramesOut;
     drmp3_uint64 totalFramesRead = 0;
 
+    drmp3_assert(pMP3 != NULL);
+    drmp3_assert(pMP3->onRead != NULL);
+
     while (frameCount > 0) {
-        // Read from the in-memory buffer first.
+        /* Read from the in-memory buffer first. */
         while (pMP3->pcmFramesRemainingInMP3Frame > 0 && frameCount > 0) {
             drmp3d_sample_t* frames = (drmp3d_sample_t*)pMP3->pcmFrames;
 #ifndef DR_MP3_FLOAT_OUTPUT
             if (pMP3->mp3FrameChannels == 1) {
                 if (pMP3->channels == 1) {
-                    // Mono -> Mono.
+                    /* Mono -> Mono. */
                     pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame] / 32768.0f;
                 } else {
-                    // Mono -> Stereo.
+                    /* Mono -> Stereo. */
                     pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame] / 32768.0f;
                     pFramesOutF[1] = frames[pMP3->pcmFramesConsumedInMP3Frame] / 32768.0f;
                 }
             } else {
                 if (pMP3->channels == 1) {
-                    // Stereo -> Mono
+                    /* Stereo -> Mono */
                     float sample = 0;
                     sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0] / 32768.0f;
                     sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1] / 32768.0f;
                     pFramesOutF[0] = sample * 0.5f;
                 } else {
-                    // Stereo -> Stereo
+                    /* Stereo -> Stereo */
                     pFramesOutF[0] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0] / 32768.0f;
                     pFramesOutF[1] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1] / 32768.0f;
                 }
@@ -2537,22 +2605,22 @@ static drmp3_uint64 drmp3_read_src(drmp3_src* pSRC, drmp3_uint64 frameCount, voi
 #else
             if (pMP3->mp3FrameChannels == 1) {
                 if (pMP3->channels == 1) {
-                    // Mono -> Mono.
+                    /* Mono -> Mono. */
                     pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame];
                 } else {
-                    // Mono -> Stereo.
+                    /* Mono -> Stereo. */
                     pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame];
                     pFramesOutF[1] = frames[pMP3->pcmFramesConsumedInMP3Frame];
                 }
             } else {
                 if (pMP3->channels == 1) {
-                    // Stereo -> Mono
+                    /* Stereo -> Mono */
                     float sample = 0;
                     sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0];
                     sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1];
                     pFramesOutF[0] = sample * 0.5f;
                 } else {
-                    // Stereo -> Stereo
+                    /* Stereo -> Stereo */
                     pFramesOutF[0] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0];
                     pFramesOutF[1] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1];
                 }
@@ -2572,8 +2640,10 @@ static drmp3_uint64 drmp3_read_src(drmp3_src* pSRC, drmp3_uint64 frameCount, voi
 
         drmp3_assert(pMP3->pcmFramesRemainingInMP3Frame == 0);
 
-        // At this point we have exhausted our in-memory buffer so we need to re-fill. Note that the sample rate may have changed
-        // at this point which means we'll also need to update our sample rate conversion pipeline.
+        /*
+        At this point we have exhausted our in-memory buffer so we need to re-fill. Note that the sample rate may have changed
+        at this point which means we'll also need to update our sample rate conversion pipeline.
+        */
         if (drmp3_decode_next_frame(pMP3) == 0) {
             break;
         }
@@ -2582,42 +2652,8 @@ static drmp3_uint64 drmp3_read_src(drmp3_src* pSRC, drmp3_uint64 frameCount, voi
     return totalFramesRead;
 }
 
-drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_config* pConfig)
+static drmp3_bool32 drmp3_init_src(drmp3* pMP3)
 {
-    drmp3_assert(pMP3 != NULL);
-    drmp3_assert(onRead != NULL);
-
-    // This function assumes the output object has already been reset to 0. Do not do that here, otherwise things will break.
-    drmp3dec_init(&pMP3->decoder);
-
-    // The config can be null in which case we use defaults.
-    drmp3_config config;
-    if (pConfig != NULL) {
-        config = *pConfig;
-    } else {
-        drmp3_zero_object(&config);
-    }
-
-    pMP3->channels = config.outputChannels;
-    if (pMP3->channels == 0) {
-        pMP3->channels = DR_MP3_DEFAULT_CHANNELS;
-    }
-
-    // Cannot have more than 2 channels.
-    if (pMP3->channels > 2) {
-        pMP3->channels = 2;
-    }
-
-    pMP3->sampleRate = config.outputSampleRate;
-    if (pMP3->sampleRate == 0) {
-        pMP3->sampleRate = DR_MP3_DEFAULT_SAMPLE_RATE;
-    }
-
-    pMP3->onRead = onRead;
-    pMP3->onSeek = onSeek;
-    pMP3->pUserData = pUserData;
-
-    // We need a sample rate converter for converting the sample rate from the MP3 frames to the requested output sample rate.
     drmp3_src_config srcConfig;
     drmp3_zero_object(&srcConfig);
     srcConfig.sampleRateIn = DR_MP3_DEFAULT_SAMPLE_RATE;
@@ -2628,11 +2664,190 @@ drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek
         drmp3_uninit(pMP3);
         return DRMP3_FALSE;
     }
+
+    return DRMP3_TRUE;
+}
+
+static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3_bool32 discard)
+{
+    drmp3_uint32 pcmFramesRead = 0;
+
+    drmp3_assert(pMP3 != NULL);
+    drmp3_assert(pMP3->onRead != NULL);
+
+    if (pMP3->atEnd) {
+        return 0;
+    }
+
+    do {
+        drmp3dec_frame_info info;
+        size_t leftoverDataSize;
+
+        /* minimp3 recommends doing data submission in 16K chunks. If we don't have at least 16K bytes available, get more. */
+        if (pMP3->dataSize < DRMP3_DATA_CHUNK_SIZE) {
+            size_t bytesRead;
+
+            if (pMP3->dataCapacity < DRMP3_DATA_CHUNK_SIZE) {
+                drmp3_uint8* pNewData;
+
+                pMP3->dataCapacity = DRMP3_DATA_CHUNK_SIZE;
+                pNewData = (drmp3_uint8*)drmp3_realloc(pMP3->pData, pMP3->dataCapacity);
+                if (pNewData == NULL) {
+                    return 0; /* Out of memory. */
+                }
+
+                pMP3->pData = pNewData;
+            }
+
+            bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
+            if (bytesRead == 0) {
+                if (pMP3->dataSize == 0) {
+                    pMP3->atEnd = DRMP3_TRUE;
+                    return 0; /* No data. */
+                }
+            }
+
+            pMP3->dataSize += bytesRead;
+        }
+
+        if (pMP3->dataSize > INT_MAX) {
+            pMP3->atEnd = DRMP3_TRUE;
+            return 0; /* File too big. */
+        }
+
+        pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->pData, (int)pMP3->dataSize, pPCMFrames, &info);    /* <-- Safe size_t -> int conversion thanks to the check above. */
+        
+        /* Consume the data. */
+        leftoverDataSize = (pMP3->dataSize - (size_t)info.frame_bytes);
+        if (info.frame_bytes > 0) {
+            memmove(pMP3->pData, pMP3->pData + info.frame_bytes, leftoverDataSize);
+            pMP3->dataSize = leftoverDataSize;
+        }
+
+        /*
+        pcmFramesRead will be equal to 0 if decoding failed. If it is greater than zero then we have successfully decoded
+        the frame. A special case is when we want to discard the frame: consuming any bytes (info.frame_bytes > 0) is success.
+        */
+        if (pcmFramesRead > 0 || (info.frame_bytes > 0 && discard)) {
+            pcmFramesRead = drmp3_hdr_frame_samples(pMP3->decoder.header);
+            pMP3->pcmFramesConsumedInMP3Frame = 0;
+            pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
+            pMP3->mp3FrameChannels = info.channels;
+            pMP3->mp3FrameSampleRate = info.hz;
+
+            /* We need to initialize the resampler if we don't yet have the channel count or sample rate. */
+            if (pMP3->channels == 0 || pMP3->sampleRate == 0) {
+                if (pMP3->channels == 0) {
+                    pMP3->channels = info.channels;
+                }
+                if (pMP3->sampleRate == 0) {
+                    pMP3->sampleRate = info.hz;
+                }
+                drmp3_init_src(pMP3);
+            }
+
+            drmp3_src_set_input_sample_rate(&pMP3->src, pMP3->mp3FrameSampleRate);
+            break;
+        } else if (info.frame_bytes == 0) {
+            size_t bytesRead;
+
+            /* Need more data. minimp3 recommends doing data submission in 16K chunks. */
+            if (pMP3->dataCapacity == pMP3->dataSize) {
+                drmp3_uint8* pNewData;
+
+                /* No room. Expand. */
+                pMP3->dataCapacity += DRMP3_DATA_CHUNK_SIZE;
+                pNewData = (drmp3_uint8*)drmp3_realloc(pMP3->pData, pMP3->dataCapacity);
+                if (pNewData == NULL) {
+                    return 0; /* Out of memory. */
+                }
+
+                pMP3->pData = pNewData;
+            }
+
+            /* Fill in a chunk. */
+            bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
+            if (bytesRead == 0) {
+                pMP3->atEnd = DRMP3_TRUE;
+                return 0; /* Error reading more data. */
+            }
+
+            pMP3->dataSize += bytesRead;
+        }
+    } while (DRMP3_TRUE);
+
+    return pcmFramesRead;
+}
+
+static drmp3_uint32 drmp3_decode_next_frame(drmp3* pMP3)
+{
+    drmp3_assert(pMP3 != NULL);
+    return drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames, DRMP3_FALSE);
+}
+
+#if 0
+static drmp3_uint32 drmp3_seek_next_frame(drmp3* pMP3)
+{
+    drmp3_uint32 pcmFrameCount;
+
+    drmp3_assert(pMP3 != NULL);
+
+    pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, NULL);
+    if (pcmFrameCount == 0) {
+        return 0;
+    }
+
+    /* We have essentially just skipped past the frame, so just set the remaining samples to 0. */
+    pMP3->currentPCMFrame             += pcmFrameCount;
+    pMP3->pcmFramesConsumedInMP3Frame  = pcmFrameCount;
+    pMP3->pcmFramesRemainingInMP3Frame = 0;
+
+    return pcmFrameCount;
+}
+#endif
+
+drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_config* pConfig)
+{
+    drmp3_config config;
+
+    drmp3_assert(pMP3 != NULL);
+    drmp3_assert(onRead != NULL);
+
+    /* This function assumes the output object has already been reset to 0. Do not do that here, otherwise things will break. */
+    drmp3dec_init(&pMP3->decoder);
+
+    /* The config can be null in which case we use defaults. */
+    if (pConfig != NULL) {
+        config = *pConfig;
+    } else {
+        drmp3_zero_object(&config);
+    }
+
+    pMP3->channels = config.outputChannels;
+
+    /* Cannot have more than 2 channels. */
+    if (pMP3->channels > 2) {
+        pMP3->channels = 2;
+    }
+
+    pMP3->sampleRate = config.outputSampleRate;
+
+    pMP3->onRead = onRead;
+    pMP3->onSeek = onSeek;
+    pMP3->pUserData = pUserData;
+
+    /*
+    We need a sample rate converter for converting the sample rate from the MP3 frames to the requested output sample rate. Note that if
+    we don't yet know the channel count or sample rate we defer this until the first frame is read.
+    */
+    if (pMP3->channels != 0 && pMP3->sampleRate != 0) {
+        drmp3_init_src(pMP3);
+    }
     
-    // Decode the first frame to confirm that it is indeed a valid MP3 stream.
+    /* Decode the first frame to confirm that it is indeed a valid MP3 stream. */
     if (!drmp3_decode_next_frame(pMP3)) {
         drmp3_uninit(pMP3);
-        return DRMP3_FALSE; // Not a valid MP3 stream.
+        return DRMP3_FALSE; /* Not a valid MP3 stream. */
     }
 
     return DRMP3_TRUE;
@@ -2652,10 +2867,12 @@ drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onS
 static size_t drmp3__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
 {
     drmp3* pMP3 = (drmp3*)pUserData;
+    size_t bytesRemaining;
+
     drmp3_assert(pMP3 != NULL);
     drmp3_assert(pMP3->memory.dataSize >= pMP3->memory.currentReadPos);
 
-    size_t bytesRemaining = pMP3->memory.dataSize - pMP3->memory.currentReadPos;
+    bytesRemaining = pMP3->memory.dataSize - pMP3->memory.currentReadPos;
     if (bytesToRead > bytesRemaining) {
         bytesToRead = bytesRemaining;
     }
@@ -2671,26 +2888,27 @@ static size_t drmp3__on_read_memory(void* pUserData, void* pBufferOut, size_t by
 static drmp3_bool32 drmp3__on_seek_memory(void* pUserData, int byteOffset, drmp3_seek_origin origin)
 {
     drmp3* pMP3 = (drmp3*)pUserData;
+
     drmp3_assert(pMP3 != NULL);
 
     if (origin == drmp3_seek_origin_current) {
         if (byteOffset > 0) {
             if (pMP3->memory.currentReadPos + byteOffset > pMP3->memory.dataSize) {
-                byteOffset = (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos);  // Trying to seek too far forward.
+                byteOffset = (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos);  /* Trying to seek too far forward. */
             }
         } else {
             if (pMP3->memory.currentReadPos < (size_t)-byteOffset) {
-                byteOffset = -(int)pMP3->memory.currentReadPos;  // Trying to seek too far backwards.
+                byteOffset = -(int)pMP3->memory.currentReadPos;  /* Trying to seek too far backwards. */
             }
         }
 
-        // This will never underflow thanks to the clamps above.
+        /* This will never underflow thanks to the clamps above. */
         pMP3->memory.currentReadPos += byteOffset;
     } else {
         if ((drmp3_uint32)byteOffset <= pMP3->memory.dataSize) {
             pMP3->memory.currentReadPos = byteOffset;
         } else {
-            pMP3->memory.currentReadPos = pMP3->memory.dataSize;  // Trying to seek too far forward.
+            pMP3->memory.currentReadPos = pMP3->memory.dataSize;  /* Trying to seek too far forward. */
         }
     }
 
@@ -2765,21 +2983,22 @@ void drmp3_uninit(drmp3* pMP3)
 
 drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut)
 {
+    drmp3_uint64 totalFramesRead = 0;
+
     if (pMP3 == NULL || pMP3->onRead == NULL) {
         return 0;
     }
 
-    drmp3_uint64 totalFramesRead = 0;
-
     if (pBufferOut == NULL) {
         float temp[4096];
         while (framesToRead > 0) {
+            drmp3_uint64 framesJustRead;
             drmp3_uint64 framesToReadRightNow = sizeof(temp)/sizeof(temp[0]) / pMP3->channels;
             if (framesToReadRightNow > framesToRead) {
                 framesToReadRightNow = framesToRead;
             }
 
-            drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadRightNow, temp);
+            framesJustRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadRightNow, temp);
             if (framesJustRead == 0) {
                 break;
             }
@@ -2795,22 +3014,144 @@ drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, f
     return totalFramesRead;
 }
 
-drmp3_bool32 drmp3_seek_to_start_of_stream(drmp3* pMP3)
+drmp3_uint64 drmp3_read_pcm_frames_s16(drmp3* pMP3, drmp3_uint64 framesToRead, drmp3_int16* pBufferOut)
 {
-    drmp3_assert(pMP3 != NULL);
-    drmp3_assert(pMP3->onSeek != NULL);
+    float tempF32[4096];
+    drmp3_uint64 pcmFramesJustRead;
+    drmp3_uint64 totalPCMFramesRead = 0;
 
-    // Seek to the start of the stream to begin with.
-    if (!drmp3__on_seek(pMP3, 0, drmp3_seek_origin_start)) {
-        return DRMP3_FALSE;
+    if (pMP3 == NULL || pMP3->onRead == NULL) {
+        return 0;
     }
 
-    // Clear any cached data.
+    /* Naive implementation: read into a temp f32 buffer, then convert. */
+    for (;;) {
+        drmp3_uint64 pcmFramesToReadThisIteration = (framesToRead - totalPCMFramesRead);
+        if (pcmFramesToReadThisIteration > drmp3_countof(tempF32)/pMP3->channels) {
+            pcmFramesToReadThisIteration = drmp3_countof(tempF32)/pMP3->channels;
+        }
+
+        pcmFramesJustRead = drmp3_read_pcm_frames_f32(pMP3, pcmFramesToReadThisIteration, tempF32);
+        if (pcmFramesJustRead == 0) {
+            break;
+        }
+
+        drmp3dec_f32_to_s16(tempF32, pBufferOut, (int)(pcmFramesJustRead * pMP3->channels));    /* <-- Safe cast since pcmFramesJustRead will be clamped based on the size of tempF32 which is always small. */
+        pBufferOut += pcmFramesJustRead * pMP3->channels;
+
+        totalPCMFramesRead += pcmFramesJustRead;
+
+        if (pcmFramesJustRead < pcmFramesToReadThisIteration) {
+            break;
+        }
+    }
+
+    return totalPCMFramesRead;
+}
+
+void drmp3_reset(drmp3* pMP3)
+{
+    drmp3_assert(pMP3 != NULL);
+
     pMP3->pcmFramesConsumedInMP3Frame = 0;
     pMP3->pcmFramesRemainingInMP3Frame = 0;
     pMP3->currentPCMFrame = 0;
     pMP3->dataSize = 0;
     pMP3->atEnd = DRMP3_FALSE;
+    pMP3->src.bin[0] = 0;
+    pMP3->src.bin[1] = 0;
+    pMP3->src.bin[2] = 0;
+    pMP3->src.bin[3] = 0;
+    pMP3->src.cache.cachedFrameCount = 0;
+    pMP3->src.cache.iNextFrame = 0;
+    pMP3->src.algo.linear.alpha = 0;
+    pMP3->src.algo.linear.isNextFramesLoaded = 0;
+    pMP3->src.algo.linear.isPrevFramesLoaded = 0;
+    drmp3dec_init(&pMP3->decoder);
+}
+
+drmp3_bool32 drmp3_seek_to_start_of_stream(drmp3* pMP3)
+{
+    drmp3_assert(pMP3 != NULL);
+    drmp3_assert(pMP3->onSeek != NULL);
+
+    /* Seek to the start of the stream to begin with. */
+    if (!drmp3__on_seek(pMP3, 0, drmp3_seek_origin_start)) {
+        return DRMP3_FALSE;
+    }
+
+    /* Clear any cached data. */
+    drmp3_reset(pMP3);
+    return DRMP3_TRUE;
+}
+
+float drmp3_get_cached_pcm_frame_count_from_src(drmp3* pMP3)
+{
+    return (pMP3->src.cache.cachedFrameCount - pMP3->src.cache.iNextFrame) + (float)pMP3->src.algo.linear.alpha;
+}
+
+float drmp3_get_pcm_frames_remaining_in_mp3_frame(drmp3* pMP3)
+{
+    float factor = (float)pMP3->src.config.sampleRateOut / (float)pMP3->src.config.sampleRateIn;
+    float frameCountPreSRC = drmp3_get_cached_pcm_frame_count_from_src(pMP3) + pMP3->pcmFramesRemainingInMP3Frame;
+    return frameCountPreSRC * factor;
+}
+
+/*
+NOTE ON SEEKING
+===============
+The seeking code below is a complete mess and is broken for cases when the sample rate changes. The problem
+is with the resampling and the crappy resampler used by dr_mp3. What needs to happen is the following:
+
+1) The resampler needs to be replaced.
+2) The resampler has state which needs to be updated whenever an MP3 frame is decoded outside of
+   drmp3_read_pcm_frames_f32(). The resampler needs an API to "flush" some imaginary input so that its
+   state is updated accordingly.
+*/
+drmp3_bool32 drmp3_seek_forward_by_pcm_frames__brute_force(drmp3* pMP3, drmp3_uint64 frameOffset)
+{
+    drmp3_uint64 framesRead;
+
+#if 0
+    /*
+    MP3 is a bit annoying when it comes to seeking because of the bit reservoir. It basically means that an MP3 frame can possibly
+    depend on some of the data of prior frames. This means it's not as simple as seeking to the first byte of the MP3 frame that
+    contains the sample because that MP3 frame will need the data from the previous MP3 frame (which we just seeked past!). To
+    resolve this we seek past a number of MP3 frames up to a point, and then read-and-discard the remainder.
+    */
+    drmp3_uint64 maxFramesToReadAndDiscard = (drmp3_uint64)(DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME * 3 * ((float)pMP3->src.config.sampleRateOut / (float)pMP3->src.config.sampleRateIn));
+
+    /* Now get rid of leading whole frames. */
+    while (frameOffset > maxFramesToReadAndDiscard) {
+        float        pcmFramesRemainingInCurrentMP3FrameF = drmp3_get_pcm_frames_remaining_in_mp3_frame(pMP3);
+        drmp3_uint32 pcmFramesRemainingInCurrentMP3Frame  = (drmp3_uint32)pcmFramesRemainingInCurrentMP3FrameF;
+        if (frameOffset > pcmFramesRemainingInCurrentMP3Frame) {
+            frameOffset                       -= pcmFramesRemainingInCurrentMP3Frame;
+            pMP3->currentPCMFrame             += pcmFramesRemainingInCurrentMP3Frame;
+            pMP3->pcmFramesConsumedInMP3Frame += pMP3->pcmFramesRemainingInMP3Frame;
+            pMP3->pcmFramesRemainingInMP3Frame = 0;
+        } else {
+            break;
+        }
+
+        drmp3_uint32 pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, pMP3->pcmFrames, DRMP3_FALSE);
+        if (pcmFrameCount == 0) {
+            break;
+        }
+    }
+
+    /* The last step is to read-and-discard any remaining PCM frames to make it sample-exact. */
+    framesRead = drmp3_read_pcm_frames_f32(pMP3, frameOffset, NULL);
+    if (framesRead != frameOffset) {
+        return DRMP3_FALSE;
+    }
+#else
+    /* Just using a dumb read-and-discard for now pending updates to the resampler. */
+    framesRead = drmp3_read_pcm_frames_f32(pMP3, frameOffset, NULL);
+    if (framesRead != frameOffset) {
+        return DRMP3_FALSE;
+    }
+#endif
 
     return DRMP3_TRUE;
 }
@@ -2823,107 +3164,208 @@ drmp3_bool32 drmp3_seek_to_pcm_frame__brute_force(drmp3* pMP3, drmp3_uint64 fram
         return DRMP3_TRUE;
     }
 
-    // If we're moving foward we just read from where we're at. Otherwise we need to move back to the start of
-    // the stream and read from the beginning.
-    drmp3_uint64 framesToReadAndDiscard;
-    if (frameIndex >= pMP3->currentPCMFrame) {
-        // Moving foward.
-        framesToReadAndDiscard = frameIndex - pMP3->currentPCMFrame;
-    } else {
-        // Moving backward. Move to the start of the stream and then move forward.
-        framesToReadAndDiscard = frameIndex;
+    /*
+    If we're moving forward we just read from where we're at. Otherwise we need to move back to the start of
+    the stream and read from the beginning.
+    */
+    if (frameIndex < pMP3->currentPCMFrame) {
+        /* Moving backward. Move to the start of the stream and then move forward. */
         if (!drmp3_seek_to_start_of_stream(pMP3)) {
             return DRMP3_FALSE;
         }
     }
 
-    // MP3 is a bit annoying when it comes to seeking because of the bit reservoir. It basically means that an MP3 frame can possibly
-    // depend on some of the data of prior frames. This means it's not as simple as seeking to the first byte of the MP3 frame that
-    // contains the sample because that MP3 frame will need the data from the previous MP3 frame (which we just seeked past!). To
-    // resolve this we seek past a number of MP3 frames up to a point, and then read-and-discard the remainder.
-    drmp3_uint64 maxFramesToReadAndDiscard = DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME * 3;
+    drmp3_assert(frameIndex >= pMP3->currentPCMFrame);
+    return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, (frameIndex - pMP3->currentPCMFrame));
+}
 
-    // First get rid of anything that's still sitting in the buffer.
-    if (framesToReadAndDiscard > maxFramesToReadAndDiscard && framesToReadAndDiscard > pMP3->pcmFramesRemainingInMP3Frame) {
-        framesToReadAndDiscard            -= pMP3->pcmFramesRemainingInMP3Frame;
-        pMP3->currentPCMFrame             += pMP3->pcmFramesRemainingInMP3Frame;
-        pMP3->pcmFramesConsumedInMP3Frame += pMP3->pcmFramesRemainingInMP3Frame;
-        pMP3->pcmFramesRemainingInMP3Frame = 0;
-    }
+drmp3_bool32 drmp3_find_closest_seek_point(drmp3* pMP3, drmp3_uint64 frameIndex, drmp3_uint32* pSeekPointIndex)
+{
+    drmp3_uint32 iSeekPoint;
 
-    // Now get rid of leading whole frames.
-    while (framesToReadAndDiscard > maxFramesToReadAndDiscard) {
-        drmp3_uint32 pcmFramesSeeked = drmp3_seek_next_frame(pMP3);
-        if (pcmFramesSeeked == 0) {
-            break;
-        }
+    drmp3_assert(pSeekPointIndex != NULL);
 
-        framesToReadAndDiscard -= pcmFramesSeeked;
-    }
+    *pSeekPointIndex = 0;
 
-    // The last step is to read-and-discard any remaining PCM frames to make it sample-exact.
-    drmp3_uint64 framesRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadAndDiscard, NULL);
-    if (framesRead != framesToReadAndDiscard) {
+    if (frameIndex < pMP3->pSeekPoints[0].pcmFrameIndex) {
         return DRMP3_FALSE;
     }
 
+    /* Linear search for simplicity to begin with while I'm getting this thing working. Once it's all working change this to a binary search. */
+    for (iSeekPoint = 0; iSeekPoint < pMP3->seekPointCount; ++iSeekPoint) {
+        if (pMP3->pSeekPoints[iSeekPoint].pcmFrameIndex > frameIndex) {
+            break;  /* Found it. */
+        }
+
+        *pSeekPointIndex = iSeekPoint;
+    }
+
     return DRMP3_TRUE;
 }
 
+drmp3_bool32 drmp3_seek_to_pcm_frame__seek_table(drmp3* pMP3, drmp3_uint64 frameIndex)
+{
+    drmp3_seek_point seekPoint;
+    drmp3_uint32 priorSeekPointIndex;
+    drmp3_uint16 iMP3Frame;
+    drmp3_uint64 leftoverFrames;
+    /* Seeks to frameIndex by jumping to the closest prior seek point in the bound table and then moving forward the rest of the way. */
+    drmp3_assert(pMP3 != NULL);
+    drmp3_assert(pMP3->pSeekPoints != NULL);
+    drmp3_assert(pMP3->seekPointCount > 0);
+
+    /* If there is no prior seek point it means the target PCM frame comes before the first seek point. Just assume a seek point at the start of the file in this case. */
+    if (drmp3_find_closest_seek_point(pMP3, frameIndex, &priorSeekPointIndex)) {
+        seekPoint = pMP3->pSeekPoints[priorSeekPointIndex];
+    } else {
+        seekPoint.seekPosInBytes     = 0;
+        seekPoint.pcmFrameIndex      = 0;
+        seekPoint.mp3FramesToDiscard = 0;
+        seekPoint.pcmFramesToDiscard = 0;
+    }
+
+    /* First thing to do is seek to the first byte of the relevant MP3 frame. */
+    if (!drmp3__on_seek_64(pMP3, seekPoint.seekPosInBytes, drmp3_seek_origin_start)) {
+        return DRMP3_FALSE; /* Failed to seek. */
+    }
+
+    /* Clear any cached data. */
+    drmp3_reset(pMP3);
+
+    /* Whole MP3 frames need to be discarded first. */
+    for (iMP3Frame = 0; iMP3Frame < seekPoint.mp3FramesToDiscard; ++iMP3Frame) {
+        drmp3_uint32 pcmFramesReadPreSRC;
+        drmp3d_sample_t* pPCMFrames;
+
+        /* Pass in non-null for the last frame because we want to ensure the sample rate converter is preloaded correctly. */
+        pPCMFrames = NULL;
+        if (iMP3Frame == seekPoint.mp3FramesToDiscard-1) {
+            pPCMFrames = (drmp3d_sample_t*)pMP3->pcmFrames;
+        }
+
+        /* We first need to decode the next frame, and then we need to flush the resampler. */
+        pcmFramesReadPreSRC = drmp3_decode_next_frame_ex(pMP3, pPCMFrames, DRMP3_TRUE);
+        if (pcmFramesReadPreSRC == 0) {
+            return DRMP3_FALSE; /* No frame could be decoded, so the seek cannot be completed. */
+        }
+    }
+
+    /* We seeked to an MP3 frame in the raw stream so we need to make sure the current PCM frame is set correctly. */
+    pMP3->currentPCMFrame = seekPoint.pcmFrameIndex - seekPoint.pcmFramesToDiscard;
+
+    /*
+    Update resampler. This is wrong. Need to instead update it on a per MP3 frame basis. Also broken for cases when
+    the sample rate is being reduced in my testing. Should work fine when the input and output sample rate is the same
+    or a clean multiple.
+    */
+    pMP3->src.algo.linear.alpha = (drmp3_int64)pMP3->currentPCMFrame * ((double)pMP3->src.config.sampleRateIn / pMP3->src.config.sampleRateOut); /* <-- Cast to int64 is required for VC6. */
+    pMP3->src.algo.linear.alpha = pMP3->src.algo.linear.alpha - (drmp3_uint32)(pMP3->src.algo.linear.alpha); /* <-- Keeps only the fractional part. */
+    if (pMP3->src.algo.linear.alpha > 0) {
+        pMP3->src.algo.linear.isPrevFramesLoaded = 1;
+    }
+
+    /*
+    Now at this point we can follow the same process as the brute force technique where we just skip over unnecessary MP3 frames and then
+    read-and-discard at least 2 whole MP3 frames.
+    */
+    leftoverFrames = frameIndex - pMP3->currentPCMFrame;
+    return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, leftoverFrames);
+}
+
 drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex)
 {
     if (pMP3 == NULL || pMP3->onSeek == NULL) {
         return DRMP3_FALSE;
     }
 
-    // We currently only support brute force seeking.
-    return drmp3_seek_to_pcm_frame__brute_force(pMP3, frameIndex);
+    if (frameIndex == 0) {
+        return drmp3_seek_to_start_of_stream(pMP3);
+    }
+
+    /* Use the seek table if we have one. */
+    if (pMP3->pSeekPoints != NULL && pMP3->seekPointCount > 0) {
+        return drmp3_seek_to_pcm_frame__seek_table(pMP3, frameIndex);
+    } else {
+        return drmp3_seek_to_pcm_frame__brute_force(pMP3, frameIndex);
+    }
 }
 
-drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3)
+drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint64* pMP3FrameCount, drmp3_uint64* pPCMFrameCount)
 {
+    drmp3_uint64 currentPCMFrame;
+    drmp3_uint64 totalPCMFrameCount;
+    drmp3_uint64 totalMP3FrameCount;
+    float totalPCMFrameCountFractionalPart;
+
     if (pMP3 == NULL) {
-        return 0;
+        return DRMP3_FALSE;
     }
 
-    // The way this works is we move back to the start of the stream, iterate over each MP3 frame and calculate the frame count based
-    // on our output sample rate, the seek back to the PCM frame we were sitting on before calling this function.
+    /*
+    The way this works is we move back to the start of the stream, iterate over each MP3 frame and calculate the frame count based
+    on our output sample rate, then seek back to the PCM frame we were sitting on before calling this function.
+    */
 
-    // The stream must support seeking for this to work.
+    /* The stream must support seeking for this to work. */
     if (pMP3->onSeek == NULL) {
-        return 0;
+        return DRMP3_FALSE;
     }
 
-    // We'll need to seek back to where we were, so grab the PCM frame we're currently sitting on so we can restore later.
-    drmp3_uint64 currentPCMFrame = pMP3->currentPCMFrame;
+    /* We'll need to seek back to where we were, so grab the PCM frame we're currently sitting on so we can restore later. */
+    currentPCMFrame = pMP3->currentPCMFrame;
     
     if (!drmp3_seek_to_start_of_stream(pMP3)) {
-        return 0;
+        return DRMP3_FALSE;
     }
 
-    drmp3_uint64 totalPCMFrameCount = 0;
-    float totalPCMFrameCountFractionalPart = 0; // <-- With resampling there will be a fractional part to each MP3 frame that we need to accumulate.
+    totalPCMFrameCount = 0;
+    totalMP3FrameCount = 0;
+
+    totalPCMFrameCountFractionalPart = 0; /* <-- With resampling there will be a fractional part to each MP3 frame that we need to accumulate. */
     for (;;) {
-        drmp3_uint32 pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL);   // <-- Passing in NULL here will prevent decoding of the MP3 frame which should save time.
+        drmp3_uint32 pcmFramesInCurrentMP3FrameIn;
+        float srcRatio;
+        float pcmFramesInCurrentMP3FrameOutF;
+        drmp3_uint32 pcmFramesInCurrentMP3FrameOut;
+
+        pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, DRMP3_FALSE);
         if (pcmFramesInCurrentMP3FrameIn == 0) {
             break;
         }
 
-        float srcRatio = (float)pMP3->mp3FrameSampleRate / (float)pMP3->sampleRate;
+        srcRatio = (float)pMP3->mp3FrameSampleRate / (float)pMP3->sampleRate;
         drmp3_assert(srcRatio > 0);
 
-        float        pcmFramesInCurrentMP3FrameOutF = totalPCMFrameCountFractionalPart + (pcmFramesInCurrentMP3FrameIn / srcRatio);
-        drmp3_uint32 pcmFramesInCurrentMP3FrameOut  = (drmp3_uint32)pcmFramesInCurrentMP3FrameOutF;
+        pcmFramesInCurrentMP3FrameOutF = totalPCMFrameCountFractionalPart + (pcmFramesInCurrentMP3FrameIn / srcRatio);
+        pcmFramesInCurrentMP3FrameOut  = (drmp3_uint32)pcmFramesInCurrentMP3FrameOutF;
         totalPCMFrameCountFractionalPart = pcmFramesInCurrentMP3FrameOutF - pcmFramesInCurrentMP3FrameOut;
         totalPCMFrameCount += pcmFramesInCurrentMP3FrameOut;
+        totalMP3FrameCount += 1;
     }
 
-    // Finally, we need to seek back to where we were.
+    /* Finally, we need to seek back to where we were. */
     if (!drmp3_seek_to_start_of_stream(pMP3)) {
-        return 0;
+        return DRMP3_FALSE;
     }
 
     if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) {
+        return DRMP3_FALSE;
+    }
+
+    if (pMP3FrameCount != NULL) {
+        *pMP3FrameCount = totalMP3FrameCount;
+    }
+    if (pPCMFrameCount != NULL) {
+        *pPCMFrameCount = totalPCMFrameCount;
+    }
+
+    return DRMP3_TRUE;
+}
+
+drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3)
+{
+    drmp3_uint64 totalPCMFrameCount;
+    if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, NULL, &totalPCMFrameCount)) {
         return 0;
     }
 
@@ -2932,56 +3374,204 @@ drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3)
 
 drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3)
 {
-    if (pMP3 == NULL) {
-        return 0;
-    }
-
-    // This works the same way as drmp3_get_pcm_frame_count() - move to the start, count MP3 frames, move back to the previous position.
-
-    // The stream must support seeking for this to work.
-    if (pMP3->onSeek == NULL) {
-        return 0;
-    }
-
-    // We'll need to seek back to where we were, so grab the PCM frame we're currently sitting on so we can restore later.
-    drmp3_uint64 currentPCMFrame = pMP3->currentPCMFrame;
-    
-    if (!drmp3_seek_to_start_of_stream(pMP3)) {
-        return 0;
-    }
-
-    drmp3_uint64 totalMP3FrameCount = 0;
-    for (;;) {
-        drmp3_uint32 pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL);
-        if (pcmFramesInCurrentMP3FrameIn == 0) {
-            break;
-        }
-
-        totalMP3FrameCount += 1;
-    }
-
-    // Finally, we need to seek back to where we were.
-    if (!drmp3_seek_to_start_of_stream(pMP3)) {
-        return 0;
-    }
-
-    if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) {
+    drmp3_uint64 totalMP3FrameCount;
+    if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, NULL)) {
         return 0;
     }
 
     return totalMP3FrameCount;
 }
 
+void drmp3__accumulate_running_pcm_frame_count(drmp3* pMP3, drmp3_uint32 pcmFrameCountIn, drmp3_uint64* pRunningPCMFrameCount, float* pRunningPCMFrameCountFractionalPart)
+{
+    float srcRatio;
+    float pcmFrameCountOutF;
+    drmp3_uint32 pcmFrameCountOut;
+    /* Adds pcmFrameCountIn, rescaled by mp3FrameSampleRate/sampleRate, to the running total while carrying the fractional remainder. */
+    srcRatio = (float)pMP3->mp3FrameSampleRate / (float)pMP3->sampleRate;
+    drmp3_assert(srcRatio > 0);
+    /* Whole frames are added to the running count; the leftover fraction is stored so the next call can pick it up. */
+    pcmFrameCountOutF = *pRunningPCMFrameCountFractionalPart + (pcmFrameCountIn / srcRatio);
+    pcmFrameCountOut  = (drmp3_uint32)pcmFrameCountOutF;
+    *pRunningPCMFrameCountFractionalPart = pcmFrameCountOutF - pcmFrameCountOut;
+    *pRunningPCMFrameCount += pcmFrameCountOut;
+}
+
+typedef struct  /* Per-MP3-frame bookkeeping cached while drmp3_calculate_seek_points() walks the stream. */
+{
+    drmp3_uint64 bytePos;       /* <-- Recorded as streamCursor - dataSize just before the MP3 frame is decoded. */
+    drmp3_uint64 pcmFrameIndex; /* <-- After sample rate conversion. */
+} drmp3__seeking_mp3_frame_info;
+
+drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints)
+{
+    drmp3_uint32 seekPointCount;
+    drmp3_uint64 currentPCMFrame;
+    drmp3_uint64 totalMP3FrameCount;
+    drmp3_uint64 totalPCMFrameCount;
+    /* Fills pSeekPoints with up to *pSeekPointCount seek points spaced evenly in PCM frames and writes the number produced back to *pSeekPointCount. */
+    if (pMP3 == NULL || pSeekPointCount == NULL || pSeekPoints == NULL) {
+        return DRMP3_FALSE; /* Invalid args. */
+    }
+
+    seekPointCount = *pSeekPointCount;
+    if (seekPointCount == 0) {
+        return DRMP3_FALSE;  /* The client has requested no seek points. Consider this to be invalid arguments since the client has probably not intended this. */
+    }
+
+    /* We'll need to seek back to the current sample after calculating the seek points so we need to go ahead and grab the current location at the top. */
+    currentPCMFrame = pMP3->currentPCMFrame;
+    
+    /* We never produce more seek points than there are MP3 frames (the count is limited to 32 bits), so the totals are needed first. */
+    if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, &totalPCMFrameCount)) {
+        return DRMP3_FALSE;
+    }
+
+    /* If there's less than DRMP3_SEEK_LEADING_MP3_FRAMES+1 frames we just report 1 seek point which will be the very start of the stream. */
+    if (totalMP3FrameCount < DRMP3_SEEK_LEADING_MP3_FRAMES+1) {
+        seekPointCount = 1;
+        pSeekPoints[0].seekPosInBytes     = 0;
+        pSeekPoints[0].pcmFrameIndex      = 0;
+        pSeekPoints[0].mp3FramesToDiscard = 0;
+        pSeekPoints[0].pcmFramesToDiscard = 0;
+    } else {
+        drmp3_uint64 pcmFramesBetweenSeekPoints;
+        drmp3__seeking_mp3_frame_info mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES+1];
+        drmp3_uint64 runningPCMFrameCount = 0;
+        float runningPCMFrameCountFractionalPart = 0;
+        drmp3_uint64 nextTargetPCMFrame;
+        drmp3_uint32 iMP3Frame;
+        drmp3_uint32 iSeekPoint;
+
+        if (seekPointCount > totalMP3FrameCount-1) {
+            seekPointCount = (drmp3_uint32)totalMP3FrameCount-1;
+        }
+
+        pcmFramesBetweenSeekPoints = totalPCMFrameCount / (seekPointCount+1);
+
+        /*
+        Here is where we actually calculate the seek points. We need to start by moving to the start of the stream. We then enumerate over each
+        MP3 frame.
+        */
+        if (!drmp3_seek_to_start_of_stream(pMP3)) {
+            return DRMP3_FALSE;
+        }
+
+        /*
+        We need to cache the byte positions of the previous MP3 frames. As a new MP3 frame is iterated, we cycle the byte positions in this
+        array. The value in the first item in this array is the byte position that will be reported in the next seek point.
+        */
+
+        /* We need to initialize the array of MP3 byte positions for the leading MP3 frames. */
+        for (iMP3Frame = 0; iMP3Frame < DRMP3_SEEK_LEADING_MP3_FRAMES+1; ++iMP3Frame) {
+            drmp3_uint32 pcmFramesInCurrentMP3FrameIn;
+
+            /* The byte position of the next frame will be the stream's cursor position, minus whatever is sitting in the buffer. */
+            drmp3_assert(pMP3->streamCursor >= pMP3->dataSize);
+            mp3FrameInfo[iMP3Frame].bytePos       = pMP3->streamCursor - pMP3->dataSize;
+            mp3FrameInfo[iMP3Frame].pcmFrameIndex = runningPCMFrameCount;
+
+            /* We need to get information about this frame so we can know how many samples it contained. */
+            pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, DRMP3_FALSE);
+            if (pcmFramesInCurrentMP3FrameIn == 0) {
+                return DRMP3_FALSE; /* This should never happen. */
+            }
+
+            drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart);
+        }
+
+        /*
+        At this point we will have extracted the byte positions of the leading MP3 frames. We can now start iterating over each seek point and
+        calculate them.
+        */
+        nextTargetPCMFrame = 0;
+        for (iSeekPoint = 0; iSeekPoint < seekPointCount; ++iSeekPoint) {
+            nextTargetPCMFrame += pcmFramesBetweenSeekPoints;
+
+            for (;;) {
+                if (nextTargetPCMFrame < runningPCMFrameCount) {
+                    /* The next seek point is in the current MP3 frame. */
+                    pSeekPoints[iSeekPoint].seekPosInBytes     = mp3FrameInfo[0].bytePos;
+                    pSeekPoints[iSeekPoint].pcmFrameIndex      = nextTargetPCMFrame;
+                    pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES;
+                    pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex);
+                    break;
+                } else {
+                    size_t i;
+                    drmp3_uint32 pcmFramesInCurrentMP3FrameIn;
+
+                    /*
+                    The next seek point is not in the current MP3 frame, so continue on to the next one. The first thing to do is cycle the cached
+                    MP3 frame info.
+                    */
+                    for (i = 0; i < drmp3_countof(mp3FrameInfo)-1; ++i) {
+                        mp3FrameInfo[i] = mp3FrameInfo[i+1];
+                    }
+
+                    /* Record where the MP3 frame we're about to decode starts, in the last slot of the cache. */
+                    mp3FrameInfo[drmp3_countof(mp3FrameInfo)-1].bytePos       = pMP3->streamCursor - pMP3->dataSize;
+                    mp3FrameInfo[drmp3_countof(mp3FrameInfo)-1].pcmFrameIndex = runningPCMFrameCount;
+
+                    /*
+                    Go to the next MP3 frame. This shouldn't ever fail, but just in case it does we just set the seek point and break. If it happens, it
+                    should only ever do it for the last seek point.
+                    */
+                    pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, DRMP3_TRUE);
+                    if (pcmFramesInCurrentMP3FrameIn == 0) {
+                        pSeekPoints[iSeekPoint].seekPosInBytes     = mp3FrameInfo[0].bytePos;
+                        pSeekPoints[iSeekPoint].pcmFrameIndex      = nextTargetPCMFrame;
+                        pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES;
+                        pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex);
+                        break;
+                    }
+
+                    drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart);
+                }
+            }
+        }
+
+        /* Finally, we need to seek back to where we were. */
+        if (!drmp3_seek_to_start_of_stream(pMP3)) {
+            return DRMP3_FALSE;
+        }
+        if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) {
+            return DRMP3_FALSE;
+        }
+    }
+
+    *pSeekPointCount = seekPointCount;
+    return DRMP3_TRUE;
+}
+
+drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints)
+{
+    if (pMP3 == NULL) {
+        return DRMP3_FALSE;
+    }
+    /* Attaches a seek table to the decoder, or detaches the current one. Returns DRMP3_FALSE only when pMP3 is NULL. */
+    if (seekPointCount == 0 || pSeekPoints == NULL) {
+        /* Unbinding. A zero count or a NULL table clears whatever was bound before. */
+        pMP3->seekPointCount = 0;
+        pMP3->pSeekPoints = NULL;
+    } else {
+        /* Binding. Only the pointer is stored (no copy is made), so the table has to stay valid while it is bound. */
+        pMP3->seekPointCount = seekPointCount;
+        pMP3->pSeekPoints = pSeekPoints;
+    }
+
+    return DRMP3_TRUE;
+}
+
 
 float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
 {
-    drmp3_assert(pMP3 != NULL);
-
     drmp3_uint64 totalFramesRead = 0;
     drmp3_uint64 framesCapacity = 0;
     float* pFrames = NULL;
-
     float temp[4096];
+
+    drmp3_assert(pMP3 != NULL);
+
     for (;;) {
         drmp3_uint64 framesToReadRightNow = drmp3_countof(temp) / pMP3->channels;
         drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadRightNow, temp);
@@ -2989,19 +3579,22 @@ float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp3_
             break;
         }
 
-        // Reallocate the output buffer if there's not enough room.
+        /* Reallocate the output buffer if there's not enough room. */
         if (framesCapacity < totalFramesRead + framesJustRead) {
+            drmp3_uint64 newFramesBufferSize;
+            float* pNewFrames;
+
             framesCapacity *= 2;
             if (framesCapacity < totalFramesRead + framesJustRead) {
                 framesCapacity = totalFramesRead + framesJustRead;
             }
 
-            drmp3_uint64 newFramesBufferSize = framesCapacity*pMP3->channels*sizeof(float);
+            newFramesBufferSize = framesCapacity*pMP3->channels*sizeof(float);
             if (newFramesBufferSize > DRMP3_SIZE_MAX) {
                 break;
             }
 
-            float* pNewFrames = (float*)drmp3_realloc(pFrames, (size_t)newFramesBufferSize);
+            pNewFrames = (float*)drmp3_realloc(pFrames, (size_t)newFramesBufferSize);
             if (pNewFrames == NULL) {
                 drmp3_free(pFrames);
                 break;
@@ -3013,7 +3606,7 @@ float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp3_
         drmp3_copy_memory(pFrames + totalFramesRead*pMP3->channels, temp, (size_t)(framesJustRead*pMP3->channels*sizeof(float)));
         totalFramesRead += framesJustRead;
 
-        // If the number of frames we asked for is less that what we actually read it means we've reached the end.
+        /* If the number of frames we actually read is less than what we asked for it means we've reached the end. */
         if (framesJustRead != framesToReadRightNow) {
             break;
         }
@@ -3026,10 +3619,77 @@ float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp3_
 
     drmp3_uninit(pMP3);
 
-    if (pTotalFrameCount) *pTotalFrameCount = totalFramesRead;
+    if (pTotalFrameCount) {
+        *pTotalFrameCount = totalFramesRead;
+    }
+
     return pFrames;
 }
 
+/*
+Reads every remaining PCM frame from pMP3 as s16 into one heap buffer, uninitializes the decoder and returns the buffer, which is
+released with drmp3_free(). pConfig and pTotalFrameCount are optional outputs. If drmp3_realloc() fails, NULL and a count of 0 result.
+*/
+drmp3_int16* drmp3__full_read_and_close_s16(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
+{
+    drmp3_uint64 totalFramesRead = 0;
+    drmp3_uint64 framesCapacity = 0;
+    drmp3_int16* pFrames = NULL;
+    drmp3_int16 temp[4096];
+
+    drmp3_assert(pMP3 != NULL);
+    for (;;) {
+        drmp3_uint64 framesToReadRightNow = drmp3_countof(temp) / pMP3->channels;
+        drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_s16(pMP3, framesToReadRightNow, temp);
+        if (framesJustRead == 0) {
+            break;
+        }
+
+        /* Reallocate the output buffer if there's not enough room. */
+        if (framesCapacity < totalFramesRead + framesJustRead) {
+            drmp3_uint64 newFramesBufferSize;
+            drmp3_int16* pNewFrames;
+
+            framesCapacity *= 2;
+            if (framesCapacity < totalFramesRead + framesJustRead) {
+                framesCapacity = totalFramesRead + framesJustRead;
+            }
+            newFramesBufferSize = framesCapacity*pMP3->channels*sizeof(drmp3_int16);
+            if (newFramesBufferSize > DRMP3_SIZE_MAX) {
+                break;
+            }
+            pNewFrames = (drmp3_int16*)drmp3_realloc(pFrames, (size_t)newFramesBufferSize);
+            if (pNewFrames == NULL) {
+                drmp3_free(pFrames);
+                pFrames = NULL;         /* The buffer is gone: never hand a freed pointer or a stale count back to the caller. */
+                totalFramesRead = 0;
+                break;
+            }
+            pFrames = pNewFrames;
+        }
+
+        drmp3_copy_memory(pFrames + totalFramesRead*pMP3->channels, temp, (size_t)(framesJustRead*pMP3->channels*sizeof(drmp3_int16)));
+        totalFramesRead += framesJustRead;
+
+        /* If the number of frames we actually read is less than what we asked for it means we've reached the end. */
+        if (framesJustRead != framesToReadRightNow) {
+            break;
+        }
+    }
+
+    if (pConfig != NULL) {
+        pConfig->outputChannels = pMP3->channels;
+        pConfig->outputSampleRate = pMP3->sampleRate;
+    }
+    drmp3_uninit(pMP3);
+    if (pTotalFrameCount) {
+        *pTotalFrameCount = totalFramesRead;
+    }
+
+    return pFrames;
+}
+
+
 float* drmp3_open_and_read_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
 {
     drmp3 mp3;
@@ -3040,6 +3700,17 @@ float* drmp3_open_and_read_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, v
     return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount);
 }
 
+drmp3_int16* drmp3_open_and_read_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
+{
+    drmp3 mp3;
+    if (!drmp3_init(&mp3, onRead, onSeek, pUserData, pConfig)) {
+        return NULL;    /* The decoder could not be initialized from the callbacks. */
+    }
+    /* Decode the whole stream to s16 in one go. The helper also uninitializes mp3 before it returns. */
+    return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount);
+}
+
+
 float* drmp3_open_memory_and_read_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
 {
     drmp3 mp3;
@@ -3050,6 +3721,17 @@ float* drmp3_open_memory_and_read_f32(const void* pData, size_t dataSize, drmp3_
     return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount);
 }
 
+drmp3_int16* drmp3_open_memory_and_read_s16(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
+{
+    drmp3 mp3;
+    if (!drmp3_init_memory(&mp3, pData, dataSize, pConfig)) {
+        return NULL;    /* The decoder could not be initialized from the memory block. */
+    }
+    /* Decode the whole block to s16 in one go. The helper also uninitializes mp3 before it returns. */
+    return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount);
+}
+
+
 #ifndef DR_MP3_NO_STDIO
 float* drmp3_open_file_and_read_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
 {
@@ -3060,6 +3742,16 @@ float* drmp3_open_file_and_read_f32(const char* filePath, drmp3_config* pConfig,
 
     return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount);
 }
+
+drmp3_int16* drmp3_open_file_and_read_s16(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
+{
+    drmp3 mp3;
+    if (!drmp3_init_file(&mp3, filePath, pConfig)) {
+        return NULL;    /* The decoder could not be initialized from filePath. */
+    }
+    /* Decode the whole file to s16 in one go. The helper also uninitializes mp3 before it returns. */
+    return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount);
+}
 #endif
 
 void drmp3_free(void* p)
@@ -3069,127 +3761,172 @@ void drmp3_free(void* p)
 
 #endif /*DR_MP3_IMPLEMENTATION*/
 
-
-// DIFFERENCES BETWEEN minimp3 AND dr_mp3
-// ======================================
-// - First, keep in mind that minimp3 (https://github.com/lieff/minimp3) is where all the real work was done. All of the
-//   code relating to the actual decoding remains mostly unmodified, apart from some namespacing changes.
-// - dr_mp3 adds a pulling style API which allows you to deliver raw data via callbacks. So, rather than pushing data
-//   to the decoder, the decoder _pulls_ data from your callbacks.
-// - In addition to callbacks, a decoder can be initialized from a block of memory and a file.
-// - The dr_mp3 pull API reads PCM frames rather than whole MP3 frames.
-// - dr_mp3 adds convenience APIs for opening and decoding entire files in one go.
-// - dr_mp3 is fully namespaced, including the implementation section, which is more suitable when compiling projects
-//   as a single translation unit (aka unity builds). At the time of writing this, a unity build is not possible when
-//   using minimp3 in conjunction with stb_vorbis. dr_mp3 addresses this.
-
-
-// REVISION HISTORY
-// ================
-//
-// v0.4.0 - 2018-xx-xx
-//   - API CHANGE: Rename some APIs:
-//     - drmp3_read_f32 -> to drmp3_read_pcm_frames_f32
-//     - drmp3_seek_to_frame -> drmp3_seek_to_pcm_frame
-//     - drmp3_open_and_decode_f32 -> drmp3_open_and_read_f32
-//     - drmp3_open_and_decode_memory_f32 -> drmp3_open_memory_and_read_f32
-//     - drmp3_open_and_decode_file_f32 -> drmp3_open_file_and_read_f32
-//   - Add drmp3_get_pcm_frame_count().
-//   - Add drmp3_get_mp3_frame_count().
-//   - Improve seeking performance.
-//
-// v0.3.2 - 2018-09-11
-//   - Fix a couple of memory leaks.
-//   - Bring up to date with minimp3.
-//
-// v0.3.1 - 2018-08-25
-//   - Fix C++ build.
-//
-// v0.3.0 - 2018-08-25
-//   - Bring up to date with minimp3. This has a minor API change: the "pcm" parameter of drmp3dec_decode_frame() has
-//     been changed from short* to void* because it can now output both s16 and f32 samples, depending on whether or
-//     not the DR_MP3_FLOAT_OUTPUT option is set.
-//
-// v0.2.11 - 2018-08-08
-//   - Fix a bug where the last part of a file is not read.
-//
-// v0.2.10 - 2018-08-07
-//   - Improve 64-bit detection.
-//
-// v0.2.9 - 2018-08-05
-//   - Fix C++ build on older versions of GCC.
-//   - Bring up to date with minimp3.
-//
-// v0.2.8 - 2018-08-02
-//   - Fix compilation errors with older versions of GCC.
-//
-// v0.2.7 - 2018-07-13
-//   - Bring up to date with minimp3.
-//
-// v0.2.6 - 2018-07-12
-//   - Bring up to date with minimp3.
-//
-// v0.2.5 - 2018-06-22
-//   - Bring up to date with minimp3.
-//
-// v0.2.4 - 2018-05-12
-//   - Bring up to date with minimp3.
-//
-// v0.2.3 - 2018-04-29
-//   - Fix TCC build.
-//
-// v0.2.2 - 2018-04-28
-//   - Fix bug when opening a decoder from memory.
-//
-// v0.2.1 - 2018-04-27
-//   - Efficiency improvements when the decoder reaches the end of the stream.
-//
-// v0.2 - 2018-04-21
-//   - Bring up to date with minimp3.
-//   - Start using major.minor.revision versioning.
-//
-// v0.1d - 2018-03-30
-//   - Bring up to date with minimp3.
-//
-// v0.1c - 2018-03-11
-//   - Fix C++ build error.
-//
-// v0.1b - 2018-03-07
-//   - Bring up to date with minimp3.
-//
-// v0.1a - 2018-02-28
-//   - Fix compilation error on GCC/Clang.
-//   - Fix some warnings.
-//
-// v0.1 - 2018-02-xx
-//   - Initial versioned release.
-
+/*
+DIFFERENCES BETWEEN minimp3 AND dr_mp3
+======================================
+- First, keep in mind that minimp3 (https://github.com/lieff/minimp3) is where all the real work was done. All of the
+  code relating to the actual decoding remains mostly unmodified, apart from some namespacing changes.
+- dr_mp3 adds a pulling style API which allows you to deliver raw data via callbacks. So, rather than pushing data
+  to the decoder, the decoder _pulls_ data from your callbacks.
+- In addition to callbacks, a decoder can be initialized from a block of memory and a file.
+- The dr_mp3 pull API reads PCM frames rather than whole MP3 frames.
+- dr_mp3 adds convenience APIs for opening and decoding entire files in one go.
+- dr_mp3 is fully namespaced, including the implementation section, which is more suitable when compiling projects
+  as a single translation unit (aka unity builds). At the time of writing this, a unity build is not possible when
+  using minimp3 in conjunction with stb_vorbis. dr_mp3 addresses this.
+*/
 
 /*
+REVISION HISTORY
+================
+v0.4.4 - 2019-05-06
+  - Fixes to the VC6 build.
+
+v0.4.3 - 2019-05-05
+  - Use the channel count and/or sample rate of the first MP3 frame instead of DR_MP3_DEFAULT_CHANNELS and
+    DR_MP3_DEFAULT_SAMPLE_RATE when they are set to 0. To use the old behaviour, just set the relevant property to
+    DR_MP3_DEFAULT_CHANNELS or DR_MP3_DEFAULT_SAMPLE_RATE.
+  - Add s16 reading APIs
+    - drmp3_read_pcm_frames_s16
+    - drmp3_open_memory_and_read_s16
+    - drmp3_open_and_read_s16
+    - drmp3_open_file_and_read_s16
+  - Add drmp3_get_mp3_and_pcm_frame_count() to the public header section.
+  - Add support for C89.
+  - Change license to choice of public domain or MIT-0.
+
+v0.4.2 - 2019-02-21
+  - Fix a warning.
+
+v0.4.1 - 2018-12-30
+  - Fix a warning.
+
+v0.4.0 - 2018-12-16
+  - API CHANGE: Rename some APIs:
+    - drmp3_read_f32 -> drmp3_read_pcm_frames_f32
+    - drmp3_seek_to_frame -> drmp3_seek_to_pcm_frame
+    - drmp3_open_and_decode_f32 -> drmp3_open_and_read_f32
+    - drmp3_open_and_decode_memory_f32 -> drmp3_open_memory_and_read_f32
+    - drmp3_open_and_decode_file_f32 -> drmp3_open_file_and_read_f32
+  - Add drmp3_get_pcm_frame_count().
+  - Add drmp3_get_mp3_frame_count().
+  - Improve seeking performance.
+
+v0.3.2 - 2018-09-11
+  - Fix a couple of memory leaks.
+  - Bring up to date with minimp3.
+
+v0.3.1 - 2018-08-25
+  - Fix C++ build.
+
+v0.3.0 - 2018-08-25
+  - Bring up to date with minimp3. This has a minor API change: the "pcm" parameter of drmp3dec_decode_frame() has
+    been changed from short* to void* because it can now output both s16 and f32 samples, depending on whether or
+    not the DR_MP3_FLOAT_OUTPUT option is set.
+
+v0.2.11 - 2018-08-08
+  - Fix a bug where the last part of a file is not read.
+
+v0.2.10 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.2.9 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+  - Bring up to date with minimp3.
+
+v0.2.8 - 2018-08-02
+  - Fix compilation errors with older versions of GCC.
+
+v0.2.7 - 2018-07-13
+  - Bring up to date with minimp3.
+
+v0.2.6 - 2018-07-12
+  - Bring up to date with minimp3.
+
+v0.2.5 - 2018-06-22
+  - Bring up to date with minimp3.
+
+v0.2.4 - 2018-05-12
+  - Bring up to date with minimp3.
+
+v0.2.3 - 2018-04-29
+  - Fix TCC build.
+
+v0.2.2 - 2018-04-28
+  - Fix bug when opening a decoder from memory.
+
+v0.2.1 - 2018-04-27
+  - Efficiency improvements when the decoder reaches the end of the stream.
+
+v0.2 - 2018-04-21
+  - Bring up to date with minimp3.
+  - Start using major.minor.revision versioning.
+
+v0.1d - 2018-03-30
+  - Bring up to date with minimp3.
+
+v0.1c - 2018-03-11
+  - Fix C++ build error.
+
+v0.1b - 2018-03-07
+  - Bring up to date with minimp3.
+
+v0.1a - 2018-02-28
+  - Fix compilation error on GCC/Clang.
+  - Fix some warnings.
+
+v0.1 - 2018-02-xx
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
 This is free and unencumbered software released into the public domain.
 
-Anyone is free to copy, modify, publish, use, compile, sell, or
-distribute this software, either in source code form or as a compiled
-binary, for any purpose, commercial or non-commercial, and by any
-means.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
 
-In jurisdictions that recognize copyright laws, the author or authors
-of this software dedicate any and all copyright interest in the
-software to the public domain. We make this dedication for the benefit
-of the public at large and to the detriment of our heirs and
-successors. We intend this dedication to be an overt act of
-relinquishment in perpetuity of all present and future rights to this
-software under copyright law.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2018 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
 */
 
 /*
diff --git a/src/external/dr_wav.h b/src/external/dr_wav.h
index b071d00e..b2395bfb 100644
--- a/src/external/dr_wav.h
+++ b/src/external/dr_wav.h
@@ -1,115 +1,154 @@
-// WAV audio loader and writer. Public domain. See "unlicense" statement at the end of this file.
-// dr_wav - v0.8.1 - 2018-06-29
-//
-// David Reid - mackron@gmail.com
+/*
+WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_wav - v0.9.1 - 2019-05-05
 
-// USAGE
-//
-// This is a single-file library. To use it, do something like the following in one .c file.
-//     #define DR_WAV_IMPLEMENTATION
-//     #include "dr_wav.h"
-//
-// You can then #include this file in other parts of the program as you would with any other header file. Do something
-// like the following to read audio data:
-//
-//     drwav wav;
-//     if (!drwav_init_file(&wav, "my_song.wav")) {
-//         // Error opening WAV file.
-//     }
-//
-//     drwav_int32* pDecodedInterleavedSamples = malloc(wav.totalSampleCount * sizeof(drwav_int32));
-//     size_t numberOfSamplesActuallyDecoded = drwav_read_s32(&wav, wav.totalSampleCount, pDecodedInterleavedSamples);
-//
-//     ...
-//
-//     drwav_uninit(&wav);
-//
-// You can also use drwav_open() to allocate and initialize the loader for you:
-//
-//     drwav* pWav = drwav_open_file("my_song.wav");
-//     if (pWav == NULL) {
-//         // Error opening WAV file.
-//     }
-//
-//     ...
-//
-//     drwav_close(pWav);
-//
-// If you just want to quickly open and read the audio data in a single operation you can do something like this:
-//
-//     unsigned int channels;
-//     unsigned int sampleRate;
-//     drwav_uint64 totalSampleCount;
-//     float* pSampleData = drwav_open_and_read_file_s32("my_song.wav", &channels, &sampleRate, &totalSampleCount);
-//     if (pSampleData == NULL) {
-//         // Error opening and reading WAV file.
-//     }
-//
-//     ...
-//
-//     drwav_free(pSampleData);
-//
-// The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in
-// this case), but you can still output the audio data in its internal format (see notes below for supported formats):
-//
-//     size_t samplesRead = drwav_read(&wav, wav.totalSampleCount, pDecodedInterleavedSamples);
-//
-// You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for
-// a particular data format:
-//
-//     size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer);
-//
-//
-// dr_wav has seamless support the Sony Wave64 format. The decoder will automatically detect it and it should Just Work
-// without any manual intervention.
-//
-//
-// dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at
-// drwav_open_write(), drwav_open_file_write(), etc. Use drwav_write() to write samples, or drwav_write_raw() to write
-// raw data in the "data" chunk.
-//
-//     drwav_data_format format;
-//     format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
-//     format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
-//     format.channels = 2;
-//     format.sampleRate = 44100;
-//     format.bitsPerSample = 16;
-//     drwav* pWav = drwav_open_file_write("data/recording.wav", &format);
-//
-//     ...
-//
-//     drwav_uint64 samplesWritten = drwav_write(pWav, sampleCount, pSamples);
-//
-//
-//
-// OPTIONS
-// #define these options before including this file.
-//
-// #define DR_WAV_NO_CONVERSION_API
-//   Disables conversion APIs such as drwav_read_f32() and drwav_s16_to_f32().
-//
-// #define DR_WAV_NO_STDIO
-//   Disables drwav_open_file(), drwav_open_file_write(), etc.
-//
-//
-//
-// QUICK NOTES
-// - Samples are always interleaved.
-// - The default read function does not do any data conversion. Use drwav_read_f32() to read and convert audio data
-//   to IEEE 32-bit floating point samples, drwav_read_s32() to read samples as signed 32-bit PCM and drwav_read_s16()
-//   to read samples as signed 16-bit PCM. Tested and supported internal formats include the following:
-//   - Unsigned 8-bit PCM
-//   - Signed 12-bit PCM
-//   - Signed 16-bit PCM
-//   - Signed 24-bit PCM
-//   - Signed 32-bit PCM
-//   - IEEE 32-bit floating point
-//   - IEEE 64-bit floating point
-//   - A-law and u-law
-//   - Microsoft ADPCM
-//   - IMA ADPCM (DVI, format code 0x11)
-// - dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
+David Reid - mackron@gmail.com
+*/
 
+/*
+DEPRECATED APIS
+===============
+Version 0.9.0 deprecated the per-sample reading and seeking APIs and replaced them with versions that work on the resolution
+of a PCM frame instead. For example, given a stereo WAV file, previously you would pass 2 to drwav_read_f32() to read one
+PCM frame, whereas now you would pass in 1 to drwav_read_pcm_frames_f32(). The old APIs would return the number of samples
+read, whereas now it will return the number of PCM frames. Below is a list of APIs that have been deprecated and their
+replacements.
+
+    drwav_read()                     -> drwav_read_pcm_frames()
+    drwav_read_s16()                 -> drwav_read_pcm_frames_s16()
+    drwav_read_f32()                 -> drwav_read_pcm_frames_f32()
+    drwav_read_s32()                 -> drwav_read_pcm_frames_s32()
+    drwav_seek_to_sample()           -> drwav_seek_to_pcm_frame()
+    drwav_write()                    -> drwav_write_pcm_frames()
+    drwav_open_and_read_s16()        -> drwav_open_and_read_pcm_frames_s16()
+    drwav_open_and_read_f32()        -> drwav_open_and_read_pcm_frames_f32()
+    drwav_open_and_read_s32()        -> drwav_open_and_read_pcm_frames_s32()
+    drwav_open_file_and_read_s16()   -> drwav_open_file_and_read_pcm_frames_s16()
+    drwav_open_file_and_read_f32()   -> drwav_open_file_and_read_pcm_frames_f32()
+    drwav_open_file_and_read_s32()   -> drwav_open_file_and_read_pcm_frames_s32()
+    drwav_open_memory_and_read_s16() -> drwav_open_memory_and_read_pcm_frames_s16()
+    drwav_open_memory_and_read_f32() -> drwav_open_memory_and_read_pcm_frames_f32()
+    drwav_open_memory_and_read_s32() -> drwav_open_memory_and_read_pcm_frames_s32()
+    drwav::totalSampleCount          -> drwav::totalPCMFrameCount
+
+Rationale:
+    1) Most programs will want to read in multiples of the channel count which demands a per-frame reading API. Per-sample
+       reading just adds complexity and maintenance costs for no practical benefit.
+    2) This is consistent with my other decoders - dr_flac and dr_mp3.
+
+These APIs will be removed completely in version 0.10.0. You can continue to use drwav_read_raw() if you need per-sample
+reading.
+*/
+
+/*
+USAGE
+=====
+This is a single-file library. To use it, do something like the following in one .c file.
+    #define DR_WAV_IMPLEMENTATION
+    #include "dr_wav.h"
+
+You can then #include this file in other parts of the program as you would with any other header file. Do something
+like the following to read audio data:
+
+    drwav wav;
+    if (!drwav_init_file(&wav, "my_song.wav")) {
+        // Error opening WAV file.
+    }
+
+    drwav_int32* pDecodedInterleavedSamples = malloc(wav.totalPCMFrameCount * wav.channels * sizeof(drwav_int32));
+    size_t numberOfFramesActuallyDecoded = drwav_read_pcm_frames_s32(&wav, wav.totalPCMFrameCount, pDecodedInterleavedSamples);
+
+    ...
+
+    drwav_uninit(&wav);
+
+You can also use drwav_open() to allocate and initialize the loader for you:
+
+    drwav* pWav = drwav_open_file("my_song.wav");
+    if (pWav == NULL) {
+        // Error opening WAV file.
+    }
+
+    ...
+
+    drwav_close(pWav);
+
+If you just want to quickly open and read the audio data in a single operation you can do something like this:
+
+    unsigned int channels;
+    unsigned int sampleRate;
+    drwav_uint64 totalPCMFrameCount;
+    float* pSampleData = drwav_open_file_and_read_pcm_frames_f32("my_song.wav", &channels, &sampleRate, &totalPCMFrameCount);
+    if (pSampleData == NULL) {
+        // Error opening and reading WAV file.
+    }
+
+    ...
+
+    drwav_free(pSampleData);
+
+The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM or 32-bit
+floating point, in this case), but you can still output the audio data in its internal format (see notes below for supported formats):
+
+    size_t framesRead = drwav_read_pcm_frames(&wav, wav.totalPCMFrameCount, pDecodedInterleavedSamples);
+
+You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for
+a particular data format:
+
+    size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer);
+
+
+dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at
+drwav_open_write(), drwav_open_file_write(), etc. Use drwav_write_pcm_frames() to write samples, or drwav_write_raw()
+to write raw data in the "data" chunk.
+
+    drwav_data_format format;
+    format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
+    format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
+    format.channels = 2;
+    format.sampleRate = 44100;
+    format.bitsPerSample = 16;
+    drwav* pWav = drwav_open_file_write("data/recording.wav", &format);
+
+    ...
+
+    drwav_uint64 framesWritten = drwav_write_pcm_frames(pWav, frameCount, pSamples);
+
+
+dr_wav has seamless support for the Sony Wave64 format. The decoder will automatically detect it and it should Just Work
+without any manual intervention.
+
+
+OPTIONS
+=======
+#define these options before including this file.
+
+#define DR_WAV_NO_CONVERSION_API
+  Disables conversion APIs such as drwav_read_pcm_frames_f32() and drwav_s16_to_f32().
+
+#define DR_WAV_NO_STDIO
+  Disables drwav_open_file(), drwav_open_file_write(), etc.
+
+
+
+QUICK NOTES
+===========
+- Samples are always interleaved.
+- The default read function does not do any data conversion. Use drwav_read_pcm_frames_f32(), drwav_read_pcm_frames_s32()
+  and drwav_read_pcm_frames_s16() to read and convert audio data to 32-bit floating point, signed 32-bit integer and
+  signed 16-bit integer samples respectively. Tested and supported internal formats include the following:
+  - Unsigned 8-bit PCM
+  - Signed 12-bit PCM
+  - Signed 16-bit PCM
+  - Signed 24-bit PCM
+  - Signed 32-bit PCM
+  - IEEE 32-bit floating point
+  - IEEE 64-bit floating point
+  - A-law and u-law
+  - Microsoft ADPCM
+  - IMA ADPCM (DVI, format code 0x11)
+- dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
+*/
 
 #ifndef dr_wav_h
 #define dr_wav_h
@@ -145,7 +184,7 @@ typedef drwav_uint32     drwav_bool32;
 extern "C" {
 #endif
 
-// Common data formats.
+/* Common data formats. */
 #define DR_WAVE_FORMAT_PCM          0x1
 #define DR_WAVE_FORMAT_ADPCM        0x2
 #define DR_WAVE_FORMAT_IEEE_FLOAT   0x3
@@ -154,6 +193,14 @@ extern "C" {
 #define DR_WAVE_FORMAT_DVI_ADPCM    0x11
 #define DR_WAVE_FORMAT_EXTENSIBLE   0xFFFE
 
+/* Constants. DRWAV_MAX_SMPL_LOOPS defaults to 1; the #ifndef guard lets it be overridden by defining it before this point. */
+#ifndef DRWAV_MAX_SMPL_LOOPS
+#define DRWAV_MAX_SMPL_LOOPS        1
+#endif
+
+/* Flags to pass into drwav_init_ex(), etc. */
+#define DRWAV_SEQUENTIAL            0x00000001
+
 typedef enum
 {
     drwav_seek_origin_start,
@@ -166,42 +213,84 @@ typedef enum
     drwav_container_w64
 } drwav_container;
 
-// Callback for when data is read. Return value is the number of bytes actually read.
-//
-// pUserData   [in]  The user data that was passed to drwav_init(), drwav_open() and family.
-// pBufferOut  [out] The output buffer.
-// bytesToRead [in]  The number of bytes to read.
-//
-// Returns the number of bytes actually read.
-//
-// A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
-// either the entire bytesToRead is filled or you have reached the end of the stream.
+typedef struct
+{
+    union
+    {
+        drwav_uint8 fourcc[4];  /* 4-byte chunk ID; presumably the form used by RIFF containers (TODO confirm). */
+        drwav_uint8 guid[16];   /* 16-byte chunk ID; presumably the form used by W64 containers (TODO confirm). */
+    } id;
+
+    /* The size in bytes of the chunk. */
+    drwav_uint64 sizeInBytes;
+
+    /*
+    Padding associated with the chunk, presumably a byte count (TODO confirm).
+    Alignment is 2 bytes for RIFF and 8 bytes for W64.
+    */
+    unsigned int paddingSize;
+} drwav_chunk_header;  /* Basic header information about a chunk; passed by pointer to drwav_chunk_proc callbacks. */
+
+/*
+Callback for when data is read. Return value is the number of bytes actually read.
+
+pUserData   [in]  The user data that was passed to drwav_init(), drwav_open() and family.
+pBufferOut  [out] The output buffer.
+bytesToRead [in]  The number of bytes to read.
+
+Returns the number of bytes actually read.
+
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
+either the entire bytesToRead is filled or you have reached the end of the stream.
+*/
 typedef size_t (* drwav_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
 
-// Callback for when data is written. Returns value is the number of bytes actually written.
-//
-// pUserData    [in]  The user data that was passed to drwav_init_write(), drwav_open_write() and family.
-// pData        [out] A pointer to the data to write.
-// bytesToWrite [in]  The number of bytes to write.
-//
-// Returns the number of bytes actually written.
-//
-// If the return value differs from bytesToWrite, it indicates an error.
+/*
+Callback for when data is written. Return value is the number of bytes actually written.
+
+pUserData    [in]  The user data that was passed to drwav_init_write(), drwav_open_write() and family.
+pData        [in]  A pointer to the data to write.
+bytesToWrite [in]  The number of bytes to write.
+
+Returns the number of bytes actually written.
+
+If the return value differs from bytesToWrite, it indicates an error.
+*/
 typedef size_t (* drwav_write_proc)(void* pUserData, const void* pData, size_t bytesToWrite);
 
-// Callback for when data needs to be seeked.
-//
-// pUserData [in] The user data that was passed to drwav_init(), drwav_open() and family.
-// offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
-// origin    [in] The origin of the seek - the current position or the start of the stream.
-//
-// Returns whether or not the seek was successful.
-//
-// Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which
-// will be either drwav_seek_origin_start or drwav_seek_origin_current.
+/*
+Callback for when data needs to be seeked.
+
+pUserData [in] The user data that was passed to drwav_init(), drwav_open() and family.
+offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
+origin    [in] The origin of the seek - the current position or the start of the stream.
+
+Returns whether or not the seek was successful.
+
+Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which
+will be either drwav_seek_origin_start or drwav_seek_origin_current.
+*/
 typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin);
 
-// Structure for internal use. Only used for loaders opened with drwav_open_memory().
+/*
+Callback for when drwav_init_ex/drwav_open_ex finds a chunk.
+
+pChunkUserData    [in] The user data that was passed to the pChunkUserData parameter of drwav_init_ex(), drwav_open_ex() and family.
+onRead            [in] A pointer to the function to call when reading.
+onSeek            [in] A pointer to the function to call when seeking.
+pReadSeekUserData [in] The user data that was passed to the pReadSeekUserData parameter of drwav_init_ex(), drwav_open_ex() and family.
+pChunkHeader      [in] A pointer to an object containing basic header information about the chunk. Use this to identify the chunk.
+
+Returns the number of bytes read plus the number of bytes seeked by the callback.
+
+To read data from the chunk, call onRead(), passing in pReadSeekUserData as the first parameter. Do the same
+for seeking with onSeek(). The return value must be the total number of bytes you have read _plus_ seeked.
+
+You must not attempt to read beyond the boundary of the chunk.
+*/
+typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_chunk_header* pChunkHeader);
+
+/* Structure for internal use. Only used for loaders opened with drwav_open_memory(). */
 typedef struct
 {
     const drwav_uint8* data;
@@ -209,7 +298,7 @@ typedef struct
     size_t currentReadPos;
 } drwav__memory_stream;
 
-// Structure for internal use. Only used for writers opened with drwav_open_memory_write().
+/* Structure for internal use. Only used for writers opened with drwav_open_memory_write(). */
 typedef struct
 {
     void** ppData;
@@ -221,8 +310,8 @@ typedef struct
 
 typedef struct
 {
-    drwav_container container;  // RIFF, W64.
-    drwav_uint32 format;        // DR_WAVE_FORMAT_*
+    drwav_container container;  /* RIFF, W64. */
+    drwav_uint32 format;        /* DR_WAVE_FORMAT_* */
     drwav_uint32 channels;
     drwav_uint32 sampleRate;
     drwav_uint32 bitsPerSample;
@@ -230,473 +319,591 @@ typedef struct
 
 typedef struct
 {
-    // The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications
-    // that require support for data formats not natively supported by dr_wav.
+    /*
+    The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications
+    that require support for data formats not natively supported by dr_wav.
+    */
     drwav_uint16 formatTag;
 
-    // The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc.
+    /* The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc. */
     drwav_uint16 channels;
 
-    // The sample rate. Usually set to something like 44100.
+    /* The sample rate. Usually set to something like 44100. */
     drwav_uint32 sampleRate;
 
-    // Average bytes per second. You probably don't need this, but it's left here for informational purposes.
+    /* Average bytes per second. You probably don't need this, but it's left here for informational purposes. */
     drwav_uint32 avgBytesPerSec;
 
-    // Block align. This is equal to the number of channels * bytes per sample.
+    /* Block align. This is equal to the number of channels * bytes per sample. */
     drwav_uint16 blockAlign;
 
-    // Bits per sample.
+    /* Bits per sample. */
     drwav_uint16 bitsPerSample;
 
-    // The size of the extended data. Only used internally for validation, but left here for informational purposes.
+    /* The size of the extended data. Only used internally for validation, but left here for informational purposes. */
     drwav_uint16 extendedSize;
 
-    // The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample>
-    // is always rounded up to the nearest multiple of 8. This variable contains information about exactly how
-    // many bits a valid per sample. Mainly used for informational purposes.
+    /*
+    The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample>
+    is always rounded up to the nearest multiple of 8. This variable contains information about exactly how
+    many bits are valid per sample. Mainly used for informational purposes.
+    */
     drwav_uint16 validBitsPerSample;
 
-    // The channel mask. Not used at the moment.
+    /* The channel mask. Not used at the moment. */
     drwav_uint32 channelMask;
 
-    // The sub-format, exactly as specified by the wave file.
+    /* The sub-format, exactly as specified by the wave file. */
     drwav_uint8 subFormat[16];
 } drwav_fmt;
 
 typedef struct
 {
-    // A pointer to the function to call when more data is needed.
+    drwav_uint32 cuePointId;
+    drwav_uint32 type;
+    drwav_uint32 start;
+    drwav_uint32 end;
+    drwav_uint32 fraction;
+    drwav_uint32 playCount;
+} drwav_smpl_loop;
+
+ typedef struct
+{
+    drwav_uint32 manufacturer;
+    drwav_uint32 product;
+    drwav_uint32 samplePeriod;
+    drwav_uint32 midiUnityNotes;
+    drwav_uint32 midiPitchFraction;
+    drwav_uint32 smpteFormat;
+    drwav_uint32 smpteOffset;
+    drwav_uint32 numSampleLoops;
+    drwav_uint32 samplerData;
+    drwav_smpl_loop loops[DRWAV_MAX_SMPL_LOOPS];
+} drwav_smpl;
+
+typedef struct
+{
+    /* A pointer to the function to call when more data is needed. */
     drwav_read_proc onRead;
 
-    // A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode.
+    /* A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode. */
     drwav_write_proc onWrite;
 
-    // A pointer to the function to call when the wav file needs to be seeked.
+    /* A pointer to the function to call when the wav file needs to be seeked. */
     drwav_seek_proc onSeek;
 
-    // The user data to pass to callbacks.
+    /* The user data to pass to callbacks. */
     void* pUserData;
 
 
-    // Whether or not the WAV file is formatted as a standard RIFF file or W64.
+    /* Whether or not the WAV file is formatted as a standard RIFF file or W64. */
     drwav_container container;
 
 
-    // Structure containing format information exactly as specified by the wav file.
+    /* Structure containing format information exactly as specified by the wav file. */
     drwav_fmt fmt;
 
-    // The sample rate. Will be set to something like 44100.
+    /* The sample rate. Will be set to something like 44100. */
     drwav_uint32 sampleRate;
 
-    // The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc.
+    /* The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. */
     drwav_uint16 channels;
 
-    // The bits per sample. Will be set to something like 16, 24, etc.
+    /* The bits per sample. Will be set to something like 16, 24, etc. */
     drwav_uint16 bitsPerSample;
 
-    // The number of bytes per sample.
-    drwav_uint16 bytesPerSample;
-
-    // Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE).
+    /* Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE). */
     drwav_uint16 translatedFormatTag;
 
-    // The total number of samples making up the audio data. Use <totalSampleCount> * <bytesPerSample> to calculate
-    // the required size of a buffer to hold the entire audio data.
-    drwav_uint64 totalSampleCount;
+    /* The total number of PCM frames making up the audio data. */
+    drwav_uint64 totalPCMFrameCount;
 
 
-    // The size in bytes of the data chunk.
+    /* The size in bytes of the data chunk. */
     drwav_uint64 dataChunkDataSize;
     
-    // The position in the stream of the first byte of the data chunk. This is used for seeking.
+    /* The position in the stream of the first byte of the data chunk. This is used for seeking. */
     drwav_uint64 dataChunkDataPos;
 
-    // The number of bytes remaining in the data chunk.
+    /* The number of bytes remaining in the data chunk. */
     drwav_uint64 bytesRemaining;
 
 
-    // Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always
-    // set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation.
+    /*
+    Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always
+    set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation.
+    */
     drwav_uint64 dataChunkDataSizeTargetWrite;
 
-    // Keeps track of whether or not the wav writer was initialized in sequential mode.
+    /* Keeps track of whether or not the wav writer was initialized in sequential mode. */
     drwav_bool32 isSequentialWrite;
 
 
-    // A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_open_memory().
+    /* smpl chunk. */
+    drwav_smpl smpl;
+
+
+    /* A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_open_memory(). */
     drwav__memory_stream memoryStream;
     drwav__memory_stream_write memoryStreamWrite;
 
-    // Generic data for compressed formats. This data is shared across all block-compressed formats.
+    /* Generic data for compressed formats. This data is shared across all block-compressed formats. */
     struct
     {
-        drwav_uint64 iCurrentSample;    // The index of the next sample that will be read by drwav_read_*(). This is used with "totalSampleCount" to ensure we don't read excess samples at the end of the last block.
+        drwav_uint64 iCurrentSample;    /* The index of the next sample that will be read by drwav_read_*(). This is used with "totalSampleCount" to ensure we don't read excess samples at the end of the last block. */
     } compressed;
     
-    // Microsoft ADPCM specific data.
+    /* Microsoft ADPCM specific data. */
     struct
     {
         drwav_uint32 bytesRemainingInBlock;
         drwav_uint16 predictor[2];
         drwav_int32  delta[2];
-        drwav_int32  cachedSamples[4];  // Samples are stored in this cache during decoding.
+        drwav_int32  cachedSamples[4];  /* Samples are stored in this cache during decoding. */
         drwav_uint32 cachedSampleCount;
-        drwav_int32  prevSamples[2][2]; // The previous 2 samples for each channel (2 channels at most).
+        drwav_int32  prevSamples[2][2]; /* The previous 2 samples for each channel (2 channels at most). */
     } msadpcm;
 
-    // IMA ADPCM specific data.
+    /* IMA ADPCM specific data. */
     struct
     {
         drwav_uint32 bytesRemainingInBlock;
         drwav_int32  predictor[2];
         drwav_int32  stepIndex[2];
-        drwav_int32  cachedSamples[16]; // Samples are stored in this cache during decoding.
+        drwav_int32  cachedSamples[16]; /* Samples are stored in this cache during decoding. */
         drwav_uint32 cachedSampleCount;
     } ima;
+
+
+    drwav_uint64 totalSampleCount;  /* <-- DEPRECATED. Will be removed in a future version. */
 } drwav;
 
 
-// Initializes a pre-allocated drwav object.
-//
-// onRead    [in]           The function to call when data needs to be read from the client.
-// onSeek    [in]           The function to call when the read position of the client data needs to move.
-// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
-//
-// Returns true if successful; false otherwise.
-//
-// Close the loader with drwav_uninit().
-//
-// This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory()
-// to open the stream from a file or from a block of memory respectively.
-//
-// If you want dr_wav to manage the memory allocation for you, consider using drwav_open() instead. This will allocate
-// a drwav object on the heap and return a pointer to it.
-//
-// See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
-drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData);
+/*
+Initializes a pre-allocated drwav object.
 
-// Initializes a pre-allocated drwav object for writing.
-//
-// onWrite   [in]           The function to call when data needs to be written.
-// onSeek    [in]           The function to call when the write position needs to move.
-// pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
-//
-// Returns true if successful; false otherwise.
-//
-// Close the writer with drwav_uninit().
-//
-// This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory()
-// to open the stream from a file or from a block of memory respectively.
-//
-// If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform
-// a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek.
-//
-// If you want dr_wav to manage the memory allocation for you, consider using drwav_open() instead. This will allocate
-// a drwav object on the heap and return a pointer to it.
-//
-// See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit()
+pWav                         [out]          A pointer to the drwav object being initialized.
+onRead                       [in]           The function to call when data needs to be read from the client.
+onSeek                       [in]           The function to call when the read position of the client data needs to move.
+onChunk                      [in, optional] The function to call when a chunk is enumerated at initialization time.
+pUserData, pReadSeekUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+pChunkUserData               [in, optional] A pointer to application defined data that will be passed to onChunk.
+flags                        [in, optional] A set of flags for controlling how things are loaded.
+
+Returns true if successful; false otherwise.
+
+Close the loader with drwav_uninit().
+
+This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory()
+to open the stream from a file or from a block of memory respectively.
+
+If you want dr_wav to manage the memory allocation for you, consider using drwav_open() instead. This will allocate
+a drwav object on the heap and return a pointer to it.
+
+Possible values for flags:
+  DRWAV_SEQUENTIAL: Never perform a backwards seek while loading. This disables the chunk callback and will cause this function
+                    to return as soon as the data chunk is found. Any chunks after the data chunk will be ignored.
+
+drwav_init() is equivalent to "drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0);".
+
+The onChunk callback is not called for the WAVE or FMT chunks. The contents of the FMT chunk can be read from pWav->fmt
+after the function returns.
+
+See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
+*/
+drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData);
+drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags);
+
+/*
+Initializes a pre-allocated drwav object for writing.
+
+onWrite   [in]           The function to call when data needs to be written.
+onSeek    [in]           The function to call when the write position needs to move.
+pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
+
+Returns true if successful; false otherwise.
+
+Close the writer with drwav_uninit().
+
+This is the lowest level function for initializing a WAV file for writing. You can also use drwav_init_file_write() and
+drwav_init_memory_write() to write the stream to a file or to a block of memory respectively.
+
+If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform
+a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek.
+
+If you want dr_wav to manage the memory allocation for you, consider using drwav_open_write() instead. This will allocate
+a drwav object on the heap and return a pointer to it.
+
+See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit()
+*/
 drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData);
 drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData);
 
-// Uninitializes the given drwav object.
-//
-// Use this only for objects initialized with drwav_init().
+/*
+Uninitializes the given drwav object.
+
+Use this only for objects initialized with drwav_init().
+*/
 void drwav_uninit(drwav* pWav);
 
 
-// Opens a wav file using the given callbacks.
-//
-// onRead    [in]           The function to call when data needs to be read from the client.
-// onSeek    [in]           The function to call when the read position of the client data needs to move.
-// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
-//
-// Returns null on error.
-//
-// Close the loader with drwav_close().
-//
-// You can also use drwav_open_file() and drwav_open_memory() to open the stream from a file or from a block of
-// memory respectively.
-//
-// This is different from drwav_init() in that it will allocate the drwav object for you via DRWAV_MALLOC() before
-// initializing it.
-//
-// See also: drwav_open_file(), drwav_open_memory(), drwav_close()
-drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData);
+/*
+Opens a wav file using the given callbacks.
 
-// Opens a wav file for writing using the given callbacks.
-//
-// onWrite   [in]           The function to call when data needs to be written.
-// onSeek    [in]           The function to call when the write position needs to move.
-// pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
-//
-// Returns null on error.
-//
-// Close the loader with drwav_close().
-//
-// You can also use drwav_open_file_write() and drwav_open_memory_write() to open the stream from a file or from a block
-// of memory respectively.
-//
-// This is different from drwav_init_write() in that it will allocate the drwav object for you via DRWAV_MALLOC() before
-// initializing it.
-//
-// See also: drwav_open_file_write(), drwav_open_memory_write(), drwav_close()
+onRead    [in]           The function to call when data needs to be read from the client.
+onSeek    [in]           The function to call when the read position of the client data needs to move.
+pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+
+Returns null on error.
+
+Close the loader with drwav_close().
+
+You can also use drwav_open_file() and drwav_open_memory() to open the stream from a file or from a block of
+memory respectively.
+
+This is different from drwav_init() in that it will allocate the drwav object for you via DRWAV_MALLOC() before
+initializing it.
+
+See also: drwav_init(), drwav_open_file(), drwav_open_memory(), drwav_close()
+*/
+drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData);
+drwav* drwav_open_ex(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags);
+
+/*
+Opens a wav file for writing using the given callbacks.
+
+onWrite   [in]           The function to call when data needs to be written.
+onSeek    [in]           The function to call when the write position needs to move.
+pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
+
+Returns null on error.
+
+Close the writer with drwav_close().
+
+You can also use drwav_open_file_write() and drwav_open_memory_write() to open the stream from a file or from a block
+of memory respectively.
+
+This is different from drwav_init_write() in that it will allocate the drwav object for you via DRWAV_MALLOC() before
+initializing it.
+
+See also: drwav_open_file_write(), drwav_open_memory_write(), drwav_close()
+*/
 drwav* drwav_open_write(const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData);
 drwav* drwav_open_write_sequential(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData);
 
-// Uninitializes and deletes the the given drwav object.
-//
-// Use this only for objects created with drwav_open().
+/*
+Uninitializes and deletes the given drwav object.
+
+Use this only for objects created with drwav_open().
+*/
 void drwav_close(drwav* pWav);
 
 
-// Reads raw audio data.
-//
-// This is the lowest level function for reading audio data. It simply reads the given number of
-// bytes of the raw internal sample data.
-//
-// Consider using drwav_read_s16(), drwav_read_s32() or drwav_read_f32() for reading sample data in
-// a consistent format.
-//
-// Returns the number of bytes actually read.
+/*
+Reads raw audio data.
+
+This is the lowest level function for reading audio data. It simply reads the given number of
+bytes of the raw internal sample data.
+
+Consider using drwav_read_pcm_frames_s16(), drwav_read_pcm_frames_s32() or drwav_read_pcm_frames_f32() for
+reading sample data in a consistent format.
+
+Returns the number of bytes actually read.
+*/
 size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut);
 
-// Reads a chunk of audio data in the native internal format.
-//
-// This is typically the most efficient way to retrieve audio data, but it does not do any format
-// conversions which means you'll need to convert the data manually if required.
-//
-// If the return value is less than <samplesToRead> it means the end of the file has been reached or
-// you have requested more samples than can possibly fit in the output buffer.
-//
-// This function will only work when sample data is of a fixed size and uncompressed. If you are
-// using a compressed format consider using drwav_read_raw() or drwav_read_s16/s32/f32/etc().
-drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOut);
+/*
+Reads a chunk of audio data in the native internal format.
 
-// Seeks to the given sample.
-//
-// Returns true if successful; false otherwise.
-drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample);
+This is typically the most efficient way to retrieve audio data, but it does not do any format
+conversions which means you'll need to convert the data manually if required.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached or
+you have requested more samples than can possibly fit in the output buffer.
+
+This function will only work when sample data is of a fixed size and uncompressed. If you are
+using a compressed format consider using drwav_read_raw() or drwav_read_pcm_frames_s16/s32/f32/etc().
+*/
+drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+
+/*
+Seeks to the given PCM frame.
+
+Returns true if successful; false otherwise.
+*/
+drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex);
 
 
-// Writes raw audio data.
-//
-// Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error.
+/*
+Writes raw audio data.
+
+Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error.
+*/
 size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData);
 
-// Writes audio data based on sample counts.
-//
-// Returns the number of samples written.
-drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* pData);
+/*
+Writes PCM frames.
+
+Returns the number of PCM frames written.
+*/
+drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
 
 
-
-//// Conversion Utilities ////
+/* Conversion Utilities */
 #ifndef DR_WAV_NO_CONVERSION_API
 
-// Reads a chunk of audio data and converts it to signed 16-bit PCM samples.
-//
-// Returns the number of samples actually read.
-//
-// If the return value is less than <samplesToRead> it means the end of the file has been reached.
-drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+/*
+Reads a chunk of audio data and converts it to signed 16-bit PCM samples.
 
-// Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples.
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples. */
 void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-// Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples.
+/* Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples. */
 void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-// Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples.
+/* Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples. */
 void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount);
 
-// Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples.
+/* Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples. */
 void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount);
 
-// Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples.
+/* Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples. */
 void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount);
 
-// Low-level function for converting A-law samples to signed 16-bit PCM samples.
+/* Low-level function for converting A-law samples to signed 16-bit PCM samples. */
 void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-// Low-level function for converting u-law samples to signed 16-bit PCM samples.
+/* Low-level function for converting u-law samples to signed 16-bit PCM samples. */
 void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
 
-// Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples.
-//
-// Returns the number of samples actually read.
-//
-// If the return value is less than <samplesToRead> it means the end of the file has been reached.
-drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut);
+/*
+Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples.
 
-// Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples.
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples. */
 void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-// Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples.
+/* Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples. */
 void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount);
 
-// Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples.
+/* Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples. */
 void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-// Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples.
+/* Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples. */
 void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount);
 
-// Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples.
+/* Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples. */
 void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount);
 
-// Low-level function for converting A-law samples to IEEE 32-bit floating point samples.
+/* Low-level function for converting A-law samples to IEEE 32-bit floating point samples. */
 void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-// Low-level function for converting u-law samples to IEEE 32-bit floating point samples.
+/* Low-level function for converting u-law samples to IEEE 32-bit floating point samples. */
 void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
 
-// Reads a chunk of audio data and converts it to signed 32-bit PCM samples.
-//
-// Returns the number of samples actually read.
-//
-// If the return value is less than <samplesToRead> it means the end of the file has been reached.
-drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut);
+/*
+Reads a chunk of audio data and converts it to signed 32-bit PCM samples.
 
-// Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples.
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples. */
 void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-// Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples.
+/* Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples. */
 void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount);
 
-// Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples.
+/* Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples. */
 void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-// Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples.
+/* Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples. */
 void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount);
 
-// Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples.
+/* Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples. */
 void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount);
 
-// Low-level function for converting A-law samples to signed 32-bit PCM samples.
+/* Low-level function for converting A-law samples to signed 32-bit PCM samples. */
 void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-// Low-level function for converting u-law samples to signed 32-bit PCM samples.
+/* Low-level function for converting u-law samples to signed 32-bit PCM samples. */
 void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
 
-#endif  //DR_WAV_NO_CONVERSION_API
+#endif  /* DR_WAV_NO_CONVERSION_API */
 
 
-//// High-Level Convenience Helpers ////
+/* High-Level Convenience Helpers */
 
 #ifndef DR_WAV_NO_STDIO
+/*
+Helper for initializing a wave file using stdio.
 
-// Helper for initializing a wave file using stdio.
-//
-// This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
-// objects because the operating system may restrict the number of file handles an application can have open at
-// any given time.
+This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
 drwav_bool32 drwav_init_file(drwav* pWav, const char* filename);
+drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags);
 
-// Helper for initializing a wave file for writing using stdio.
-//
-// This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
-// objects because the operating system may restrict the number of file handles an application can have open at
-// any given time.
+/*
+Helper for initializing a wave file for writing using stdio.
+
+This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
 drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat);
 drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
 
-// Helper for opening a wave file using stdio.
-//
-// This holds the internal FILE object until drwav_close() is called. Keep this in mind if you're caching drwav
-// objects because the operating system may restrict the number of file handles an application can have open at
-// any given time.
-drwav* drwav_open_file(const char* filename);
+/*
+Helper for opening a wave file using stdio.
 
-// Helper for opening a wave file for writing using stdio.
-//
-// This holds the internal FILE object until drwav_close() is called. Keep this in mind if you're caching drwav
-// objects because the operating system may restrict the number of file handles an application can have open at
-// any given time.
+This holds the internal FILE object until drwav_close() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
+drwav* drwav_open_file(const char* filename);
+drwav* drwav_open_file_ex(const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags);
+
+/*
+Helper for opening a wave file for writing using stdio.
+
+This holds the internal FILE object until drwav_close() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
 drwav* drwav_open_file_write(const char* filename, const drwav_data_format* pFormat);
 drwav* drwav_open_file_write_sequential(const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
 
-#endif  //DR_WAV_NO_STDIO
+#endif  /* DR_WAV_NO_STDIO */
 
-// Helper for initializing a loader from a pre-allocated memory buffer.
-//
-// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
-// the lifetime of the drwav object.
-//
-// The buffer should contain the contents of the entire wave file, not just the sample data.
+/*
+Helper for initializing a loader from a pre-allocated memory buffer.
+
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+the lifetime of the drwav object.
+
+The buffer should contain the contents of the entire wave file, not just the sample data.
+*/
 drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize);
+drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags);
 
-// Helper for initializing a writer which outputs data to a memory buffer.
-//
-// dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
-//
-// The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be
-// considered valid until after drwav_uninit() has been called anyway.
+/*
+Helper for initializing a writer which outputs data to a memory buffer.
+
+dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
+
+The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be
+considered valid until after drwav_uninit() has been called anyway.
+*/
 drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat);
 drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
 
-// Helper for opening a loader from a pre-allocated memory buffer.
-//
-// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
-// the lifetime of the drwav object.
-//
-// The buffer should contain the contents of the entire wave file, not just the sample data.
-drwav* drwav_open_memory(const void* data, size_t dataSize);
+/*
+Helper for opening a loader from a pre-allocated memory buffer.
 
-// Helper for opening a writer which outputs data to a memory buffer.
-//
-// dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
-//
-// The buffer will remain allocated even after drwav_close() is called. Indeed, the buffer should not be
-// considered valid until after drwav_close() has been called anyway.
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+the lifetime of the drwav object.
+
+The buffer should contain the contents of the entire wave file, not just the sample data.
+*/
+drwav* drwav_open_memory(const void* data, size_t dataSize);
+drwav* drwav_open_memory_ex(const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags);
+
+/*
+Helper for opening a writer which outputs data to a memory buffer.
+
+dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
+
+The buffer will remain allocated even after drwav_close() is called. Indeed, the buffer should not be
+considered valid until after drwav_close() has been called anyway.
+*/
 drwav* drwav_open_memory_write(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat);
 drwav* drwav_open_memory_write_sequential(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
 
 
 #ifndef DR_WAV_NO_CONVERSION_API
-// Opens and reads a wav file in a single operation.
+/* Opens and reads a wav file in a single operation. */
+drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount);
+float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount);
+drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount);
+#ifndef DR_WAV_NO_STDIO
+/* Opens and decodes a wav file in a single operation. */
+drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount);
+float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount);
+drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount);
+#endif
+
+/* Opens and decodes a wav file from a block of memory in a single operation. */
+drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount);
+float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount);
+drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount);
+#endif
+
+/* Frees data that was allocated internally by dr_wav. */
+void drwav_free(void* pDataReturnedByOpenAndRead);
+
+
+/* DEPRECATED APIS */
+drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOut);
+drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut);
+drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut);
+drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample);
+drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* pData);
+#ifndef DR_WAV_NO_CONVERSION_API
 drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
 float* drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
 drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
 #ifndef DR_WAV_NO_STDIO
-// Opens and decodes a wav file in a single operation.
-drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
-float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
-drwav_int32* drwav_open_and_read_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+drwav_int16* drwav_open_file_and_read_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+float* drwav_open_file_and_read_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+drwav_int32* drwav_open_file_and_read_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+#endif
+drwav_int16* drwav_open_memory_and_read_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+float* drwav_open_memory_and_read_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
+drwav_int32* drwav_open_memory_and_read_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
 #endif
 
-// Opens and decodes a wav file from a block of memory in a single operation.
-drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
-float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
-drwav_int32* drwav_open_and_read_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount);
-#endif
-
-// Frees data that was allocated internally by dr_wav.
-void drwav_free(void* pDataReturnedByOpenAndRead);
 
 #ifdef __cplusplus
 }
 #endif
-#endif  // dr_wav_h
+#endif  /* dr_wav_h */
 
 
-/////////////////////////////////////////////////////
-//
-// IMPLEMENTATION
-//
-/////////////////////////////////////////////////////
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
 
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
 #ifdef DR_WAV_IMPLEMENTATION
 #include <stdlib.h>
-#include <string.h> // For memcpy(), memset()
-#include <limits.h> // For INT_MAX
+#include <string.h> /* For memcpy(), memset() */
+#include <limits.h> /* For INT_MAX */
 
 #ifndef DR_WAV_NO_STDIO
 #include <stdio.h>
 #endif
 
-// Standard library stuff.
+/* Standard library stuff. */
 #ifndef DRWAV_ASSERT
 #include <assert.h>
 #define DRWAV_ASSERT(expression)           assert(expression)
@@ -727,34 +934,43 @@ void drwav_free(void* pDataReturnedByOpenAndRead);
 #define drwav_copy_memory                  DRWAV_COPY_MEMORY
 #define drwav_zero_memory                  DRWAV_ZERO_MEMORY
 
+typedef drwav_int32 drwav_result;
+#define DRWAV_SUCCESS            0
+#define DRWAV_ERROR             -1
+#define DRWAV_INVALID_ARGS      -2
+#define DRWAV_INVALID_OPERATION -3
+#define DRWAV_INVALID_FILE      -100
+#define DRWAV_EOF               -101
 
-#define DRWAV_MAX_SIMD_VECTOR_SIZE         64  // 64 for AVX-512 in the future.
+#define DRWAV_MAX_SIMD_VECTOR_SIZE         64  /* 64 for AVX-512 in the future. */
 
 #ifdef _MSC_VER
 #define DRWAV_INLINE __forceinline
 #else
 #ifdef __GNUC__
-#define DRWAV_INLINE inline __attribute__((always_inline))
+#define DRWAV_INLINE __inline__ __attribute__((always_inline))
 #else
-#define DRWAV_INLINE inline
+#define DRWAV_INLINE
 #endif
 #endif
 
-// I couldn't figure out where SIZE_MAX was defined for VC6. If anybody knows, let me know.
-#if defined(_MSC_VER) && _MSC_VER <= 1200
-    #if defined(_WIN64)
-        #define SIZE_MAX    ((drwav_uint64)0xFFFFFFFFFFFFFFFF)
+#if defined(SIZE_MAX)
+    #define DRWAV_SIZE_MAX  SIZE_MAX
+#else
+    #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+        #define DRWAV_SIZE_MAX  ((drwav_uint64)0xFFFFFFFFFFFFFFFF)
     #else
-        #define SIZE_MAX    0xFFFFFFFF
+        #define DRWAV_SIZE_MAX  0xFFFFFFFF
     #endif
 #endif
 
-static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00};    // 66666972-912E-11CF-A5D6-28DB04C10000
-static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 65766177-ACF3-11D3-8CD1-00C04F8EDB8A
-static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A
-static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A
-static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 74636166-ACF3-11D3-8CD1-00C04F8EDB8A
-static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    // 61746164-ACF3-11D3-8CD1-00C04F8EDB8A
+static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00};    /* 66666972-912E-11CF-A5D6-28DB04C10000 */
+static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 65766177-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 74636166-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 61746164-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A */
 
 static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
 {
@@ -787,11 +1003,7 @@ static DRWAV_INLINE int drwav__is_little_endian()
 
 static DRWAV_INLINE unsigned short drwav__bytes_to_u16(const unsigned char* data)
 {
-    if (drwav__is_little_endian()) {
-        return (data[0] << 0) | (data[1] << 8);
-    } else {
-        return (data[1] << 0) | (data[0] << 8);
-    }
+    return (data[0] << 0) | (data[1] << 8);
 }
 
 static DRWAV_INLINE short drwav__bytes_to_s16(const unsigned char* data)
@@ -801,29 +1013,20 @@ static DRWAV_INLINE short drwav__bytes_to_s16(const unsigned char* data)
 
 static DRWAV_INLINE unsigned int drwav__bytes_to_u32(const unsigned char* data)
 {
-    if (drwav__is_little_endian()) {
-        return (data[0] << 0) | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
-    } else {
-        return (data[3] << 0) | (data[2] << 8) | (data[1] << 16) | (data[0] << 24);
-    }
+    return ((unsigned int)data[0] << 0) | ((unsigned int)data[1] << 8) | ((unsigned int)data[2] << 16) | ((unsigned int)data[3] << 24); /* Widen before shifting: a promoted int would overflow when data[3] >= 0x80. */
 }
 
 static DRWAV_INLINE drwav_uint64 drwav__bytes_to_u64(const unsigned char* data)
 {
-    if (drwav__is_little_endian()) {
-        return
-            ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24) |
-            ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
-    } else {
-        return
-            ((drwav_uint64)data[7] <<  0) | ((drwav_uint64)data[6] <<  8) | ((drwav_uint64)data[5] << 16) | ((drwav_uint64)data[4] << 24) |
-            ((drwav_uint64)data[3] << 32) | ((drwav_uint64)data[2] << 40) | ((drwav_uint64)data[1] << 48) | ((drwav_uint64)data[0] << 56);
-    }
+    return
+        ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24) |
+        ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
 }
 
 static DRWAV_INLINE void drwav__bytes_to_guid(const unsigned char* data, drwav_uint8* guid)
 {
-    for (int i = 0; i < 16; ++i) {
+    int i;
+    for (i = 0; i < 16; ++i) {
         guid[i] = data[i];
     }
 }
@@ -836,60 +1039,44 @@ static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 fo
         formatTag == DR_WAVE_FORMAT_DVI_ADPCM;
 }
 
-
 drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
 drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
 drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData);
 drwav* drwav_open_write__internal(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData);
 
-typedef struct
-{
-    union
-    {
-        drwav_uint8 fourcc[4];
-        drwav_uint8 guid[16];
-    } id;
-
-    // The size in bytes of the chunk.
-    drwav_uint64 sizeInBytes;
-
-    // RIFF = 2 byte alignment.
-    // W64  = 8 byte alignment.
-    unsigned int paddingSize;
-
-} drwav__chunk_header;
-
-static drwav_bool32 drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav__chunk_header* pHeaderOut)
+static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut)
 {
     if (container == drwav_container_riff) {
+        unsigned char sizeInBytes[4];
+
         if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) {
-            return DRWAV_FALSE;
+            return DRWAV_EOF;
         }
 
-        unsigned char sizeInBytes[4];
         if (onRead(pUserData, sizeInBytes, 4) != 4) {
-            return DRWAV_FALSE;
+            return DRWAV_INVALID_FILE;
         }
 
         pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes);
         pHeaderOut->paddingSize = (unsigned int)(pHeaderOut->sizeInBytes % 2);
         *pRunningBytesReadOut += 8;
     } else {
-        if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) {
-            return DRWAV_FALSE;
-        }
-
         unsigned char sizeInBytes[8];
-        if (onRead(pUserData, sizeInBytes, 8) != 8) {
-            return DRWAV_FALSE;
+
+        if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) {
+            return DRWAV_EOF;
         }
 
-        pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24;    // <-- Subtract 24 because w64 includes the size of the header.
+        if (onRead(pUserData, sizeInBytes, 8) != 8) {
+            return DRWAV_INVALID_FILE;
+        }
+
+        pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24;    /* <-- Subtract 24 because w64 includes the size of the header. */
         pHeaderOut->paddingSize = (unsigned int)(pHeaderOut->sizeInBytes % 8);
         *pRunningBytesReadOut += 24;
     }
 
-    return DRWAV_TRUE;
+    return DRWAV_SUCCESS;
 }
 
 static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
@@ -912,27 +1099,59 @@ static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 off
     return DRWAV_TRUE;
 }
 
+static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+{
+    if (offset <= 0x7FFFFFFF) {
+        return onSeek(pUserData, (int)offset, drwav_seek_origin_start);
+    }
+
+    /* Larger than 32-bit seek. */
+    if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_start)) {
+        return DRWAV_FALSE;
+    }
+    offset -= 0x7FFFFFFF;
+
+    for (;;) {
+        if (offset <= 0x7FFFFFFF) {
+            return onSeek(pUserData, (int)offset, drwav_seek_origin_current);
+        }
+
+        if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        offset -= 0x7FFFFFFF;
+    }
+
+    /* Should never get here. */
+    /*return DRWAV_TRUE; */
+}
+
 
 static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_fmt* fmtOut)
 {
-    drwav__chunk_header header;
-    if (!drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header)) {
+    drwav_chunk_header header;
+    unsigned char fmt[16];
+
+    if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
         return DRWAV_FALSE;
     }
 
 
-    // Skip non-fmt chunks.
-    if ((container == drwav_container_riff && !drwav__fourcc_equal(header.id.fourcc, "fmt ")) || (container == drwav_container_w64 && !drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT))) {
+    /* Skip non-fmt chunks. */
+    while ((container == drwav_container_riff && !drwav__fourcc_equal(header.id.fourcc, "fmt ")) || (container == drwav_container_w64 && !drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT))) {
         if (!drwav__seek_forward(onSeek, header.sizeInBytes + header.paddingSize, pUserData)) {
             return DRWAV_FALSE;
         }
         *pRunningBytesReadOut += header.sizeInBytes + header.paddingSize;
 
-        return drwav__read_fmt(onRead, onSeek, pUserData, container, pRunningBytesReadOut, fmtOut);
+        /* Try the next header. */
+        if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
+            return DRWAV_FALSE;
+        }
     }
 
 
-    // Validation.
+    /* Validation. */
     if (container == drwav_container_riff) {
         if (!drwav__fourcc_equal(header.id.fourcc, "fmt ")) {
             return DRWAV_FALSE;
@@ -944,7 +1163,6 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe
     }
 
 
-    unsigned char fmt[16];
     if (onRead(pUserData, fmt, sizeof(fmt)) != sizeof(fmt)) {
         return DRWAV_FALSE;
     }
@@ -964,16 +1182,18 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe
 
     if (header.sizeInBytes > 16) {
         unsigned char fmt_cbSize[2];
+        int bytesReadSoFar = 0;
+
         if (onRead(pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) {
-            return DRWAV_FALSE;    // Expecting more data.
+            return DRWAV_FALSE;    /* Expecting more data. */
         }
         *pRunningBytesReadOut += sizeof(fmt_cbSize);
 
-        int bytesReadSoFar = 18;
+        bytesReadSoFar = 18;
 
         fmtOut->extendedSize = drwav__bytes_to_u16(fmt_cbSize);
         if (fmtOut->extendedSize > 0) {
-            // Simple validation.
+            /* Simple validation. */
             if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
                 if (fmtOut->extendedSize != 22) {
                     return DRWAV_FALSE;
@@ -983,7 +1203,7 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe
             if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
                 unsigned char fmtext[22];
                 if (onRead(pUserData, fmtext, fmtOut->extendedSize) != fmtOut->extendedSize) {
-                    return DRWAV_FALSE;    // Expecting more data.
+                    return DRWAV_FALSE;    /* Expecting more data. */
                 }
 
                 fmtOut->validBitsPerSample = drwav__bytes_to_u16(fmtext + 0);
@@ -999,7 +1219,7 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe
             bytesReadSoFar += fmtOut->extendedSize;
         }
 
-        // Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size.
+        /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. */
         if (!onSeek(pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current)) {
             return DRWAV_FALSE;
         }
@@ -1051,13 +1271,18 @@ static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek
 }
 
 drwav_bool32 drwav_init_file(drwav* pWav, const char* filename)
+{
+    return drwav_init_file_ex(pWav, filename, NULL, NULL, 0);
+}
+
+drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
 {
     FILE* pFile = drwav_fopen(filename, "rb");
     if (pFile == NULL) {
         return DRWAV_FALSE;
     }
 
-    return drwav_init(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile);
+    return drwav_init_ex(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, onChunk, (void*)pFile, pChunkUserData, flags);
 }
 
 
@@ -1083,12 +1308,20 @@ drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename,
 
 drwav* drwav_open_file(const char* filename)
 {
-    FILE* pFile = drwav_fopen(filename, "rb");
+    return drwav_open_file_ex(filename, NULL, NULL, 0);
+}
+
+drwav* drwav_open_file_ex(const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
+{
+    FILE* pFile;
+    drwav* pWav;
+
+    pFile = drwav_fopen(filename, "rb");
     if (pFile == NULL) {
         return DRWAV_FALSE;
     }
 
-    drwav* pWav = drwav_open(drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile);
+    pWav = drwav_open_ex(drwav__on_read_stdio, drwav__on_seek_stdio, onChunk, (void*)pFile, pChunkUserData, flags);
     if (pWav == NULL) {
         fclose(pFile);
         return NULL;
@@ -1100,12 +1333,15 @@ drwav* drwav_open_file(const char* filename)
 
 drwav* drwav_open_file_write__internal(const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential)
 {
-    FILE* pFile = drwav_fopen(filename, "wb");
+    FILE* pFile;
+    drwav* pWav;
+
+    pFile = drwav_fopen(filename, "wb");
     if (pFile == NULL) {
         return DRWAV_FALSE;
     }
 
-    drwav* pWav = drwav_open_write__internal(pFormat, totalSampleCount, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile);
+    pWav = drwav_open_write__internal(pFormat, totalSampleCount, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile);
     if (pWav == NULL) {
         fclose(pFile);
         return NULL;
@@ -1123,16 +1359,18 @@ drwav* drwav_open_file_write_sequential(const char* filename, const drwav_data_f
 {
     return drwav_open_file_write__internal(filename, pFormat, totalSampleCount, DRWAV_TRUE);
 }
-#endif  //DR_WAV_NO_STDIO
+#endif  /* DR_WAV_NO_STDIO */
 
 
 static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
 {
     drwav__memory_stream* memory = (drwav__memory_stream*)pUserData;
+    size_t bytesRemaining;
+
     drwav_assert(memory != NULL);
     drwav_assert(memory->dataSize >= memory->currentReadPos);
 
-    size_t bytesRemaining = memory->dataSize - memory->currentReadPos;
+    bytesRemaining = memory->dataSize - memory->currentReadPos;
     if (bytesToRead > bytesRemaining) {
         bytesToRead = bytesRemaining;
     }
@@ -1153,21 +1391,21 @@ static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_see
     if (origin == drwav_seek_origin_current) {
         if (offset > 0) {
             if (memory->currentReadPos + offset > memory->dataSize) {
-                offset = (int)(memory->dataSize - memory->currentReadPos);  // Trying to seek too far forward.
+                return DRWAV_FALSE; /* Trying to seek too far forward. */
             }
         } else {
             if (memory->currentReadPos < (size_t)-offset) {
-                offset = -(int)memory->currentReadPos;  // Trying to seek too far backwards.
+                return DRWAV_FALSE; /* Trying to seek too far backwards. */
             }
         }
 
-        // This will never underflow thanks to the clamps above.
+        /* This will never go out of range because the checks above reject any seek past either end of the buffer. */
         memory->currentReadPos += offset;
     } else {
         if ((drwav_uint32)offset <= memory->dataSize) {
             memory->currentReadPos = offset;
         } else {
-            memory->currentReadPos = memory->dataSize;  // Trying to seek too far forward.
+            return DRWAV_FALSE; /* Trying to seek too far forward. */
         }
     }
     
@@ -1177,20 +1415,23 @@ static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_see
 static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite)
 {
     drwav__memory_stream_write* memory = (drwav__memory_stream_write*)pUserData;
+    size_t bytesRemaining;
+
     drwav_assert(memory != NULL);
     drwav_assert(memory->dataCapacity >= memory->currentWritePos);
 
-    size_t bytesRemaining = memory->dataCapacity - memory->currentWritePos;
+    bytesRemaining = memory->dataCapacity - memory->currentWritePos;
     if (bytesRemaining < bytesToWrite) {
-        // Need to reallocate.
+        /* Need to reallocate. */
+        void* pNewData;
         size_t newDataCapacity = (memory->dataCapacity == 0) ? 256 : memory->dataCapacity * 2;
 
-        // If doubling wasn't enough, just make it the minimum required size to write the data.
+        /* If doubling wasn't enough, just make it the minimum required size to write the data. */
         if ((newDataCapacity - memory->currentWritePos) < bytesToWrite) {
             newDataCapacity = memory->currentWritePos + bytesToWrite;
         }
 
-        void* pNewData = DRWAV_REALLOC(*memory->ppData, newDataCapacity);
+        pNewData = DRWAV_REALLOC(*memory->ppData, newDataCapacity);
         if (pNewData == NULL) {
             return 0;
         }
@@ -1199,8 +1440,7 @@ static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_
         memory->dataCapacity = newDataCapacity;
     }
 
-    drwav_uint8* pDataOut = (drwav_uint8*)(*memory->ppData);
-    DRWAV_COPY_MEMORY(pDataOut + memory->currentWritePos, pDataIn, bytesToWrite);
+    DRWAV_COPY_MEMORY(((drwav_uint8*)(*memory->ppData)) + memory->currentWritePos, pDataIn, bytesToWrite);
 
     memory->currentWritePos += bytesToWrite;
     if (memory->dataSize < memory->currentWritePos) {
@@ -1220,21 +1460,21 @@ static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drw
     if (origin == drwav_seek_origin_current) {
         if (offset > 0) {
             if (memory->currentWritePos + offset > memory->dataSize) {
-                offset = (int)(memory->dataSize - memory->currentWritePos);  // Trying to seek too far forward.
+                offset = (int)(memory->dataSize - memory->currentWritePos);  /* Trying to seek too far forward. */
             }
         } else {
             if (memory->currentWritePos < (size_t)-offset) {
-                offset = -(int)memory->currentWritePos;  // Trying to seek too far backwards.
+                offset = -(int)memory->currentWritePos;  /* Trying to seek too far backwards. */
             }
         }
 
-        // This will never underflow thanks to the clamps above.
+        /* This will never underflow thanks to the clamps above. */
         memory->currentWritePos += offset;
     } else {
         if ((drwav_uint32)offset <= memory->dataSize) {
             memory->currentWritePos = offset;
         } else {
-            memory->currentWritePos = memory->dataSize;  // Trying to seek too far forward.
+            memory->currentWritePos = memory->dataSize;  /* Trying to seek too far forward. */
         }
     }
     
@@ -1243,17 +1483,23 @@ static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drw
 
 drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize)
 {
+    return drwav_init_memory_ex(pWav, data, dataSize, NULL, NULL, 0);
+}
+
+drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
+{
+    drwav__memory_stream memoryStream;
+
     if (data == NULL || dataSize == 0) {
         return DRWAV_FALSE;
     }
 
-    drwav__memory_stream memoryStream;
     drwav_zero_memory(&memoryStream, sizeof(memoryStream));
     memoryStream.data = (const unsigned char*)data;
     memoryStream.dataSize = dataSize;
     memoryStream.currentReadPos = 0;
 
-    if (!drwav_init(pWav, drwav__on_read_memory, drwav__on_seek_memory, (void*)&memoryStream)) {
+    if (!drwav_init_ex(pWav, drwav__on_read_memory, drwav__on_seek_memory, onChunk, (void*)&memoryStream, pChunkUserData, flags)) {
         return DRWAV_FALSE;
     }
 
@@ -1265,14 +1511,15 @@ drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize)
 
 drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential)
 {
+    drwav__memory_stream_write memoryStreamWrite;
+
     if (ppData == NULL) {
         return DRWAV_FALSE;
     }
 
-    *ppData = NULL; // Important because we're using realloc()!
+    *ppData = NULL; /* Important because we're using realloc()! */
     *pDataSize = 0;
 
-    drwav__memory_stream_write memoryStreamWrite;
     drwav_zero_memory(&memoryStreamWrite, sizeof(memoryStreamWrite));
     memoryStreamWrite.ppData = ppData;
     memoryStreamWrite.pDataSize = pDataSize;
@@ -1302,17 +1549,24 @@ drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size
 
 drwav* drwav_open_memory(const void* data, size_t dataSize)
 {
+    return drwav_open_memory_ex(data, dataSize, NULL, NULL, 0);
+}
+
+drwav* drwav_open_memory_ex(const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
+{
+    drwav__memory_stream memoryStream;
+    drwav* pWav;
+
     if (data == NULL || dataSize == 0) {
         return NULL;
     }
 
-    drwav__memory_stream memoryStream;
     drwav_zero_memory(&memoryStream, sizeof(memoryStream));
     memoryStream.data = (const unsigned char*)data;
     memoryStream.dataSize = dataSize;
     memoryStream.currentReadPos = 0;
 
-    drwav* pWav = drwav_open(drwav__on_read_memory, drwav__on_seek_memory, (void*)&memoryStream);
+    pWav = drwav_open_ex(drwav__on_read_memory, drwav__on_seek_memory, onChunk, (void*)&memoryStream, pChunkUserData, flags);
     if (pWav == NULL) {
         return NULL;
     }
@@ -1325,14 +1579,16 @@ drwav* drwav_open_memory(const void* data, size_t dataSize)
 
 drwav* drwav_open_memory_write__internal(void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential)
 {
+    drwav__memory_stream_write memoryStreamWrite;
+    drwav* pWav;
+
     if (ppData == NULL) {
         return NULL;
     }
 
-    *ppData = NULL; // Important because we're using realloc()!
+    *ppData = NULL; /* Important because we're using realloc()! */
     *pDataSize = 0;
 
-    drwav__memory_stream_write memoryStreamWrite;
     drwav_zero_memory(&memoryStreamWrite, sizeof(memoryStreamWrite));
     memoryStreamWrite.ppData = ppData;
     memoryStreamWrite.pDataSize = pDataSize;
@@ -1340,7 +1596,7 @@ drwav* drwav_open_memory_write__internal(void** ppData, size_t* pDataSize, const
     memoryStreamWrite.dataCapacity = 0;
     memoryStreamWrite.currentWritePos = 0;
 
-    drwav* pWav = drwav_open_write__internal(pFormat, totalSampleCount, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite);
+    pWav = drwav_open_write__internal(pFormat, totalSampleCount, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, (void*)&memoryStreamWrite);
     if (pWav == NULL) {
         return NULL;
     }
@@ -1361,143 +1617,274 @@ drwav* drwav_open_memory_write_sequential(void** ppData, size_t* pDataSize, cons
 }
 
 
+size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
+{
+    size_t bytesRead;
+
+    drwav_assert(onRead != NULL);
+    drwav_assert(pCursor != NULL);
+
+    bytesRead = onRead(pUserData, pBufferOut, bytesToRead);
+    *pCursor += bytesRead;
+    return bytesRead;
+}
+
+drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor)
+{
+    drwav_assert(onSeek != NULL);
+    drwav_assert(pCursor != NULL);
+
+    if (!onSeek(pUserData, offset, origin)) {
+        return DRWAV_FALSE;
+    }
+
+    if (origin == drwav_seek_origin_start) {
+        *pCursor = offset;
+    } else {
+        *pCursor += offset;
+    }
+
+    return DRWAV_TRUE;
+}
+
+
+static drwav_uint32 drwav_get_bytes_per_sample(drwav* pWav)
+{
+    /*
+    The number of bytes per sample is based on the bits per sample or the block align. We prioritize floor(bitsPerSample/8), but if
+    this is zero or the bits per sample is not a multiple of 8 we need to fall back to the block align.
+    */
+    drwav_uint32 bytesPerSample = pWav->bitsPerSample >> 3;
+    if (bytesPerSample == 0 || (pWav->bitsPerSample & 0x7) != 0) {
+        bytesPerSample = pWav->fmt.blockAlign/pWav->fmt.channels;
+    }
+
+    return bytesPerSample;
+}
+
+static drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav)
+{
+    /*
+    The number of bytes per frame is based on the bits per sample or the block align. We prioritize floor(bitsPerSample*channels/8), but if
+    this is zero or the bits per frame is not a multiple of 8 we need to fall back to the block align.
+    */
+    drwav_uint32 bitsPerFrame = pWav->bitsPerSample * pWav->fmt.channels;
+    drwav_uint32 bytesPerFrame = bitsPerFrame >> 3;
+    if (bytesPerFrame == 0 || (bitsPerFrame & 0x7) != 0) {
+        bytesPerFrame = pWav->fmt.blockAlign;
+    }
+
+    return bytesPerFrame;
+}
+
+
 drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData)
 {
+    return drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0);
+}
+
+drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags)
+{
+    drwav_uint64 cursor;    /* <-- Keeps track of the byte position so we can seek to specific locations. */
+    drwav_bool32 sequential;
+    unsigned char riff[4];
+    drwav_fmt fmt;
+    unsigned short translatedFormatTag;
+    drwav_uint64 sampleCountFromFactChunk;
+    drwav_bool32 foundDataChunk;
+    drwav_uint64 dataChunkSize;
+    drwav_uint64 chunkSize;
+
     if (onRead == NULL || onSeek == NULL) {
         return DRWAV_FALSE;
     }
 
+    cursor = 0;
+    sequential = (flags & DRWAV_SEQUENTIAL) != 0;
+
     drwav_zero_memory(pWav, sizeof(*pWav));
+    pWav->onRead    = onRead;
+    pWav->onSeek    = onSeek;
+    pWav->pUserData = pReadSeekUserData;
 
-
-    // The first 4 bytes should be the RIFF identifier.
-    unsigned char riff[4];
-    if (onRead(pUserData, riff, sizeof(riff)) != sizeof(riff)) {
-        return DRWAV_FALSE;    // Failed to read data.
+    /* The first 4 bytes should be the RIFF identifier. */
+    if (drwav__on_read(onRead, pReadSeekUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) {
+        return DRWAV_FALSE;
     }
 
-    // The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
-    // w64 it will start with "riff".
+    /*
+    The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
+    w64 it will start with "riff".
+    */
     if (drwav__fourcc_equal(riff, "RIFF")) {
         pWav->container = drwav_container_riff;
     } else if (drwav__fourcc_equal(riff, "riff")) {
+        int i;
+        drwav_uint8 riff2[12];
+
         pWav->container = drwav_container_w64;
 
-        // Check the rest of the GUID for validity.
-        drwav_uint8 riff2[12];
-        if (onRead(pUserData, riff2, sizeof(riff2)) != sizeof(riff2)) {
+        /* Check the rest of the GUID for validity. */
+        if (drwav__on_read(onRead, pReadSeekUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) {
             return DRWAV_FALSE;
         }
 
-        for (int i = 0; i < 12; ++i) {
+        for (i = 0; i < 12; ++i) {
             if (riff2[i] != drwavGUID_W64_RIFF[i+4]) {
                 return DRWAV_FALSE;
             }
         }
     } else {
-        return DRWAV_FALSE;   // Unknown or unsupported container.
+        return DRWAV_FALSE;   /* Unknown or unsupported container. */
     }
 
 
     if (pWav->container == drwav_container_riff) {
-        // RIFF/WAVE
         unsigned char chunkSizeBytes[4];
-        if (onRead(pUserData, chunkSizeBytes, sizeof(chunkSizeBytes)) != sizeof(chunkSizeBytes)) {
+        unsigned char wave[4];
+
+        /* RIFF/WAVE */
+        if (drwav__on_read(onRead, pReadSeekUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
             return DRWAV_FALSE;
         }
 
-        unsigned int chunkSize = drwav__bytes_to_u32(chunkSizeBytes);
-        if (chunkSize < 36) {
-            return DRWAV_FALSE;    // Chunk size should always be at least 36 bytes.
+        if (drwav__bytes_to_u32(chunkSizeBytes) < 36) {
+            return DRWAV_FALSE;    /* Chunk size should always be at least 36 bytes. */
         }
 
-        unsigned char wave[4];
-        if (onRead(pUserData, wave, sizeof(wave)) != sizeof(wave)) {
+        if (drwav__on_read(onRead, pReadSeekUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
             return DRWAV_FALSE;
         }
 
         if (!drwav__fourcc_equal(wave, "WAVE")) {
-            return DRWAV_FALSE;    // Expecting "WAVE".
+            return DRWAV_FALSE;    /* Expecting "WAVE". */
         }
-
-        pWav->dataChunkDataPos = 4 + sizeof(chunkSizeBytes) + sizeof(wave);
     } else {
-        // W64
-        unsigned char chunkSize[8];
-        if (onRead(pUserData, chunkSize, sizeof(chunkSize)) != sizeof(chunkSize)) {
-            return DRWAV_FALSE;
-        }
-
-        if (drwav__bytes_to_u64(chunkSize) < 80) {
-            return DRWAV_FALSE;
-        }
-
+        unsigned char chunkSizeBytes[8];
         drwav_uint8 wave[16];
-        if (onRead(pUserData, wave, sizeof(wave)) != sizeof(wave)) {
+
+        /* W64 */
+        if (drwav__on_read(onRead, pReadSeekUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__bytes_to_u64(chunkSizeBytes) < 80) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__on_read(onRead, pReadSeekUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
             return DRWAV_FALSE;
         }
 
         if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) {
             return DRWAV_FALSE;
         }
-
-        pWav->dataChunkDataPos = 16 + sizeof(chunkSize) + sizeof(wave);
     }
 
 
-    // The next bytes should be the "fmt " chunk.
-    drwav_fmt fmt;
-    if (!drwav__read_fmt(onRead, onSeek, pUserData, pWav->container, &pWav->dataChunkDataPos, &fmt)) {
-        return DRWAV_FALSE;    // Failed to read the "fmt " chunk.
+    /* The next bytes should be the "fmt " chunk. */
+    if (!drwav__read_fmt(onRead, onSeek, pReadSeekUserData, pWav->container, &cursor, &fmt)) {
+        return DRWAV_FALSE;    /* Failed to read the "fmt " chunk. */
     }
 
-    // Basic validation.
+    /* Basic validation. */
     if (fmt.sampleRate == 0 || fmt.channels == 0 || fmt.bitsPerSample == 0 || fmt.blockAlign == 0) {
-        return DRWAV_FALSE; // Invalid channel count. Probably an invalid WAV file.
+        return DRWAV_FALSE; /* Zero sample rate, channel count, bits per sample or block align. Probably an invalid WAV file. */
     }
 
 
-    // Translate the internal format.
-    unsigned short translatedFormatTag = fmt.formatTag;
+    /* Translate the internal format. */
+    translatedFormatTag = fmt.formatTag;
     if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
         translatedFormatTag = drwav__bytes_to_u16(fmt.subFormat + 0);
     }
 
 
-    drwav_uint64 sampleCountFromFactChunk = 0;
 
-    // The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop.
-    drwav_uint64 dataSize;
+    sampleCountFromFactChunk = 0;
+
+    /*
+    We need to enumerate over each chunk for two reasons:
+      1) The "data" chunk may not be the next one
+      2) We may want to report each chunk back to the client
+    
+    In order to correctly report each chunk back to the client we will need to keep looping until the end of the file.
+    */
+    foundDataChunk = DRWAV_FALSE;
+    dataChunkSize = 0;
+
+    /* The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop. */
+    chunkSize = 0;
     for (;;)
     {
-        drwav__chunk_header header;
-        if (!drwav__read_chunk_header(onRead, pUserData, pWav->container, &pWav->dataChunkDataPos, &header)) {
-            return DRWAV_FALSE;
+        drwav_chunk_header header;
+        drwav_result result = drwav__read_chunk_header(onRead, pReadSeekUserData, pWav->container, &cursor, &header);
+        if (result != DRWAV_SUCCESS) {
+            if (!foundDataChunk) {
+                return DRWAV_FALSE;
+            } else {
+                break;  /* Probably at the end of the file. Get out of the loop. */
+            }
         }
 
-        dataSize = header.sizeInBytes;
+        /* Tell the client about this chunk. */
+        if (!sequential && onChunk != NULL) {
+            drwav_uint64 callbackBytesRead = onChunk(pChunkUserData, onRead, onSeek, pReadSeekUserData, &header);
+
+            /*
+            dr_wav may need to read the contents of the chunk, so we now need to seek back to the position before
+            we called the callback.
+            */
+            if (callbackBytesRead > 0) {
+                if (!drwav__seek_from_start(onSeek, cursor, pReadSeekUserData)) {
+                    return DRWAV_FALSE;
+                }
+            }
+        }
+        
+
+        if (!foundDataChunk) {
+            pWav->dataChunkDataPos = cursor;
+        }
+
+        chunkSize = header.sizeInBytes;
         if (pWav->container == drwav_container_riff) {
             if (drwav__fourcc_equal(header.id.fourcc, "data")) {
-                break;
+                foundDataChunk = DRWAV_TRUE;
+                dataChunkSize = chunkSize;
             }
         } else {
             if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) {
-                break;
+                foundDataChunk = DRWAV_TRUE;
+                dataChunkSize = chunkSize;
             }
         }
 
-        // Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats.
+        /*
+        If at this point we have found the data chunk and we're running in sequential mode, we need to break out of this loop. The reason for
+        this is that we would otherwise require a backwards seek which sequential mode forbids.
+        */
+        if (foundDataChunk && sequential) {
+            break;
+        }
+
+        /* Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats. */
         if (pWav->container == drwav_container_riff) {
             if (drwav__fourcc_equal(header.id.fourcc, "fact")) {
                 drwav_uint32 sampleCount;
-                if (onRead(pUserData, &sampleCount, 4) != 4) {
+                if (drwav__on_read(onRead, pReadSeekUserData, &sampleCount, 4, &cursor) != 4) {
                     return DRWAV_FALSE;
                 }
-                pWav->dataChunkDataPos += 4;
-                dataSize -= 4;
+                chunkSize -= 4;
 
-                // The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this
-                // for Microsoft ADPCM formats.
+                if (!foundDataChunk) {
+                    pWav->dataChunkDataPos = cursor;
+                }
+
+                /*
+                The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this
+                for Microsoft ADPCM formats.
+                */
                 if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
                     sampleCountFromFactChunk = sampleCount;
                 } else {
@@ -1506,62 +1893,120 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS
             }
         } else {
             if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) {
-                if (onRead(pUserData, &sampleCountFromFactChunk, 8) != 8) {
+                if (drwav__on_read(onRead, pReadSeekUserData, &sampleCountFromFactChunk, 8, &cursor) != 8) {
                     return DRWAV_FALSE;
                 }
-                pWav->dataChunkDataPos += 8;
-                dataSize -= 8;
+                chunkSize -= 8;
+
+                if (!foundDataChunk) {
+                    pWav->dataChunkDataPos = cursor;
+                }
             }
         }
 
-        // If we get here it means we didn't find the "data" chunk. Seek past it.
+        /* "smpl" chunk. */
+        if (pWav->container == drwav_container_riff) {
+            if (drwav__fourcc_equal(header.id.fourcc, "smpl")) {
+                unsigned char smplHeaderData[36];    /* 36 = size of the smpl header section, not including the loop data. */
+                if (chunkSize >= sizeof(smplHeaderData)) {
+                    drwav_uint64 bytesJustRead = drwav__on_read(onRead, pReadSeekUserData, smplHeaderData, sizeof(smplHeaderData), &cursor);
+                    chunkSize -= bytesJustRead;
 
-        // Make sure we seek past the padding.
-        dataSize += header.paddingSize;
-        drwav__seek_forward(onSeek, dataSize, pUserData);
-        pWav->dataChunkDataPos += dataSize;
+                    if (bytesJustRead == sizeof(smplHeaderData)) {
+                        drwav_uint32 iLoop;
+
+                        pWav->smpl.manufacturer      = drwav__bytes_to_u32(smplHeaderData+0);
+                        pWav->smpl.product           = drwav__bytes_to_u32(smplHeaderData+4);
+                        pWav->smpl.samplePeriod      = drwav__bytes_to_u32(smplHeaderData+8);
+                        pWav->smpl.midiUnityNotes    = drwav__bytes_to_u32(smplHeaderData+12);
+                        pWav->smpl.midiPitchFraction = drwav__bytes_to_u32(smplHeaderData+16);
+                        pWav->smpl.smpteFormat       = drwav__bytes_to_u32(smplHeaderData+20);
+                        pWav->smpl.smpteOffset       = drwav__bytes_to_u32(smplHeaderData+24);
+                        pWav->smpl.numSampleLoops    = drwav__bytes_to_u32(smplHeaderData+28);
+                        pWav->smpl.samplerData       = drwav__bytes_to_u32(smplHeaderData+32);
+
+                        for (iLoop = 0; iLoop < pWav->smpl.numSampleLoops && iLoop < drwav_countof(pWav->smpl.loops); ++iLoop) {
+                            unsigned char smplLoopData[24];  /* 24 = size of a loop section in the smpl chunk. */
+                            bytesJustRead = drwav__on_read(onRead, pReadSeekUserData, smplLoopData, sizeof(smplLoopData), &cursor);
+                            chunkSize -= bytesJustRead;
+
+                            if (bytesJustRead == sizeof(smplLoopData)) {
+                                pWav->smpl.loops[iLoop].cuePointId = drwav__bytes_to_u32(smplLoopData+0);
+                                pWav->smpl.loops[iLoop].type       = drwav__bytes_to_u32(smplLoopData+4);
+                                pWav->smpl.loops[iLoop].start      = drwav__bytes_to_u32(smplLoopData+8);
+                                pWav->smpl.loops[iLoop].end        = drwav__bytes_to_u32(smplLoopData+12);
+                                pWav->smpl.loops[iLoop].fraction   = drwav__bytes_to_u32(smplLoopData+16);
+                                pWav->smpl.loops[iLoop].playCount  = drwav__bytes_to_u32(smplLoopData+20);
+                            } else {
+                                break;  /* Short read. Stop parsing the remaining smpl loops. */
+                            }
+                        }
+                    }
+                } else {
+                    /* Looks like invalid data. Ignore the chunk. */
+                }
+            }
+        } else {
+            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_SMPL)) {
+                /*
+                This path will be hit when a W64 WAV file contains a smpl chunk. I don't have a sample file to test this path, so a contribution
+                is welcome to add support for this.
+                */
+            }
+        }
+
+        /* Make sure we seek past the padding. */
+        chunkSize += header.paddingSize;
+        if (!drwav__seek_forward(onSeek, chunkSize, pReadSeekUserData)) {
+            break;
+        }
+        cursor += chunkSize;
+
+        if (!foundDataChunk) {
+            pWav->dataChunkDataPos = cursor;
+        }
     }
 
-    // At this point we should be sitting on the first byte of the raw audio data.
+    /* If we haven't found a data chunk, return an error. */
+    if (!foundDataChunk) {
+        return DRWAV_FALSE;
+    }
+
+    /* We may have moved past the data chunk. If so we need to move back. If running in sequential mode we can assume we are already sitting on the data chunk. */
+    if (!sequential) {
+        if (!drwav__seek_from_start(onSeek, pWav->dataChunkDataPos, pReadSeekUserData)) {
+            return DRWAV_FALSE;
+        }
+        cursor = pWav->dataChunkDataPos;
+    }
+    
+
+    /* At this point we should be sitting on the first byte of the raw audio data. */
 
-    pWav->onRead              = onRead;
-    pWav->onSeek              = onSeek;
-    pWav->pUserData           = pUserData;
     pWav->fmt                 = fmt;
     pWav->sampleRate          = fmt.sampleRate;
     pWav->channels            = fmt.channels;
     pWav->bitsPerSample       = fmt.bitsPerSample;
-    pWav->bytesPerSample      = fmt.blockAlign / fmt.channels;
-    pWav->bytesRemaining      = dataSize;
+    pWav->bytesRemaining      = dataChunkSize;
     pWav->translatedFormatTag = translatedFormatTag;
-    pWav->dataChunkDataSize   = dataSize;
-
-    // The bytes per sample should never be 0 at this point. This would indicate an invalid WAV file.
-    if (pWav->bytesPerSample == 0) {
-        return DRWAV_FALSE;
-    }
+    pWav->dataChunkDataSize   = dataChunkSize;
 
     if (sampleCountFromFactChunk != 0) {
-        pWav->totalSampleCount = sampleCountFromFactChunk * fmt.channels;
+        pWav->totalPCMFrameCount = sampleCountFromFactChunk;
     } else {
-        pWav->totalSampleCount = dataSize / pWav->bytesPerSample;
+        pWav->totalPCMFrameCount = dataChunkSize / drwav_get_bytes_per_pcm_frame(pWav);
 
         if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-            drwav_uint64 blockCount = dataSize / fmt.blockAlign;
-            pWav->totalSampleCount = (blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2;  // x2 because two samples per byte.
+            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+            pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2)) / fmt.channels;  /* x2 because two samples per byte. */
         }
         if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-            drwav_uint64 blockCount = dataSize / fmt.blockAlign;
-            pWav->totalSampleCount = ((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels);
+            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+            pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels)) / fmt.channels;
         }
     }
 
-    // The way we calculate the bytes per sample does not make sense for compressed formats so we just set it to 0.
-    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
-        pWav->bytesPerSample = 0;
-    }
-
-    // Some formats only support a certain number of channels.
+    /* Some formats only support a certain number of channels. */
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
         if (pWav->channels > 2) {
             return DRWAV_FALSE;
@@ -1569,22 +2014,26 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS
     }
 
 #ifdef DR_WAV_LIBSNDFILE_COMPAT
-    // I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website),
-    // it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count
-    // from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct
-    // way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should
-    // always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my
-    // correctness tests against libsndfile, and is disabled by default.
+    /*
+    I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website),
+    it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count
+    from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct
+    way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should
+    always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my
+    correctness tests against libsndfile, and is disabled by default.
+    */
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        drwav_uint64 blockCount = dataSize / fmt.blockAlign;
-        pWav->totalSampleCount = (blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2;  // x2 because two samples per byte.
+        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2)) / fmt.channels;  /* x2 because two samples per byte. */
     }
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        drwav_uint64 blockCount = dataSize / fmt.blockAlign;
-        pWav->totalSampleCount = ((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels);
+        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels)) / fmt.channels;
     }
 #endif
 
+    pWav->totalSampleCount = pWav->totalPCMFrameCount * pWav->channels;
+
     return DRWAV_TRUE;
 }
 
@@ -1609,17 +2058,21 @@ drwav_uint32 drwav_data_chunk_size_riff(drwav_uint64 dataChunkSize)
 
 drwav_uint64 drwav_riff_chunk_size_w64(drwav_uint64 dataChunkSize)
 {
-    return 80 + 24 + dataChunkSize;   // +24 because W64 includes the size of the GUID and size fields.
+    return 80 + 24 + dataChunkSize;   /* +24 because W64 includes the size of the GUID and size fields. */
 }
 
 drwav_uint64 drwav_data_chunk_size_w64(drwav_uint64 dataChunkSize)
 {
-    return 24 + dataChunkSize;        // +24 because W64 includes the size of the GUID and size fields.
+    return 24 + dataChunkSize;        /* +24 because W64 includes the size of the GUID and size fields. */
 }
 
 
 drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData)
 {
+    size_t runningPos = 0;
+    drwav_uint64 initialDataChunkSize = 0;
+    drwav_uint64 chunkSizeFMT;
+
     if (pWav == NULL) {
         return DRWAV_FALSE;
     }
@@ -1629,11 +2082,11 @@ drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pF
     }
 
     if (!isSequential && onSeek == NULL) {
-        return DRWAV_FALSE; // <-- onSeek is required when in non-sequential mode.
+        return DRWAV_FALSE; /* <-- onSeek is required when in non-sequential mode. */
     }
 
 
-    // Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this.
+    /* Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this. */
     if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) {
         return DRWAV_FALSE;
     }
@@ -1655,21 +2108,21 @@ drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pF
     pWav->fmt.extendedSize = 0;
     pWav->isSequentialWrite = isSequential;
 
-
-    size_t runningPos = 0;
-
-    // The initial values for the "RIFF" and "data" chunks depends on whether or not we are initializing in sequential mode or not. In
-    // sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non-
-    // sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek.
-    drwav_uint64 initialDataChunkSize = 0;
+    /*
+    The initial values for the "RIFF" and "data" chunks depend on whether or not we are initializing in sequential mode. In
+    sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non-
+    sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek.
+    */
     if (isSequential) {
         initialDataChunkSize = (totalSampleCount * pWav->fmt.bitsPerSample) / 8;
 
-        // The RIFF container has a limit on the number of samples. drwav is not allowing this. There's no practical limits for Wave64
-        // so for the sake of simplicity I'm not doing any validation for that.
+        /*
+        The RIFF container has a limit on the number of samples, and drwav refuses to exceed it. There are no practical limits for Wave64
+        so for the sake of simplicity I'm not doing any validation for that.
+        */
         if (pFormat->container == drwav_container_riff) {
             if (initialDataChunkSize > (0xFFFFFFFF - 36)) {
-                return DRWAV_FALSE; // Not enough room to store every sample.
+                return DRWAV_FALSE; /* Not enough room to store every sample. */
             }
         }
     }
@@ -1677,21 +2130,20 @@ drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pF
     pWav->dataChunkDataSizeTargetWrite = initialDataChunkSize;
 
 
-    // "RIFF" chunk.
+    /* "RIFF" chunk. */
     if (pFormat->container == drwav_container_riff) {
-        drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize;   // +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk]
+        drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize;   /* +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk] */
         runningPos += pWav->onWrite(pUserData, "RIFF", 4);
         runningPos += pWav->onWrite(pUserData, &chunkSizeRIFF, 4);
         runningPos += pWav->onWrite(pUserData, "WAVE", 4);
     } else {
-        drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize;   // +24 because W64 includes the size of the GUID and size fields.
+        drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize;   /* +24 because W64 includes the size of the GUID and size fields. */
         runningPos += pWav->onWrite(pUserData, drwavGUID_W64_RIFF, 16);
         runningPos += pWav->onWrite(pUserData, &chunkSizeRIFF, 8);
         runningPos += pWav->onWrite(pUserData, drwavGUID_W64_WAVE, 16);
     }
 
-    // "fmt " chunk.
-    drwav_uint64 chunkSizeFMT;
+    /* "fmt " chunk. */
     if (pFormat->container == drwav_container_riff) {
         chunkSizeFMT = 16;
         runningPos += pWav->onWrite(pUserData, "fmt ", 4);
@@ -1711,19 +2163,19 @@ drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pF
 
     pWav->dataChunkDataPos = runningPos;
 
-    // "data" chunk.
+    /* "data" chunk. */
     if (pFormat->container == drwav_container_riff) {
         drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize;
         runningPos += pWav->onWrite(pUserData, "data", 4);
         runningPos += pWav->onWrite(pUserData, &chunkSizeDATA, 4);
     } else {
-        drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; // +24 because W64 includes the size of the GUID and size fields.
+        drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; /* +24 because W64 includes the size of the GUID and size fields. */
         runningPos += pWav->onWrite(pUserData, drwavGUID_W64_DATA, 16);
         runningPos += pWav->onWrite(pUserData, &chunkSizeDATA, 8);
     }
 
 
-    // Simple validation.
+    /* Simple validation. */
     if (pFormat->container == drwav_container_riff) {
         if (runningPos != 20 + chunkSizeFMT + 8) {
             return DRWAV_FALSE;
@@ -1736,12 +2188,11 @@ drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pF
     
 
 
-    // Set some properties for the client's convenience.
+    /* Set some properties for the client's convenience. */
     pWav->container = pFormat->container;
     pWav->channels = (drwav_uint16)pFormat->channels;
     pWav->sampleRate = pFormat->sampleRate;
     pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
-    pWav->bytesPerSample = (drwav_uint16)(pFormat->bitsPerSample >> 3);
     pWav->translatedFormatTag = (drwav_uint16)pFormat->format;
 
     return DRWAV_TRUE;
@@ -1750,12 +2201,12 @@ drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pF
 
 drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData)
 {
-    return drwav_init_write__internal(pWav, pFormat, 0, DRWAV_FALSE, onWrite, onSeek, pUserData);               // DRWAV_FALSE = Not Sequential
+    return drwav_init_write__internal(pWav, pFormat, 0, DRWAV_FALSE, onWrite, onSeek, pUserData);               /* DRWAV_FALSE = Not Sequential */
 }
 
 drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData)
 {
-    return drwav_init_write__internal(pWav, pFormat, totalSampleCount, DRWAV_TRUE, onWrite, NULL, pUserData);   // DRWAV_TRUE = Sequential
+    return drwav_init_write__internal(pWav, pFormat, totalSampleCount, DRWAV_TRUE, onWrite, NULL, pUserData);   /* DRWAV_TRUE = Sequential */
 }
 
 void drwav_uninit(drwav* pWav)
@@ -1764,17 +2215,20 @@ void drwav_uninit(drwav* pWav)
         return;
     }
 
-    // If the drwav object was opened in write mode we'll need to finalize a few things:
-    //   - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers.
-    //   - Set the size of the "data" chunk.
+    /*
+    If the drwav object was opened in write mode we'll need to finalize a few things:
+      - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers.
+      - Set the size of the "data" chunk.
+    */
     if (pWav->onWrite != NULL) {
-        // Validation for sequential mode.
+        drwav_uint32 paddingSize = 0;
+
+        /* Validation for sequential mode. */
         if (pWav->isSequentialWrite) {
             drwav_assert(pWav->dataChunkDataSize == pWav->dataChunkDataSizeTargetWrite);
         }
 
-        // Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding.
-        drwav_uint32 paddingSize = 0;
+        /* Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. */
         if (pWav->container == drwav_container_riff) {
             paddingSize = (drwav_uint32)(pWav->dataChunkDataSize % 2);
         } else {
@@ -1786,30 +2240,31 @@ void drwav_uninit(drwav* pWav)
             pWav->onWrite(pWav->pUserData, &paddingData, paddingSize);
         }
 
-
-        // Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need
-        // to do this when using non-sequential mode.
+        /*
+        Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need
+        to do this when using non-sequential mode.
+        */
         if (pWav->onSeek && !pWav->isSequentialWrite) {
             if (pWav->container == drwav_container_riff) {
-                // The "RIFF" chunk size.
+                /* The "RIFF" chunk size. */
                 if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) {
                     drwav_uint32 riffChunkSize = drwav_riff_chunk_size_riff(pWav->dataChunkDataSize);
                     pWav->onWrite(pWav->pUserData, &riffChunkSize, 4);
                 }
 
-                // the "data" chunk size.
+                /* the "data" chunk size. */
                 if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 4, drwav_seek_origin_start)) {
                     drwav_uint32 dataChunkSize = drwav_data_chunk_size_riff(pWav->dataChunkDataSize);
                     pWav->onWrite(pWav->pUserData, &dataChunkSize, 4);
                 }
             } else {
-                // The "RIFF" chunk size.
+                /* The "RIFF" chunk size. */
                 if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) {
                     drwav_uint64 riffChunkSize = drwav_riff_chunk_size_w64(pWav->dataChunkDataSize);
                     pWav->onWrite(pWav->pUserData, &riffChunkSize, 8);
                 }
 
-                // The "data" chunk size.
+                /* The "data" chunk size. */
                 if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 16, drwav_seek_origin_start)) {
                     drwav_uint64 dataChunkSize = drwav_data_chunk_size_w64(pWav->dataChunkDataSize);
                     pWav->onWrite(pWav->pUserData, &dataChunkSize, 8);
@@ -1819,8 +2274,10 @@ void drwav_uninit(drwav* pWav)
     }
 
 #ifndef DR_WAV_NO_STDIO
-    // If we opened the file with drwav_open_file() we will want to close the file handle. We can know whether or not drwav_open_file()
-    // was used by looking at the onRead and onSeek callbacks.
+    /*
+    If we opened the file with drwav_open_file() we will want to close the file handle. We can know whether or not drwav_open_file()
+    was used by looking at the onRead and onSeek callbacks.
+    */
     if (pWav->onRead == drwav__on_read_stdio || pWav->onWrite == drwav__on_write_stdio) {
         fclose((FILE*)pWav->pUserData);
     }
@@ -1829,13 +2286,18 @@ void drwav_uninit(drwav* pWav)
 
 
 drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData)
+{
+    return drwav_open_ex(onRead, onSeek, NULL, pUserData, NULL, 0);
+}
+
+drwav* drwav_open_ex(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags)
 {
     drwav* pWav = (drwav*)DRWAV_MALLOC(sizeof(*pWav));
     if (pWav == NULL) {
         return NULL;
     }
 
-    if (!drwav_init(pWav, onRead, onSeek, pUserData)) {
+    if (!drwav_init_ex(pWav, onRead, onSeek, onChunk, pReadSeekUserData, pChunkUserData, flags)) {
         DRWAV_FREE(pWav);
         return NULL;
     }
@@ -1878,6 +2340,8 @@ void drwav_close(drwav* pWav)
 
 size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
 {
+    size_t bytesRead;
+
     if (pWav == NULL || bytesToRead == 0 || pBufferOut == NULL) {
         return 0;
     }
@@ -1886,7 +2350,7 @@ size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
         bytesToRead = (size_t)pWav->bytesRemaining;
     }
 
-    size_t bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
+    bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
 
     pWav->bytesRemaining -= bytesRead;
     return bytesRead;
@@ -1894,28 +2358,64 @@ size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
 
 drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOut)
 {
+    drwav_uint32 bytesPerSample;
+    size_t bytesRead;
+
     if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL) {
         return 0;
     }
 
-    // Cannot use this function for compressed formats.
+    /* Cannot use this function for compressed formats. */
     if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
         return 0;
     }
 
-    // Don't try to read more samples than can potentially fit in the output buffer.
-    if (samplesToRead * pWav->bytesPerSample > SIZE_MAX) {
-        samplesToRead = SIZE_MAX / pWav->bytesPerSample;
+    bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
+        return 0;
     }
 
-    size_t bytesRead = drwav_read_raw(pWav, (size_t)(samplesToRead * pWav->bytesPerSample), pBufferOut);
-    return bytesRead / pWav->bytesPerSample;
+    /* Clamp so the byte count fits in size_t. Comparing by division keeps the check itself from wrapping in 64-bit arithmetic. */
+    if (samplesToRead > DRWAV_SIZE_MAX / bytesPerSample) {
+        samplesToRead = DRWAV_SIZE_MAX / bytesPerSample;
+    }
+
+    bytesRead = drwav_read_raw(pWav, (size_t)(samplesToRead * bytesPerSample), pBufferOut);
+    return bytesRead / bytesPerSample;
 }
 
-drwav_bool32 drwav_seek_to_first_sample(drwav* pWav)
+/* Reads up to framesToRead uncompressed PCM frames into pBufferOut. Returns the number of whole frames read, or 0 on invalid input. */
+drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    drwav_uint32 bytesPerFrame;
+    size_t bytesRead;
+
+    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    /* Cannot use this function for compressed formats. */
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        return 0;
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    /* Clamp so the byte count fits in size_t. Comparing by division keeps the check itself from wrapping in 64-bit arithmetic. */
+    if (framesToRead > DRWAV_SIZE_MAX / bytesPerFrame) {
+        framesToRead = DRWAV_SIZE_MAX / bytesPerFrame;
+    }
+    bytesRead = drwav_read_raw(pWav, (size_t)(framesToRead * bytesPerFrame), pBufferOut);
+    return bytesRead / bytesPerFrame;
+}
+
+drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
 {
     if (pWav->onWrite != NULL) {
-        return DRWAV_FALSE; // No seeking in write mode.
+        return DRWAV_FALSE; /* No seeking in write mode. */
     }
 
     if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) {
@@ -1932,36 +2432,39 @@ drwav_bool32 drwav_seek_to_first_sample(drwav* pWav)
 
 drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample)
 {
-    // Seeking should be compatible with wave files > 2GB.
+    /* Seeking should be compatible with wave files > 2GB. */
 
     if (pWav->onWrite != NULL) {
-        return DRWAV_FALSE; // No seeking in write mode.
+        return DRWAV_FALSE; /* No seeking in write mode. */
     }
 
     if (pWav == NULL || pWav->onSeek == NULL) {
         return DRWAV_FALSE;
     }
 
-    // If there are no samples, just return DRWAV_TRUE without doing anything.
+    /* If there are no samples, just return DRWAV_TRUE without doing anything. */
     if (pWav->totalSampleCount == 0) {
         return DRWAV_TRUE;
     }
 
-    // Make sure the sample is clamped.
+    /* Make sure the sample is clamped. */
     if (sample >= pWav->totalSampleCount) {
         sample  = pWav->totalSampleCount - 1;
     }
 
-
-    // For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need
-    // to seek back to the start.
+    /*
+    For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need
+    to seek back to the start.
+    */
     if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
-        // TODO: This can be optimized.
+        /* TODO: This can be optimized. */
         
-        // If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards,
-        // we first need to seek back to the start and then just do the same thing as a forward seek.
+        /*
+        If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards,
+        we first need to seek back to the start and then just do the same thing as a forward seek.
+        */
         if (sample < pWav->compressed.iCurrentSample) {
-            if (!drwav_seek_to_first_sample(pWav)) {
+            if (!drwav_seek_to_first_pcm_frame(pWav)) {
                 return DRWAV_FALSE;
             }
         }
@@ -1971,18 +2474,18 @@ drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample)
 
             drwav_int16 devnull[2048];
             while (offset > 0) {
+                drwav_uint64 samplesRead = 0;
                 drwav_uint64 samplesToRead = offset;
                 if (samplesToRead > 2048) {
                     samplesToRead = 2048;
                 }
 
-                drwav_uint64 samplesRead = 0;
                 if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
                     samplesRead = drwav_read_s16__msadpcm(pWav, samplesToRead, devnull);
                 } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
                     samplesRead = drwav_read_s16__ima(pWav, samplesToRead, devnull);
                 } else {
-                    assert(DRWAV_FALSE);    // If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here.
+                    assert(DRWAV_FALSE);    /* If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here. */
                 }
 
                 if (samplesRead != samplesToRead) {
@@ -1993,19 +2496,23 @@ drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample)
             }
         }
     } else {
-        drwav_uint64 totalSizeInBytes = pWav->totalSampleCount * pWav->bytesPerSample;
+        drwav_uint64 totalSizeInBytes;
+        drwav_uint64 currentBytePos;
+        drwav_uint64 targetBytePos;
+        drwav_uint64 offset;
+
+        totalSizeInBytes = pWav->totalPCMFrameCount * drwav_get_bytes_per_pcm_frame(pWav);
         drwav_assert(totalSizeInBytes >= pWav->bytesRemaining);
 
-        drwav_uint64 currentBytePos = totalSizeInBytes - pWav->bytesRemaining;
-        drwav_uint64 targetBytePos  = sample * pWav->bytesPerSample;
+        currentBytePos = totalSizeInBytes - pWav->bytesRemaining;
+        targetBytePos  = sample * drwav_get_bytes_per_sample(pWav);
 
-        drwav_uint64 offset;
         if (currentBytePos < targetBytePos) {
-            // Offset forwards.
+            /* Offset forwards. */
             offset = (targetBytePos - currentBytePos);
         } else {
-            // Offset backwards.
-            if (!drwav_seek_to_first_sample(pWav)) {
+            /* Offset backwards. */
+            if (!drwav_seek_to_first_pcm_frame(pWav)) {
                 return DRWAV_FALSE;
             }
             offset = targetBytePos;
@@ -2025,14 +2532,21 @@ drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample)
     return DRWAV_TRUE;
 }
 
+drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex)
+{
+    return (pWav != NULL) ? drwav_seek_to_sample(pWav, targetFrameIndex * pWav->channels) : DRWAV_FALSE; /* Guard first: pWav->channels is read before the callee runs. */
+}
+
 
 size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData)
 {
+    size_t bytesWritten;
+
     if (pWav == NULL || bytesToWrite == 0 || pData == NULL) {
         return 0;
     }
 
-    size_t bytesWritten = pWav->onWrite(pWav->pUserData, pData, bytesToWrite);
+    bytesWritten = pWav->onWrite(pWav->pUserData, pData, bytesToWrite);
     pWav->dataChunkDataSize += bytesWritten;
 
     return bytesWritten;
@@ -2040,24 +2554,29 @@ size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData)
 
 drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* pData)
 {
+    drwav_uint64 bytesToWrite;
+    drwav_uint64 bytesWritten;
+    const drwav_uint8* pRunningData;
+
     if (pWav == NULL || samplesToWrite == 0 || pData == NULL) {
         return 0;
     }
 
-    drwav_uint64 bytesToWrite = ((samplesToWrite * pWav->bitsPerSample) / 8);
-    if (bytesToWrite > SIZE_MAX) {
+    bytesToWrite = ((samplesToWrite * pWav->bitsPerSample) / 8);
+    if (bytesToWrite > DRWAV_SIZE_MAX) {
         return 0;
     }
 
-    drwav_uint64 bytesWritten = 0;
-    const drwav_uint8* pRunningData = (const drwav_uint8*)pData;
+    bytesWritten = 0;
+    pRunningData = (const drwav_uint8*)pData;
     while (bytesToWrite > 0) {
+        size_t bytesJustWritten;
         drwav_uint64 bytesToWriteThisIteration = bytesToWrite;
-        if (bytesToWriteThisIteration > SIZE_MAX) {
-            bytesToWriteThisIteration = SIZE_MAX;
+        if (bytesToWriteThisIteration > DRWAV_SIZE_MAX) {
+            bytesToWriteThisIteration = DRWAV_SIZE_MAX;
         }
 
-        size_t bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData);
+        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData);
         if (bytesJustWritten == 0) {
             break;
         }
@@ -2070,23 +2589,28 @@ drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* p
     return (bytesWritten * 8) / pWav->bitsPerSample;
 }
 
+drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    return (pWav == NULL || pWav->channels == 0) ? 0 : drwav_write(pWav, framesToWrite * pWav->channels, pData) / pWav->channels; /* Avoids a NULL dereference and a divide-by-zero. */
+}
+
 
 
 drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
 {
+    drwav_uint64 totalSamplesRead = 0;
+
     drwav_assert(pWav != NULL);
     drwav_assert(samplesToRead > 0);
     drwav_assert(pBufferOut != NULL);
 
-    // TODO: Lots of room for optimization here.
-
-    drwav_uint64 totalSamplesRead = 0;
+    /* TODO: Lots of room for optimization here. */
 
     while (samplesToRead > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) {
-        // If there are no cached samples we need to load a new block.
+        /* If there are no cached samples we need to load a new block. */
         if (pWav->msadpcm.cachedSampleCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) {
             if (pWav->channels == 1) {
-                // Mono.
+                /* Mono. */
                 drwav_uint8 header[7];
                 if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
                     return totalSamplesRead;
@@ -2101,7 +2625,7 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
                 pWav->msadpcm.cachedSamples[3] = pWav->msadpcm.prevSamples[0][1];
                 pWav->msadpcm.cachedSampleCount = 2;
             } else {
-                // Stereo.
+                /* Stereo. */
                 drwav_uint8 header[14];
                 if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
                     return totalSamplesRead;
@@ -2125,7 +2649,7 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
             }
         }
 
-        // Output anything that's cached.
+        /* Output anything that's cached. */
         while (samplesToRead > 0 && pWav->msadpcm.cachedSampleCount > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) {
             pBufferOut[0] = (drwav_int16)pWav->msadpcm.cachedSamples[drwav_countof(pWav->msadpcm.cachedSamples) - pWav->msadpcm.cachedSampleCount];
             pWav->msadpcm.cachedSampleCount -= 1;
@@ -2141,22 +2665,14 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
         }
 
 
-        // If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
-        // loop iteration which will trigger the loading of a new block.
+        /*
+        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        loop iteration which will trigger the loading of a new block.
+        */
         if (pWav->msadpcm.cachedSampleCount == 0) {
             if (pWav->msadpcm.bytesRemainingInBlock == 0) {
                 continue;
             } else {
-                drwav_uint8 nibbles;
-                if (pWav->onRead(pWav->pUserData, &nibbles, 1) != 1) {
-                    return totalSamplesRead;
-                }
-                pWav->msadpcm.bytesRemainingInBlock -= 1;
-
-                // TODO: Optimize away these if statements.
-                drwav_int32 nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; }
-                drwav_int32 nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; }
-
                 static drwav_int32 adaptationTable[] = { 
                     230, 230, 230, 230, 307, 409, 512, 614, 
                     768, 614, 512, 409, 307, 230, 230, 230 
@@ -2164,9 +2680,24 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
                 static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
                 static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
 
+                drwav_uint8 nibbles;
+                drwav_int32 nibble0;
+                drwav_int32 nibble1;
+
+                if (pWav->onRead(pWav->pUserData, &nibbles, 1) != 1) {
+                    return totalSamplesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock -= 1;
+
+                /* TODO: Optimize away these if statements. */
+                nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; }
+                nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; }
+
                 if (pWav->channels == 1) {
-                    // Mono.
+                    /* Mono. */
                     drwav_int32 newSample0;
+                    drwav_int32 newSample1;
+
                     newSample0  = ((pWav->msadpcm.prevSamples[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevSamples[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
                     newSample0 += nibble0 * pWav->msadpcm.delta[0];
                     newSample0  = drwav_clamp(newSample0, -32768, 32767);
@@ -2180,7 +2711,6 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
                     pWav->msadpcm.prevSamples[0][1] = newSample0;
 
 
-                    drwav_int32 newSample1;
                     newSample1  = ((pWav->msadpcm.prevSamples[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevSamples[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
                     newSample1 += nibble1 * pWav->msadpcm.delta[0];
                     newSample1  = drwav_clamp(newSample1, -32768, 32767);
@@ -2198,10 +2728,11 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
                     pWav->msadpcm.cachedSamples[3] = newSample1;
                     pWav->msadpcm.cachedSampleCount = 2;
                 } else {
-                    // Stereo.
-
-                    // Left.
+                    /* Stereo. */
                     drwav_int32 newSample0;
+                    drwav_int32 newSample1;
+
+                    /* Left. */
                     newSample0  = ((pWav->msadpcm.prevSamples[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevSamples[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
                     newSample0 += nibble0 * pWav->msadpcm.delta[0];
                     newSample0  = drwav_clamp(newSample0, -32768, 32767);
@@ -2215,8 +2746,7 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
                     pWav->msadpcm.prevSamples[0][1] = newSample0;
 
 
-                    // Right.
-                    drwav_int32 newSample1;
+                    /* Right. */
                     newSample1  = ((pWav->msadpcm.prevSamples[1][1] * coeff1Table[pWav->msadpcm.predictor[1]]) + (pWav->msadpcm.prevSamples[1][0] * coeff2Table[pWav->msadpcm.predictor[1]])) >> 8;
                     newSample1 += nibble1 * pWav->msadpcm.delta[1];
                     newSample1  = drwav_clamp(newSample1, -32768, 32767);
@@ -2242,19 +2772,19 @@ drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
 
 drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
 {
+    drwav_uint64 totalSamplesRead = 0;
+
     drwav_assert(pWav != NULL);
     drwav_assert(samplesToRead > 0);
     drwav_assert(pBufferOut != NULL);
 
-    // TODO: Lots of room for optimization here.
-
-    drwav_uint64 totalSamplesRead = 0;
+    /* TODO: Lots of room for optimization here. */
 
     while (samplesToRead > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) {
-        // If there are no cached samples we need to load a new block.
+        /* If there are no cached samples we need to load a new block. */
         if (pWav->ima.cachedSampleCount == 0 && pWav->ima.bytesRemainingInBlock == 0) {
             if (pWav->channels == 1) {
-                // Mono.
+                /* Mono. */
                 drwav_uint8 header[4];
                 if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
                     return totalSamplesRead;
@@ -2266,7 +2796,7 @@ drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_
                 pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - 1] = pWav->ima.predictor[0];
                 pWav->ima.cachedSampleCount = 1;
             } else {
-                // Stereo.
+                /* Stereo. */
                 drwav_uint8 header[8];
                 if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
                     return totalSamplesRead;
@@ -2284,7 +2814,7 @@ drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_
             }
         }
 
-        // Output anything that's cached.
+        /* Output anything that's cached. */
         while (samplesToRead > 0 && pWav->ima.cachedSampleCount > 0 && pWav->compressed.iCurrentSample < pWav->totalSampleCount) {
             pBufferOut[0] = (drwav_int16)pWav->ima.cachedSamples[drwav_countof(pWav->ima.cachedSamples) - pWav->ima.cachedSampleCount];
             pWav->ima.cachedSampleCount -= 1;
@@ -2299,8 +2829,10 @@ drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_
             return totalSamplesRead;
         }
 
-        // If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
-        // loop iteration which will trigger the loading of a new block.
+        /*
+        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        loop iteration which will trigger the loading of a new block.
+        */
         if (pWav->ima.cachedSampleCount == 0) {
             if (pWav->ima.bytesRemainingInBlock == 0) {
                 continue;
@@ -2322,17 +2854,22 @@ drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_
                     15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 
                 };
 
-                // From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the
-                // left channel, 4 bytes for the right channel.
+                drwav_uint32 iChannel;
+
+                /*
+                From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the
+                left channel, 4 bytes for the right channel.
+                */
                 pWav->ima.cachedSampleCount = 8 * pWav->channels;
-                for (drwav_uint32 iChannel = 0; iChannel < pWav->channels; ++iChannel) {
+                for (iChannel = 0; iChannel < pWav->channels; ++iChannel) {
+                    drwav_uint32 iByte;
                     drwav_uint8 nibbles[4];
                     if (pWav->onRead(pWav->pUserData, &nibbles, 4) != 4) {
                         return totalSamplesRead;
                     }
                     pWav->ima.bytesRemainingInBlock -= 4;
 
-                    for (drwav_uint32 iByte = 0; iByte < 4; ++iByte) {
+                    for (iByte = 0; iByte < 4; ++iByte) {
                         drwav_uint8 nibble0 = ((nibbles[iByte] & 0x0F) >> 0);
                         drwav_uint8 nibble1 = ((nibbles[iByte] & 0xF0) >> 4);
 
@@ -2425,19 +2962,21 @@ static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn)
 
 
 
-static void drwav__pcm_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample)
+static void drwav__pcm_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
 {
-    // Special case for 8-bit sample data because it's treated as unsigned.
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
     if (bytesPerSample == 1) {
         drwav_u8_to_s16(pOut, pIn, totalSampleCount);
         return;
     }
 
 
-    // Slightly more optimal implementation for common formats.
+    /* Slightly more optimal implementation for common formats. */
     if (bytesPerSample == 2) {
-        for (unsigned int i = 0; i < totalSampleCount; ++i) {
-           *pOut++ = ((drwav_int16*)pIn)[i];
+        for (i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int16*)pIn)[i];
         }
         return;
     }
@@ -2451,15 +2990,15 @@ static void drwav__pcm_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_
     }
 
 
-    // Anything more than 64 bits per sample is not supported.
+    /* Anything more than 64 bits per sample is not supported. */
     if (bytesPerSample > 8) {
         drwav_zero_memory(pOut, totalSampleCount * sizeof(*pOut));
         return;
     }
 
 
-    // Generic, slow converter.
-    for (unsigned int i = 0; i < totalSampleCount; ++i) {
+    /* Generic, slow converter. */
+    for (i = 0; i < totalSampleCount; ++i) {
         drwav_uint64 sample = 0;
         unsigned int shift  = (8 - bytesPerSample) * 8;
 
@@ -2474,16 +3013,16 @@ static void drwav__pcm_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_
     }
 }
 
-static void drwav__ieee_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample)
+static void drwav__ieee_to_s16(drwav_int16* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
 {
     if (bytesPerSample == 4) {
-        drwav_f32_to_s16(pOut, (float*)pIn, totalSampleCount);
+        drwav_f32_to_s16(pOut, (const float*)pIn, totalSampleCount);
         return;
     } else if (bytesPerSample == 8) {
-        drwav_f64_to_s16(pOut, (double*)pIn, totalSampleCount);
+        drwav_f64_to_s16(pOut, (const double*)pIn, totalSampleCount);
         return;
     } else {
-        // Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float.
+        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
         drwav_zero_memory(pOut, totalSampleCount * sizeof(*pOut));
         return;
     }
@@ -2491,20 +3030,29 @@ static void drwav__ieee_to_s16(drwav_int16* pOut, const unsigned char* pIn, size
 
 drwav_uint64 drwav_read_s16__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
 {
-    // Fast path.
-    if (pWav->bytesPerSample == 2) {
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 totalSamplesRead;
+    unsigned char sampleData[4096];
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) {
         return drwav_read(pWav, samplesToRead, pBufferOut);
     }
+    
+    bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
+        return 0;
+    }
 
-    drwav_uint64 totalSamplesRead = 0;
-    unsigned char sampleData[4096];
+    totalSamplesRead = 0;
+    
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
 
-        drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+        drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
 
         pBufferOut       += samplesRead;
         samplesToRead    -= samplesRead;
@@ -2516,15 +3064,23 @@ drwav_uint64 drwav_read_s16__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_
 
 drwav_uint64 drwav_read_s16__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
 {
-    drwav_uint64 totalSamplesRead = 0;
+    drwav_uint64 totalSamplesRead;
     unsigned char sampleData[4096];
+
+    drwav_uint32 bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
+        return 0;
+    }
+
+    totalSamplesRead = 0;
+    
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
 
-        drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+        drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
 
         pBufferOut       += samplesRead;
         samplesToRead    -= samplesRead;
@@ -2536,10 +3092,18 @@ drwav_uint64 drwav_read_s16__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav
 
 drwav_uint64 drwav_read_s16__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
 {
-    drwav_uint64 totalSamplesRead = 0;
+    drwav_uint64 totalSamplesRead;
     unsigned char sampleData[4096];
+
+    drwav_uint32 bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
+        return 0;
+    }
+
+    totalSamplesRead = 0;
+    
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
@@ -2556,10 +3120,18 @@ drwav_uint64 drwav_read_s16__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav
 
 drwav_uint64 drwav_read_s16__mulaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut)
 {
-    drwav_uint64 totalSamplesRead = 0;
+    drwav_uint64 totalSamplesRead;
     unsigned char sampleData[4096];
+
+    drwav_uint32 bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
+        return 0;
+    }
+
+    totalSamplesRead = 0;
+
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
@@ -2580,9 +3152,9 @@ drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16
         return 0;
     }
 
-    // Don't try to read more samples than can potentially fit in the output buffer.
-    if (samplesToRead * sizeof(drwav_int16) > SIZE_MAX) {
-        samplesToRead = SIZE_MAX / sizeof(drwav_int16);
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (samplesToRead * sizeof(drwav_int16) > DRWAV_SIZE_MAX) {
+        samplesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int16);
     }
 
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
@@ -2612,10 +3184,16 @@ drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16
     return 0;
 }
 
+drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    return drwav_read_s16(pWav, framesToRead * pWav->channels, pBufferOut) / pWav->channels;
+}
+
 void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
     int r;
-    for (size_t i = 0; i < sampleCount; ++i) {
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
         int x = pIn[i];
         r = x - 128;
         r = r << 8;
@@ -2626,8 +3204,9 @@ void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCou
 void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
     int r;
-    for (size_t i = 0; i < sampleCount; ++i) {
-        int x = ((int)(((unsigned int)(((unsigned char*)pIn)[i*3+0]) << 8) | ((unsigned int)(((unsigned char*)pIn)[i*3+1]) << 16) | ((unsigned int)(((unsigned char*)pIn)[i*3+2])) << 24)) >> 8;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = ((int)(((unsigned int)(((const unsigned char*)pIn)[i*3+0]) << 8) | ((unsigned int)(((const unsigned char*)pIn)[i*3+1]) << 16) | ((unsigned int)(((const unsigned char*)pIn)[i*3+2])) << 24)) >> 8;
         r = x >> 8;
         pOut[i] = (short)r;
     }
@@ -2636,7 +3215,8 @@ void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCo
 void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount)
 {
     int r;
-    for (size_t i = 0; i < sampleCount; ++i) {
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
         int x = pIn[i];
         r = x >> 16;
         pOut[i] = (short)r;
@@ -2646,7 +3226,8 @@ void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCo
 void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount)
 {
     int r;
-    for (size_t i = 0; i < sampleCount; ++i) {
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
         float x = pIn[i];
         float c;
         c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
@@ -2660,7 +3241,8 @@ void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount)
 void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
 {
     int r;
-    for (size_t i = 0; i < sampleCount; ++i) {
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
         double x = pIn[i];
         double c;
         c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
@@ -2673,29 +3255,33 @@ void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
 
 void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
-    for (size_t i = 0; i < sampleCount; ++i) {
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
         pOut[i] = drwav__alaw_to_s16(pIn[i]);
     }
 }
 
 void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
-    for (size_t i = 0; i < sampleCount; ++i) {
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
         pOut[i] = drwav__mulaw_to_s16(pIn[i]);
     }
 }
 
 
 
-static void drwav__pcm_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned short bytesPerSample)
+static void drwav__pcm_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned int bytesPerSample)
 {
-    // Special case for 8-bit sample data because it's treated as unsigned.
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
     if (bytesPerSample == 1) {
         drwav_u8_to_f32(pOut, pIn, sampleCount);
         return;
     }
 
-    // Slightly more optimal implementation for common formats.
+    /* Slightly more optimal implementation for common formats. */
     if (bytesPerSample == 2) {
         drwav_s16_to_f32(pOut, (const drwav_int16*)pIn, sampleCount);
         return;
@@ -2710,15 +3296,15 @@ static void drwav__pcm_to_f32(float* pOut, const unsigned char* pIn, size_t samp
     }
 
 
-    // Anything more than 64 bits per sample is not supported.
+    /* Anything more than 64 bits per sample is not supported. */
     if (bytesPerSample > 8) {
         drwav_zero_memory(pOut, sampleCount * sizeof(*pOut));
         return;
     }
 
 
-    // Generic, slow converter.
-    for (unsigned int i = 0; i < sampleCount; ++i) {
+    /* Generic, slow converter. */
+    for (i = 0; i < sampleCount; ++i) {
         drwav_uint64 sample = 0;
         unsigned int shift  = (8 - bytesPerSample) * 8;
 
@@ -2733,18 +3319,19 @@ static void drwav__pcm_to_f32(float* pOut, const unsigned char* pIn, size_t samp
     }
 }
 
-static void drwav__ieee_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned short bytesPerSample)
+static void drwav__ieee_to_f32(float* pOut, const unsigned char* pIn, size_t sampleCount, unsigned int bytesPerSample)
 {
     if (bytesPerSample == 4) {
-        for (unsigned int i = 0; i < sampleCount; ++i) {
-            *pOut++ = ((float*)pIn)[i];
+        unsigned int i;
+        for (i = 0; i < sampleCount; ++i) {
+            *pOut++ = ((const float*)pIn)[i];
         }
         return;
     } else if (bytesPerSample == 8) {
-        drwav_f64_to_f32(pOut, (double*)pIn, sampleCount);
+        drwav_f64_to_f32(pOut, (const double*)pIn, sampleCount);
         return;
     } else {
-        // Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float.
+        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
         drwav_zero_memory(pOut, sampleCount * sizeof(*pOut));
         return;
     }
@@ -2753,19 +3340,23 @@ static void drwav__ieee_to_f32(float* pOut, const unsigned char* pIn, size_t sam
 
 drwav_uint64 drwav_read_f32__pcm(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
 {
-    if (pWav->bytesPerSample == 0) {
+    drwav_uint64 totalSamplesRead;
+    unsigned char sampleData[4096];
+
+    drwav_uint32 bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
         return 0;
     }
 
-    drwav_uint64 totalSamplesRead = 0;
-    unsigned char sampleData[4096];
+    totalSamplesRead = 0;
+
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
 
-        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
         pBufferOut += samplesRead;
 
         samplesToRead    -= samplesRead;
@@ -2777,8 +3368,10 @@ drwav_uint64 drwav_read_f32__pcm(drwav* pWav, drwav_uint64 samplesToRead, float*
 
 drwav_uint64 drwav_read_f32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
 {
-    // We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
-    // want to duplicate that code.
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
     drwav_uint64 totalSamplesRead = 0;
     drwav_int16 samples16[2048];
     while (samplesToRead > 0) {
@@ -2787,7 +3380,7 @@ drwav_uint64 drwav_read_f32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, fl
             break;
         }
 
-        drwav_s16_to_f32(pBufferOut, samples16, (size_t)samplesRead);   // <-- Safe cast because we're clamping to 2048.
+        drwav_s16_to_f32(pBufferOut, samples16, (size_t)samplesRead);   /* <-- Safe cast because we're clamping to 2048. */
 
         pBufferOut       += samplesRead;
         samplesToRead    -= samplesRead;
@@ -2799,8 +3392,10 @@ drwav_uint64 drwav_read_f32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, fl
 
 drwav_uint64 drwav_read_f32__ima(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
 {
-    // We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
-    // want to duplicate that code.
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
     drwav_uint64 totalSamplesRead = 0;
     drwav_int16 samples16[2048];
     while (samplesToRead > 0) {
@@ -2809,7 +3404,7 @@ drwav_uint64 drwav_read_f32__ima(drwav* pWav, drwav_uint64 samplesToRead, float*
             break;
         }
 
-        drwav_s16_to_f32(pBufferOut, samples16, (size_t)samplesRead);   // <-- Safe cast because we're clamping to 2048.
+        drwav_s16_to_f32(pBufferOut, samples16, (size_t)samplesRead);   /* <-- Safe cast because we're clamping to 2048. */
 
         pBufferOut       += samplesRead;
         samplesToRead    -= samplesRead;
@@ -2821,24 +3416,29 @@ drwav_uint64 drwav_read_f32__ima(drwav* pWav, drwav_uint64 samplesToRead, float*
 
 drwav_uint64 drwav_read_f32__ieee(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
 {
-    // Fast path.
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bytesPerSample == 4) {
+    drwav_uint64 totalSamplesRead;
+    unsigned char sampleData[4096];
+    drwav_uint32 bytesPerSample;
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bitsPerSample == 32) {
         return drwav_read(pWav, samplesToRead, pBufferOut);
     }
-
-    if (pWav->bytesPerSample == 0) {
+    
+    bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
         return 0;
     }
 
-    drwav_uint64 totalSamplesRead = 0;
-    unsigned char sampleData[4096];
+    totalSamplesRead = 0;
+
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
 
-        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
 
         pBufferOut       += samplesRead;
         samplesToRead    -= samplesRead;
@@ -2850,14 +3450,17 @@ drwav_uint64 drwav_read_f32__ieee(drwav* pWav, drwav_uint64 samplesToRead, float
 
 drwav_uint64 drwav_read_f32__alaw(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
 {
-    if (pWav->bytesPerSample == 0) {
+    drwav_uint64 totalSamplesRead;
+    unsigned char sampleData[4096];
+    drwav_uint32 bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
         return 0;
     }
 
-    drwav_uint64 totalSamplesRead = 0;
-    unsigned char sampleData[4096];
+    totalSamplesRead = 0;
+
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
@@ -2874,14 +3477,18 @@ drwav_uint64 drwav_read_f32__alaw(drwav* pWav, drwav_uint64 samplesToRead, float
 
 drwav_uint64 drwav_read_f32__mulaw(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut)
 {
-    if (pWav->bytesPerSample == 0) {
+    drwav_uint64 totalSamplesRead;
+    unsigned char sampleData[4096];
+
+    drwav_uint32 bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
         return 0;
     }
 
-    drwav_uint64 totalSamplesRead = 0;
-    unsigned char sampleData[4096];
+    totalSamplesRead = 0;
+
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
@@ -2902,9 +3509,9 @@ drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBuf
         return 0;
     }
 
-    // Don't try to read more samples than can potentially fit in the output buffer.
-    if (samplesToRead * sizeof(float) > SIZE_MAX) {
-        samplesToRead = SIZE_MAX / sizeof(float);
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (samplesToRead * sizeof(float) > DRWAV_SIZE_MAX) {
+        samplesToRead = DRWAV_SIZE_MAX / sizeof(float);
     }
 
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
@@ -2934,22 +3541,31 @@ drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBuf
     return 0;
 }
 
+drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    return drwav_read_f32(pWav, framesToRead * pWav->channels, pBufferOut) / pWav->channels;
+}
+
 void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
+    size_t i;
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
 #ifdef DR_WAV_LIBSNDFILE_COMPAT
-    // It appears libsndfile uses slightly different logic for the u8 -> f32 conversion to dr_wav, which in my opinion is incorrect. It appears
-    // libsndfile performs the conversion something like "f32 = (u8 / 256) * 2 - 1", however I think it should be "f32 = (u8 / 255) * 2 - 1" (note
-    // the divisor of 256 vs 255). I use libsndfile as a benchmark for testing, so I'm therefore leaving this block here just for my automated
-    // correctness testing. This is disabled by default.
-    for (size_t i = 0; i < sampleCount; ++i) {
+    /*
+    It appears libsndfile uses slightly different logic for the u8 -> f32 conversion to dr_wav, which in my opinion is incorrect. It appears
+    libsndfile performs the conversion something like "f32 = (u8 / 256) * 2 - 1", however I think it should be "f32 = (u8 / 255) * 2 - 1" (note
+    the divisor of 256 vs 255). I use libsndfile as a benchmark for testing, so I'm therefore leaving this block here just for my automated
+    correctness testing. This is disabled by default.
+    */
+    for (i = 0; i < sampleCount; ++i) {
         *pOut++ = (pIn[i] / 256.0f) * 2 - 1;
     }
 #else
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {
         *pOut++ = (pIn[i] / 255.0f) * 2 - 1;
     }
 #endif
@@ -2957,22 +3573,26 @@ void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
 
 void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount)
 {
+    size_t i;
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {
         *pOut++ = pIn[i] / 32768.0f;
     }
 }
 
 void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
+    size_t i;
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {
         unsigned int s0 = pIn[i*3 + 0];
         unsigned int s1 = pIn[i*3 + 1];
         unsigned int s2 = pIn[i*3 + 2];
@@ -2984,59 +3604,68 @@ void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
 
 void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount)
 {
+    size_t i;
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {
         *pOut++ = (float)(pIn[i] / 2147483648.0);
     }
 }
 
 void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount)
 {
+    size_t i;
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {
         *pOut++ = (float)pIn[i];
     }
 }
 
 void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
+    size_t i;
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {
         *pOut++ = drwav__alaw_to_s16(pIn[i]) / 32768.0f;
     }
 }
 
 void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
+    size_t i;
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {
         *pOut++ = drwav__mulaw_to_s16(pIn[i]) / 32768.0f;
     }
 }
 
 
 
-static void drwav__pcm_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample)
+static void drwav__pcm_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
 {
-    // Special case for 8-bit sample data because it's treated as unsigned.
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
     if (bytesPerSample == 1) {
         drwav_u8_to_s32(pOut, pIn, totalSampleCount);
         return;
     }
 
-    // Slightly more optimal implementation for common formats.
+    /* Slightly more optimal implementation for common formats. */
     if (bytesPerSample == 2) {
         drwav_s16_to_s32(pOut, (const drwav_int16*)pIn, totalSampleCount);
         return;
@@ -3046,22 +3675,22 @@ static void drwav__pcm_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_
         return;
     }
     if (bytesPerSample == 4) {
-        for (unsigned int i = 0; i < totalSampleCount; ++i) {
-           *pOut++ = ((drwav_int32*)pIn)[i];
+        for (i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int32*)pIn)[i];
         }
         return;
     }
 
 
-    // Anything more than 64 bits per sample is not supported.
+    /* Anything more than 64 bits per sample is not supported. */
     if (bytesPerSample > 8) {
         drwav_zero_memory(pOut, totalSampleCount * sizeof(*pOut));
         return;
     }
 
 
-    // Generic, slow converter.
-    for (unsigned int i = 0; i < totalSampleCount; ++i) {
+    /* Generic, slow converter. */
+    for (i = 0; i < totalSampleCount; ++i) {
         drwav_uint64 sample = 0;
         unsigned int shift  = (8 - bytesPerSample) * 8;
 
@@ -3076,16 +3705,16 @@ static void drwav__pcm_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_
     }
 }
 
-static void drwav__ieee_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned short bytesPerSample)
+static void drwav__ieee_to_s32(drwav_int32* pOut, const unsigned char* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
 {
     if (bytesPerSample == 4) {
-        drwav_f32_to_s32(pOut, (float*)pIn, totalSampleCount);
+        drwav_f32_to_s32(pOut, (const float*)pIn, totalSampleCount);
         return;
     } else if (bytesPerSample == 8) {
-        drwav_f64_to_s32(pOut, (double*)pIn, totalSampleCount);
+        drwav_f64_to_s32(pOut, (const double*)pIn, totalSampleCount);
         return;
     } else {
-        // Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float.
+        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
         drwav_zero_memory(pOut, totalSampleCount * sizeof(*pOut));
         return;
     }
@@ -3094,24 +3723,29 @@ static void drwav__ieee_to_s32(drwav_int32* pOut, const unsigned char* pIn, size
 
 drwav_uint64 drwav_read_s32__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
 {
-    // Fast path.
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bytesPerSample == 4) {
+    drwav_uint64 totalSamplesRead;
+    unsigned char sampleData[4096];
+    drwav_uint32 bytesPerSample;
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 32) {
         return drwav_read(pWav, samplesToRead, pBufferOut);
     }
-
-    if (pWav->bytesPerSample == 0) {
+    
+    bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
         return 0;
     }
 
-    drwav_uint64 totalSamplesRead = 0;
-    unsigned char sampleData[4096];
+    totalSamplesRead = 0;
+
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
 
-        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
 
         pBufferOut       += samplesRead;
         samplesToRead    -= samplesRead;
@@ -3123,8 +3757,10 @@ drwav_uint64 drwav_read_s32__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_
 
 drwav_uint64 drwav_read_s32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
 {
-    // We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
-    // want to duplicate that code.
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
     drwav_uint64 totalSamplesRead = 0;
     drwav_int16 samples16[2048];
     while (samplesToRead > 0) {
@@ -3133,7 +3769,7 @@ drwav_uint64 drwav_read_s32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
             break;
         }
 
-        drwav_s16_to_s32(pBufferOut, samples16, (size_t)samplesRead);   // <-- Safe cast because we're clamping to 2048.
+        drwav_s16_to_s32(pBufferOut, samples16, (size_t)samplesRead);   /* <-- Safe cast because we're clamping to 2048. */
 
         pBufferOut       += samplesRead;
         samplesToRead    -= samplesRead;
@@ -3145,8 +3781,10 @@ drwav_uint64 drwav_read_s32__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, dr
 
 drwav_uint64 drwav_read_s32__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
 {
-    // We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
-    // want to duplicate that code.
+    /*
+    We're just going to borrow the implementation from drwav_read_s16(), since IMA-ADPCM is a little more complicated than the other formats and I don't
+    want to duplicate that code.
+    */
     drwav_uint64 totalSamplesRead = 0;
     drwav_int16 samples16[2048];
     while (samplesToRead > 0) {
@@ -3155,7 +3793,7 @@ drwav_uint64 drwav_read_s32__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_
             break;
         }
 
-        drwav_s16_to_s32(pBufferOut, samples16, (size_t)samplesRead);   // <-- Safe cast because we're clamping to 2048.
+        drwav_s16_to_s32(pBufferOut, samples16, (size_t)samplesRead);   /* <-- Safe cast because we're clamping to 2048. */
 
         pBufferOut       += samplesRead;
         samplesToRead    -= samplesRead;
@@ -3167,19 +3805,23 @@ drwav_uint64 drwav_read_s32__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_
 
 drwav_uint64 drwav_read_s32__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
 {
-    if (pWav->bytesPerSample == 0) {
+    drwav_uint64 totalSamplesRead;
+    unsigned char sampleData[4096];
+
+    drwav_uint32 bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
         return 0;
     }
 
-    drwav_uint64 totalSamplesRead = 0;
-    unsigned char sampleData[4096];
+    totalSamplesRead = 0;
+
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
 
-        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)samplesRead, pWav->bytesPerSample);
+        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
 
         pBufferOut       += samplesRead;
         samplesToRead    -= samplesRead;
@@ -3191,14 +3833,18 @@ drwav_uint64 drwav_read_s32__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav
 
 drwav_uint64 drwav_read_s32__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
 {
-    if (pWav->bytesPerSample == 0) {
+    drwav_uint64 totalSamplesRead;
+    unsigned char sampleData[4096];
+
+    drwav_uint32 bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
         return 0;
     }
 
-    drwav_uint64 totalSamplesRead = 0;
-    unsigned char sampleData[4096];
+    totalSamplesRead = 0;
+
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
@@ -3215,14 +3861,18 @@ drwav_uint64 drwav_read_s32__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav
 
 drwav_uint64 drwav_read_s32__mulaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut)
 {
-    if (pWav->bytesPerSample == 0) {
+    drwav_uint64 totalSamplesRead;
+    unsigned char sampleData[4096];
+
+    drwav_uint32 bytesPerSample = drwav_get_bytes_per_sample(pWav);
+    if (bytesPerSample == 0) {
         return 0;
     }
 
-    drwav_uint64 totalSamplesRead = 0;
-    unsigned char sampleData[4096];
+    totalSamplesRead = 0;
+
     while (samplesToRead > 0) {
-        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/pWav->bytesPerSample), sampleData);
+        drwav_uint64 samplesRead = drwav_read(pWav, drwav_min(samplesToRead, sizeof(sampleData)/bytesPerSample), sampleData);
         if (samplesRead == 0) {
             break;
         }
@@ -3243,9 +3893,9 @@ drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32
         return 0;
     }
 
-    // Don't try to read more samples than can potentially fit in the output buffer.
-    if (samplesToRead * sizeof(drwav_int32) > SIZE_MAX) {
-        samplesToRead = SIZE_MAX / sizeof(drwav_int32);
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (samplesToRead * sizeof(drwav_int32) > DRWAV_SIZE_MAX) {
+        samplesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int32);
     }
 
 
@@ -3276,35 +3926,46 @@ drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32
     return 0;
 }
 
+drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{   /* Frame-based wrapper: asks drwav_read_s32() for framesToRead * channels samples and divides the count it returns by channels. */
+    return (pWav == NULL || pWav->channels == 0) ? 0 : drwav_read_s32(pWav, framesToRead * pWav->channels, pBufferOut) / pWav->channels;   /* Guard first: pWav is dereferenced and channels is used as a divisor. */
+}
+
 void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
+    size_t i;   /* Index into pIn; declared at the top of the function rather than inside the for-statement. */
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {   /* Re-centre each unsigned 8-bit sample on zero (minus 128), then shift it up 24 bits. NOTE(review): left-shifts a possibly negative int. */
         *pOut++ = ((int)pIn[i] - 128) << 24;
     }
 }
 
 void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount)
 {
+    size_t i;   /* Index into pIn; declared at the top of the function rather than inside the for-statement. */
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {   /* Widen each 16-bit sample by shifting it up 16 bits. NOTE(review): left-shifts a possibly negative value. */
         *pOut++ = pIn[i] << 16;
     }
 }
 
 void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
+    size_t i;
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {
         unsigned int s0 = pIn[i*3 + 0];
         unsigned int s1 = pIn[i*3 + 1];
         unsigned int s2 = pIn[i*3 + 2];
@@ -3316,44 +3977,52 @@ void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCo
 
 void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount)
 {
+    size_t i;   /* Index into pIn; declared at the top of the function rather than inside the for-statement. */
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {   /* Scale each float by 2147483648.0 (2^31) and convert to a 32-bit integer. NOTE(review): no clamping is visible here - confirm the input range. */
         *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
     }
 }
 
 void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount)
 {
+    size_t i;   /* Index into pIn; declared at the top of the function rather than inside the for-statement. */
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {   /* Scale each double by 2147483648.0 (2^31) and convert to a 32-bit integer. NOTE(review): no clamping is visible here - confirm the input range. */
         *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
     }
 }
 
 void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
+    size_t i;   /* Index into pIn; declared at the top of the function rather than inside the for-statement. */
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i = 0; i < sampleCount; ++i) {
+    for (i = 0; i < sampleCount; ++i) {   /* Pass each input byte through drwav__alaw_to_s16() and shift the result up 16 bits. */
         *pOut++ = ((drwav_int32)drwav__alaw_to_s16(pIn[i])) << 16;
     }
 }
 
 void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
 {
+    size_t i;   /* Index into pIn; declared at the top of the function rather than inside the for-statement. */
+
     if (pOut == NULL || pIn == NULL) {
         return;
     }
 
-    for (size_t i= 0; i < sampleCount; ++i) {
+    for (i= 0; i < sampleCount; ++i) {   /* Pass each input byte through drwav__mulaw_to_s16() and shift the result up 16 bits. */
         *pOut++ = ((drwav_int32)drwav__mulaw_to_s16(pIn[i])) << 16;
     }
 }
@@ -3362,105 +4031,145 @@ void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sample
 
 drwav_int16* drwav__read_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
 {
+    drwav_uint64 sampleDataSize;   /* Byte size of the full buffer; kept 64-bit so it can be range-checked before the cast to size_t. */
+    drwav_int16* pSampleData;      /* Buffer obtained from DRWAV_MALLOC; returned to the caller on success. */
+    drwav_uint64 samplesRead;      /* Number of samples drwav_read_s16() actually delivered. */
+
     drwav_assert(pWav != NULL);
 
-    drwav_uint64 sampleDataSize = pWav->totalSampleCount * sizeof(drwav_int16);
-    if (sampleDataSize > SIZE_MAX) {
+    sampleDataSize = pWav->totalSampleCount * sizeof(drwav_int16);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
         drwav_uninit(pWav);
-        return NULL;    // File's too big.
+        return NULL;    /* File's too big: the byte count does not fit in a size_t. */
     }
 
-    drwav_int16* pSampleData = (drwav_int16*)DRWAV_MALLOC((size_t)sampleDataSize);    // <-- Safe cast due to the check above.
+    pSampleData = (drwav_int16*)DRWAV_MALLOC((size_t)sampleDataSize);    /* <-- Safe cast due to the check above. */
     if (pSampleData == NULL) {
         drwav_uninit(pWav);
-        return NULL;    // Failed to allocate memory.
+        return NULL;    /* Failed to allocate memory. */
     }
 
-    drwav_uint64 samplesRead = drwav_read_s16(pWav, (size_t)pWav->totalSampleCount, pSampleData);
+    samplesRead = drwav_read_s16(pWav, (size_t)pWav->totalSampleCount, pSampleData);   /* Request every sample in one call. */
     if (samplesRead != pWav->totalSampleCount) {
         DRWAV_FREE(pSampleData);
         drwav_uninit(pWav);
-        return NULL;    // There was an error reading the samples.
+        return NULL;    /* There was an error reading the samples; a short read is treated as failure. */
     }
 
     drwav_uninit(pWav);
 
-    if (sampleRate) *sampleRate = pWav->sampleRate;
-    if (channels) *channels = pWav->channels;
-    if (totalSampleCount) *totalSampleCount = pWav->totalSampleCount;
+    if (sampleRate) {   /* Outputs are optional; each is written only when non-NULL. NOTE(review): pWav is read after drwav_uninit() - confirm these fields survive it. */
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = pWav->totalSampleCount;
+    }
+
     return pSampleData;
 }
 
 float* drwav__read_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
 {
+    drwav_uint64 sampleDataSize;   /* Byte size of the full buffer; kept 64-bit so it can be range-checked before the cast to size_t. */
+    float* pSampleData;            /* Buffer obtained from DRWAV_MALLOC; returned to the caller on success. */
+    drwav_uint64 samplesRead;      /* Number of samples drwav_read_f32() actually delivered. */
+
     drwav_assert(pWav != NULL);
 
-    drwav_uint64 sampleDataSize = pWav->totalSampleCount * sizeof(float);
-    if (sampleDataSize > SIZE_MAX) {
+    sampleDataSize = pWav->totalSampleCount * sizeof(float);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
         drwav_uninit(pWav);
-        return NULL;    // File's too big.
+        return NULL;    /* File's too big: the byte count does not fit in a size_t. */
     }
 
-    float* pSampleData = (float*)DRWAV_MALLOC((size_t)sampleDataSize);    // <-- Safe cast due to the check above.
+    pSampleData = (float*)DRWAV_MALLOC((size_t)sampleDataSize);    /* <-- Safe cast due to the check above. */
     if (pSampleData == NULL) {
         drwav_uninit(pWav);
-        return NULL;    // Failed to allocate memory.
+        return NULL;    /* Failed to allocate memory. */
     }
 
-    drwav_uint64 samplesRead = drwav_read_f32(pWav, (size_t)pWav->totalSampleCount, pSampleData);
+    samplesRead = drwav_read_f32(pWav, (size_t)pWav->totalSampleCount, pSampleData);   /* Request every sample in one call. */
     if (samplesRead != pWav->totalSampleCount) {
         DRWAV_FREE(pSampleData);
         drwav_uninit(pWav);
-        return NULL;    // There was an error reading the samples.
+        return NULL;    /* There was an error reading the samples; a short read is treated as failure. */
     }
 
     drwav_uninit(pWav);
 
-    if (sampleRate) *sampleRate = pWav->sampleRate;
-    if (channels) *channels = pWav->channels;
-    if (totalSampleCount) *totalSampleCount = pWav->totalSampleCount;
+    if (sampleRate) {   /* Outputs are optional; each is written only when non-NULL. NOTE(review): pWav is read after drwav_uninit() - confirm these fields survive it. */
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = pWav->totalSampleCount;
+    }
+
     return pSampleData;
 }
 
 drwav_int32* drwav__read_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
 {
+    drwav_uint64 sampleDataSize;   /* Byte size of the full buffer; kept 64-bit so it can be range-checked before the cast to size_t. */
+    drwav_int32* pSampleData;      /* Buffer obtained from DRWAV_MALLOC; returned to the caller on success. */
+    drwav_uint64 samplesRead;      /* Number of samples drwav_read_s32() actually delivered. */
+
     drwav_assert(pWav != NULL);
 
-    drwav_uint64 sampleDataSize = pWav->totalSampleCount * sizeof(drwav_int32);
-    if (sampleDataSize > SIZE_MAX) {
+    sampleDataSize = pWav->totalSampleCount * sizeof(drwav_int32);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
         drwav_uninit(pWav);
-        return NULL;    // File's too big.
+        return NULL;    /* File's too big: the byte count does not fit in a size_t. */
     }
 
-    drwav_int32* pSampleData = (drwav_int32*)DRWAV_MALLOC((size_t)sampleDataSize);    // <-- Safe cast due to the check above.
+    pSampleData = (drwav_int32*)DRWAV_MALLOC((size_t)sampleDataSize);    /* <-- Safe cast due to the check above. */
     if (pSampleData == NULL) {
         drwav_uninit(pWav);
-        return NULL;    // Failed to allocate memory.
+        return NULL;    /* Failed to allocate memory. */
     }
 
-    drwav_uint64 samplesRead = drwav_read_s32(pWav, (size_t)pWav->totalSampleCount, pSampleData);
+    samplesRead = drwav_read_s32(pWav, (size_t)pWav->totalSampleCount, pSampleData);   /* Request every sample in one call. */
     if (samplesRead != pWav->totalSampleCount) {
         DRWAV_FREE(pSampleData);
         drwav_uninit(pWav);
-        return NULL;    // There was an error reading the samples.
+        return NULL;    /* There was an error reading the samples; a short read is treated as failure. */
     }
 
     drwav_uninit(pWav);
 
-    if (sampleRate) *sampleRate = pWav->sampleRate;
-    if (channels) *channels = pWav->channels;
-    if (totalSampleCount) *totalSampleCount = pWav->totalSampleCount;
+    if (sampleRate) {   /* Outputs are optional; each is written only when non-NULL. NOTE(review): pWav is read after drwav_uninit() - confirm these fields survive it. */
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = pWav->totalSampleCount;
+    }
+
     return pSampleData;
 }
 
 
 drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
-
     drwav wav;
+
+    if (channels) {
+        *channels = 0;
+    }
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = 0;
+    }
+
     if (!drwav_init(&wav, onRead, onSeek, pUserData)) {
         return NULL;
     }
@@ -3468,13 +4177,55 @@ drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onS
     return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount);
 }
 
+drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut)
+{
+    /*
+    Frame-counted front end for drwav_open_and_read_s16(). Provided outputs are zeroed first, then filled in only if the open-and-read succeeds.
+    */
+    unsigned int channelCount;
+    unsigned int rate;
+    drwav_uint64 sampleTotal;
+    drwav_int16* pSamples;
+
+    if (channelsOut != NULL) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut != NULL) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut != NULL) {
+        *totalFrameCountOut = 0;
+    }
+
+    pSamples = drwav_open_and_read_s16(onRead, onSeek, pUserData, &channelCount, &rate, &sampleTotal);
+    if (pSamples != NULL) {
+        if (channelsOut != NULL) {
+            *channelsOut = channelCount;
+        }
+        if (sampleRateOut != NULL) {
+            *sampleRateOut = rate;
+        }
+        if (totalFrameCountOut != NULL) {
+            *totalFrameCountOut = sampleTotal / channelCount;   /* Sample total -> frame total. */
+        }
+    }
+    return pSamples;
+}
+
 float* drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
-
     drwav wav;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = 0;
+    }
+
     if (!drwav_init(&wav, onRead, onSeek, pUserData)) {
         return NULL;
     }
@@ -3482,13 +4233,55 @@ float* drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, v
     return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount);
 }
 
+float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut)
+{
+    /*
+    Frame-counted front end for drwav_open_and_read_f32(). Provided outputs are zeroed first, then filled in only if the open-and-read succeeds.
+    */
+    unsigned int channelCount;
+    unsigned int rate;
+    drwav_uint64 sampleTotal;
+    float* pSamples;
+
+    if (channelsOut != NULL) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut != NULL) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut != NULL) {
+        *totalFrameCountOut = 0;
+    }
+
+    pSamples = drwav_open_and_read_f32(onRead, onSeek, pUserData, &channelCount, &rate, &sampleTotal);
+    if (pSamples != NULL) {
+        if (channelsOut != NULL) {
+            *channelsOut = channelCount;
+        }
+        if (sampleRateOut != NULL) {
+            *sampleRateOut = rate;
+        }
+        if (totalFrameCountOut != NULL) {
+            *totalFrameCountOut = sampleTotal / channelCount;   /* Sample total -> frame total. */
+        }
+    }
+    return pSamples;
+}
+
 drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
-
     drwav wav;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = 0;
+    }
+
     if (!drwav_init(&wav, onRead, onSeek, pUserData)) {
         return NULL;
     }
@@ -3496,14 +4289,56 @@ drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onS
     return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount);
 }
 
-#ifndef DR_WAV_NO_STDIO
-drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    unsigned int channels;           /* Channel count reported by drwav_open_and_read_s32(). */
+    unsigned int sampleRate;         /* Sample rate reported by drwav_open_and_read_s32(). */
+    drwav_uint64 totalSampleCount;   /* Sample-based total from drwav_open_and_read_s32(); divided by channels below. */
+    drwav_int32* result;             /* Buffer returned by drwav_open_and_read_s32(), passed straight through to the caller. */
 
+    if (channelsOut) {   /* Zero every provided output first so a failed open reports 0. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    result = drwav_open_and_read_s32(onRead, onSeek, pUserData, &channels, &sampleRate, &totalSampleCount);   /* Delegate to the sample-based API, capturing its outputs in locals. */
+    if (result == NULL) {
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = totalSampleCount / channels;   /* Sample total -> frame total. NOTE(review): assumes channels is non-zero after a successful open - confirm. */
+    }
+
+    return result;
+}
+
+#ifndef DR_WAV_NO_STDIO
+drwav_int16* drwav_open_file_and_read_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
     drwav wav;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = 0;
+    }
+
     if (!drwav_init_file(&wav, filename)) {
         return NULL;
     }
@@ -3511,13 +4346,55 @@ drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* ch
     return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount);
 }
 
-float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    unsigned int channels;           /* Channel count reported by drwav_open_file_and_read_s16(). */
+    unsigned int sampleRate;         /* Sample rate reported by drwav_open_file_and_read_s16(). */
+    drwav_uint64 totalSampleCount;   /* Sample-based total from drwav_open_file_and_read_s16(); divided by channels below. */
+    drwav_int16* result;             /* Buffer returned by drwav_open_file_and_read_s16(), passed straight through to the caller. */
 
+    if (channelsOut) {   /* Zero every provided output first so a failed open reports 0. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    result = drwav_open_file_and_read_s16(filename, &channels, &sampleRate, &totalSampleCount);   /* Delegate to the sample-based API, capturing its outputs in locals. */
+    if (result == NULL) {
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = totalSampleCount / channels;   /* Sample total -> frame total. NOTE(review): assumes channels is non-zero after a successful open - confirm. */
+    }
+
+    return result;
+}
+
+float* drwav_open_file_and_read_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
     drwav wav;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = 0;
+    }
+
     if (!drwav_init_file(&wav, filename)) {
         return NULL;
     }
@@ -3525,28 +4402,112 @@ float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels
     return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount);
 }
 
-drwav_int32* drwav_open_and_read_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    unsigned int channels;           /* Channel count reported by drwav_open_file_and_read_f32(). */
+    unsigned int sampleRate;         /* Sample rate reported by drwav_open_file_and_read_f32(). */
+    drwav_uint64 totalSampleCount;   /* Sample-based total from drwav_open_file_and_read_f32(); divided by channels below. */
+    float* result;                   /* Buffer returned by drwav_open_file_and_read_f32(), passed straight through to the caller. */
 
+    if (channelsOut) {   /* Zero every provided output first so a failed open reports 0. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    result = drwav_open_file_and_read_f32(filename, &channels, &sampleRate, &totalSampleCount);   /* Delegate to the sample-based API, capturing its outputs in locals. */
+    if (result == NULL) {
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = totalSampleCount / channels;   /* Sample total -> frame total. NOTE(review): assumes channels is non-zero after a successful open - confirm. */
+    }
+
+    return result;
+}
+
+drwav_int32* drwav_open_file_and_read_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{   /* Initialises a decoder on `filename` via drwav_init_file() and hands it to drwav__read_and_close_s32(); returns NULL if initialisation fails. */
     drwav wav;
+
+    if (sampleRate) {   /* Outputs are optional; each provided one is zeroed up front so a failed open reports 0. */
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = 0;
+    }
+
     if (!drwav_init_file(&wav, filename)) {
         return NULL;
     }
 
     return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount);
 }
+
+drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut)
+{
+    /*
+    Frame-counted front end for drwav_open_file_and_read_s32(). Provided outputs are zeroed first, then filled in only if the open-and-read succeeds.
+    */
+    unsigned int channelCount;
+    unsigned int rate;
+    drwav_uint64 sampleTotal;
+    drwav_int32* pSamples;
+
+    if (channelsOut != NULL) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut != NULL) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut != NULL) {
+        *totalFrameCountOut = 0;
+    }
+
+    pSamples = drwav_open_file_and_read_s32(filename, &channelCount, &rate, &sampleTotal);
+    if (pSamples != NULL) {
+        if (channelsOut != NULL) {
+            *channelsOut = channelCount;
+        }
+        if (sampleRateOut != NULL) {
+            *sampleRateOut = rate;
+        }
+        if (totalFrameCountOut != NULL) {
+            *totalFrameCountOut = sampleTotal / channelCount;   /* Sample total -> frame total. */
+        }
+    }
+    return pSamples;
+}
 #endif
 
-drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+drwav_int16* drwav_open_memory_and_read_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
-
     drwav wav;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = 0;
+    }
+
     if (!drwav_init_memory(&wav, data, dataSize)) {
         return NULL;
     }
@@ -3554,13 +4515,55 @@ drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, u
     return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount);
 }
 
-float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    unsigned int channels;           /* Channel count reported by drwav_open_memory_and_read_s16(). */
+    unsigned int sampleRate;         /* Sample rate reported by drwav_open_memory_and_read_s16(). */
+    drwav_uint64 totalSampleCount;   /* Sample-based total from drwav_open_memory_and_read_s16(); divided by channels below. */
+    drwav_int16* result;             /* Buffer returned by drwav_open_memory_and_read_s16(), passed straight through to the caller. */
 
+    if (channelsOut) {   /* Zero every provided output first so a failed open reports 0. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    result = drwav_open_memory_and_read_s16(data, dataSize, &channels, &sampleRate, &totalSampleCount);   /* Delegate to the sample-based API, capturing its outputs in locals. */
+    if (result == NULL) {
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = totalSampleCount / channels;   /* Sample total -> frame total. NOTE(review): assumes channels is non-zero after a successful open - confirm. */
+    }
+
+    return result;
+}
+
+float* drwav_open_memory_and_read_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{
     drwav wav;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = 0;
+    }
+
     if (!drwav_init_memory(&wav, data, dataSize)) {
         return NULL;
     }
@@ -3568,20 +4571,97 @@ float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigne
     return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount);
 }
 
-drwav_int32* drwav_open_and_read_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut)
 {
-    if (sampleRate) *sampleRate = 0;
-    if (channels) *channels = 0;
-    if (totalSampleCount) *totalSampleCount = 0;
+    unsigned int channels;           /* Channel count reported by drwav_open_memory_and_read_f32(). */
+    unsigned int sampleRate;         /* Sample rate reported by drwav_open_memory_and_read_f32(). */
+    drwav_uint64 totalSampleCount;   /* Sample-based total from drwav_open_memory_and_read_f32(); divided by channels below. */
+    float* result;                   /* Buffer returned by drwav_open_memory_and_read_f32(), passed straight through to the caller. */
 
+    if (channelsOut) {   /* Zero every provided output first so a failed open reports 0. */
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    result = drwav_open_memory_and_read_f32(data, dataSize, &channels, &sampleRate, &totalSampleCount);   /* Delegate to the sample-based API, capturing its outputs in locals. */
+    if (result == NULL) {
+        return NULL;
+    }
+
+    if (channelsOut) {
+        *channelsOut = channels;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = sampleRate;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = totalSampleCount / channels;   /* Sample total -> frame total. NOTE(review): assumes channels is non-zero after a successful open - confirm. */
+    }
+
+    return result;
+}
+
+drwav_int32* drwav_open_memory_and_read_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount)
+{   /* Initialises a decoder over the `data`/`dataSize` buffer via drwav_init_memory() and hands it to drwav__read_and_close_s32(); returns NULL if initialisation fails. */
     drwav wav;
+
+    if (sampleRate) {   /* Outputs are optional; each provided one is zeroed up front so a failed open reports 0. */
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalSampleCount) {
+        *totalSampleCount = 0;
+    }
+
     if (!drwav_init_memory(&wav, data, dataSize)) {
         return NULL;
     }
 
     return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount);
 }
-#endif  //DR_WAV_NO_CONVERSION_API
+
+drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut)
+{
+    /*
+    Frame-counted front end for drwav_open_memory_and_read_s32(). Provided outputs are zeroed first, then filled in only if the open-and-read succeeds.
+    */
+    unsigned int channelCount;
+    unsigned int rate;
+    drwav_uint64 sampleTotal;
+    drwav_int32* pSamples;
+
+    if (channelsOut != NULL) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut != NULL) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut != NULL) {
+        *totalFrameCountOut = 0;
+    }
+
+    pSamples = drwav_open_memory_and_read_s32(data, dataSize, &channelCount, &rate, &sampleTotal);
+    if (pSamples != NULL) {
+        if (channelsOut != NULL) {
+            *channelsOut = channelCount;
+        }
+        if (sampleRateOut != NULL) {
+            *sampleRateOut = rate;
+        }
+        if (totalFrameCountOut != NULL) {
+            *totalFrameCountOut = sampleTotal / channelCount;   /* Sample total -> frame total. */
+        }
+    }
+    return pSamples;
+}
+#endif  /* DR_WAV_NO_CONVERSION_API */
 
 
 void drwav_free(void* pDataReturnedByOpenAndRead)
@@ -3589,136 +4669,203 @@ void drwav_free(void* pDataReturnedByOpenAndRead)
     DRWAV_FREE(pDataReturnedByOpenAndRead);
 }
 
-#endif  //DR_WAV_IMPLEMENTATION
-
-
-// REVISION HISTORY
-//
-// v0.8.1 - 2018-06-29
-//   - Add support for sequential writing APIs.
-//   - Disable seeking in write mode.
-//   - Fix bugs with Wave64.
-//   - Fix typos.
-//
-// v0.8 - 2018-04-27
-//   - Bug fix.
-//   - Start using major.minor.revision versioning.
-//
-// v0.7f - 2018-02-05
-//   - Restrict ADPCM formats to a maximum of 2 channels.
-//
-// v0.7e - 2018-02-02
-//   - Fix a crash.
-//
-// v0.7d - 2018-02-01
-//   - Fix a crash.
-//
-// v0.7c - 2018-02-01
-//   - Set drwav.bytesPerSample to 0 for all compressed formats.
-//   - Fix a crash when reading 16-bit floating point WAV files. In this case dr_wav will output silence for
-//     all format conversion reading APIs (*_s16, *_s32, *_f32 APIs).
-//   - Fix some divide-by-zero errors.
-//
-// v0.7b - 2018-01-22
-//   - Fix errors with seeking of compressed formats.
-//   - Fix compilation error when DR_WAV_NO_CONVERSION_API
-//
-// v0.7a - 2017-11-17
-//   - Fix some GCC warnings.
-//
-// v0.7 - 2017-11-04
-//   - Add writing APIs.
-//
-// v0.6 - 2017-08-16
-//   - API CHANGE: Rename dr_* types to drwav_*.
-//   - Add support for custom implementations of malloc(), realloc(), etc.
-//   - Add support for Microsoft ADPCM.
-//   - Add support for IMA ADPCM (DVI, format code 0x11).
-//   - Optimizations to drwav_read_s16().
-//   - Bug fixes.
-//
-// v0.5g - 2017-07-16
-//   - Change underlying type for booleans to unsigned.
-//
-// v0.5f - 2017-04-04
-//   - Fix a minor bug with drwav_open_and_read_s16() and family.
-//
-// v0.5e - 2016-12-29
-//   - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this.
-//   - Minor fixes to documentation.
-//
-// v0.5d - 2016-12-28
-//   - Use drwav_int*/drwav_uint* sized types to improve compiler support.
-//
-// v0.5c - 2016-11-11
-//   - Properly handle JUNK chunks that come before the FMT chunk.
-//
-// v0.5b - 2016-10-23
-//   - A minor change to drwav_bool8 and drwav_bool32 types.
-//
-// v0.5a - 2016-10-11
-//   - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering.
-//   - Improve A-law and mu-law efficiency.
-//
-// v0.5 - 2016-09-29
-//   - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to
-//     keep it consistent with dr_audio and dr_flac.
-//
-// v0.4b - 2016-09-18
-//   - Fixed a typo in documentation.
-//
-// v0.4a - 2016-09-18
-//   - Fixed a typo.
-//   - Change date format to ISO 8601 (YYYY-MM-DD)
-//
-// v0.4 - 2016-07-13
-//   - API CHANGE. Make onSeek consistent with dr_flac.
-//   - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with dr_flac.
-//   - Added support for Sony Wave64.
-//
-// v0.3a - 2016-05-28
-//   - API CHANGE. Return drwav_bool32 instead of int in onSeek callback.
-//   - Fixed a memory leak.
-//
-// v0.3 - 2016-05-22
-//   - Lots of API changes for consistency.
-//
-// v0.2a - 2016-05-16
-//   - Fixed Linux/GCC build.
-//
-// v0.2 - 2016-05-11
-//   - Added support for reading data as signed 32-bit PCM for consistency with dr_flac.
-//
-// v0.1a - 2016-05-07
-//   - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize.
-//
-// v0.1 - 2016-05-04
-//   - Initial versioned release.
+#endif  /* DR_WAV_IMPLEMENTATION */
 
 
 /*
+REVISION HISTORY
+================
+v0.9.1 - 2019-05-05
+  - Add support for C89.
+  - Change license to choice of public domain or MIT-0.
+
+v0.9.0 - 2018-12-16
+  - API CHANGE: Add new reading APIs for reading by PCM frames instead of samples. Old APIs have been deprecated and
+    will be removed in v0.10.0. Deprecated APIs and their replacements:
+      drwav_read()                     -> drwav_read_pcm_frames()
+      drwav_read_s16()                 -> drwav_read_pcm_frames_s16()
+      drwav_read_f32()                 -> drwav_read_pcm_frames_f32()
+      drwav_read_s32()                 -> drwav_read_pcm_frames_s32()
+      drwav_seek_to_sample()           -> drwav_seek_to_pcm_frame()
+      drwav_write()                    -> drwav_write_pcm_frames()
+      drwav_open_and_read_s16()        -> drwav_open_and_read_pcm_frames_s16()
+      drwav_open_and_read_f32()        -> drwav_open_and_read_pcm_frames_f32()
+      drwav_open_and_read_s32()        -> drwav_open_and_read_pcm_frames_s32()
+      drwav_open_file_and_read_s16()   -> drwav_open_file_and_read_pcm_frames_s16()
+      drwav_open_file_and_read_f32()   -> drwav_open_file_and_read_pcm_frames_f32()
+      drwav_open_file_and_read_s32()   -> drwav_open_file_and_read_pcm_frames_s32()
+      drwav_open_memory_and_read_s16() -> drwav_open_memory_and_read_pcm_frames_s16()
+      drwav_open_memory_and_read_f32() -> drwav_open_memory_and_read_pcm_frames_f32()
+      drwav_open_memory_and_read_s32() -> drwav_open_memory_and_read_pcm_frames_s32()
+      drwav::totalSampleCount          -> drwav::totalPCMFrameCount
+  - API CHANGE: Rename drwav_open_and_read_file_*() to drwav_open_file_and_read_*().
+  - API CHANGE: Rename drwav_open_and_read_memory_*() to drwav_open_memory_and_read_*().
+  - Add built-in support for smpl chunks.
+  - Add support for firing a callback for each chunk in the file at initialization time.
+    - This is enabled through the drwav_init_ex() family of APIs.
+  - Handle invalid FMT chunks more robustly.
+
+v0.8.5 - 2018-09-11
+  - Const correctness.
+  - Fix a potential stack overflow.
+
+v0.8.4 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.8.3 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+
+v0.8.2 - 2018-08-02
+  - Fix some big-endian bugs.
+
+v0.8.1 - 2018-06-29
+  - Add support for sequential writing APIs.
+  - Disable seeking in write mode.
+  - Fix bugs with Wave64.
+  - Fix typos.
+
+v0.8 - 2018-04-27
+  - Bug fix.
+  - Start using major.minor.revision versioning.
+
+v0.7f - 2018-02-05
+  - Restrict ADPCM formats to a maximum of 2 channels.
+
+v0.7e - 2018-02-02
+  - Fix a crash.
+
+v0.7d - 2018-02-01
+  - Fix a crash.
+
+v0.7c - 2018-02-01
+  - Set drwav.bytesPerSample to 0 for all compressed formats.
+  - Fix a crash when reading 16-bit floating point WAV files. In this case dr_wav will output silence for
+    all format conversion reading APIs (*_s16, *_s32, *_f32 APIs).
+  - Fix some divide-by-zero errors.
+
+v0.7b - 2018-01-22
+  - Fix errors with seeking of compressed formats.
+  - Fix compilation error when DR_WAV_NO_CONVERSION_API is defined.
+
+v0.7a - 2017-11-17
+  - Fix some GCC warnings.
+
+v0.7 - 2017-11-04
+  - Add writing APIs.
+
+v0.6 - 2017-08-16
+  - API CHANGE: Rename dr_* types to drwav_*.
+  - Add support for custom implementations of malloc(), realloc(), etc.
+  - Add support for Microsoft ADPCM.
+  - Add support for IMA ADPCM (DVI, format code 0x11).
+  - Optimizations to drwav_read_s16().
+  - Bug fixes.
+
+v0.5g - 2017-07-16
+  - Change underlying type for booleans to unsigned.
+
+v0.5f - 2017-04-04
+  - Fix a minor bug with drwav_open_and_read_s16() and family.
+
+v0.5e - 2016-12-29
+  - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this.
+  - Minor fixes to documentation.
+
+v0.5d - 2016-12-28
+  - Use drwav_int* and drwav_uint* sized types to improve compiler support.
+
+v0.5c - 2016-11-11
+  - Properly handle JUNK chunks that come before the FMT chunk.
+
+v0.5b - 2016-10-23
+  - A minor change to drwav_bool8 and drwav_bool32 types.
+
+v0.5a - 2016-10-11
+  - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering.
+  - Improve A-law and mu-law efficiency.
+
+v0.5 - 2016-09-29
+  - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to
+    keep it consistent with dr_audio and dr_flac.
+
+v0.4b - 2016-09-18
+  - Fixed a typo in documentation.
+
+v0.4a - 2016-09-18
+  - Fixed a typo.
+  - Change date format to ISO 8601 (YYYY-MM-DD).
+
+v0.4 - 2016-07-13
+  - API CHANGE. Make onSeek consistent with dr_flac.
+  - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with dr_flac.
+  - Added support for Sony Wave64.
+
+v0.3a - 2016-05-28
+  - API CHANGE. Return drwav_bool32 instead of int in onSeek callback.
+  - Fixed a memory leak.
+
+v0.3 - 2016-05-22
+  - Lots of API changes for consistency.
+
+v0.2a - 2016-05-16
+  - Fixed Linux/GCC build.
+
+v0.2 - 2016-05-11
+  - Added support for reading data as signed 32-bit PCM for consistency with dr_flac.
+
+v0.1a - 2016-05-07
+  - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize.
+
+v0.1 - 2016-05-04
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
 This is free and unencumbered software released into the public domain.
 
-Anyone is free to copy, modify, publish, use, compile, sell, or
-distribute this software, either in source code form or as a compiled
-binary, for any purpose, commercial or non-commercial, and by any
-means.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
 
-In jurisdictions that recognize copyright laws, the author or authors
-of this software dedicate any and all copyright interest in the
-software to the public domain. We make this dedication for the benefit
-of the public at large and to the detriment of our heirs and
-successors. We intend this dedication to be an overt act of
-relinquishment in perpetuity of all present and future rights to this
-software under copyright law.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2018 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
 */