From 724a8aa32e258905befd412a013b0fb39b97f708 Mon Sep 17 00:00:00 2001 From: Martijn van Beurden Date: Wed, 1 Mar 2023 20:21:37 +0100 Subject: [PATCH] Further speed up format_input for 24-bit PCM by about 25% --- src/flac/encode.c | 56 +++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/src/flac/encode.c b/src/flac/encode.c index 146f767d..633d8b1f 100644 --- a/src/flac/encode.c +++ b/src/flac/encode.c @@ -2366,50 +2366,58 @@ FLAC__bool format_input(FLAC__int32 *dest[], uint32_t wide_samples, FLAC__bool i else if(bps == 24) { if(!is_big_endian) { if(is_unsigned_samples) { - uint32_t b; - for(b = sample = wide_sample = 0; wide_sample < wide_samples; wide_sample++) - for(channel = 0; channel < channels; channel++, sample++) { + for(channel = 0; channel < channels; channel++) { + uint32_t b = 3*channel; + for(wide_sample = 0; wide_sample < wide_samples; wide_sample++) { uint32_t t; - t = ubuffer.u8[b++]; - t |= (uint32_t)(ubuffer.u8[b++]) << 8; - t |= (uint32_t)(ubuffer.u8[b++]) << 16; + t = ubuffer.u8[b]; + t |= (uint32_t)(ubuffer.u8[b+1]) << 8; + t |= (uint32_t)(ubuffer.u8[b+2]) << 16; out[channel][wide_sample] = (FLAC__int32)t - 0x800000; + b += 3*channels; } + } } else { - uint32_t b; - for(b = sample = wide_sample = 0; wide_sample < wide_samples; wide_sample++) - for(channel = 0; channel < channels; channel++, sample++) { + for(channel = 0; channel < channels; channel++) { + uint32_t b = 3*channel; + for(wide_sample = 0; wide_sample < wide_samples; wide_sample++) { uint32_t t; - t = ubuffer.u8[b++]; - t |= (uint32_t)(ubuffer.u8[b++]) << 8; - t |= (int32_t)(ubuffer.s8[b++]) << 16; + t = ubuffer.u8[b]; + t |= (uint32_t)(ubuffer.u8[b+1]) << 8; + t |= (int32_t)(ubuffer.s8[b+2]) << 16; out[channel][wide_sample] = t; + b += 3*channels; } + } } } else { if(is_unsigned_samples) { - uint32_t b; - for(b = sample = wide_sample = 0; wide_sample < wide_samples; wide_sample++) - for(channel = 0; channel < channels; channel++, sample++) { + for(channel = 0; channel < channels; channel++) { + uint32_t b = 3*channel; + for(wide_sample = 0; wide_sample < wide_samples; wide_sample++) { uint32_t t; - t = ubuffer.u8[b++]; t <<= 8; - t |= ubuffer.u8[b++]; t <<= 8; - t |= ubuffer.u8[b++]; + t = ubuffer.u8[b]; t <<= 8; + t |= ubuffer.u8[b+1]; t <<= 8; + t |= ubuffer.u8[b+2]; out[channel][wide_sample] = (FLAC__int32)t - 0x800000; + b += 3*channels; } + } } else { - uint32_t b; - for(b = sample = wide_sample = 0; wide_sample < wide_samples; wide_sample++) - for(channel = 0; channel < channels; channel++, sample++) { + for(channel = 0; channel < channels; channel++) { + uint32_t b = 3*channel; + for(wide_sample = 0; wide_sample < wide_samples; wide_sample++) { uint32_t t; - t = ubuffer.s8[b++]; t <<= 8; - t |= ubuffer.u8[b++]; t <<= 8; - t |= ubuffer.u8[b++]; + t = ubuffer.s8[b]; t <<= 8; + t |= ubuffer.u8[b+1]; t <<= 8; + t |= ubuffer.u8[b+2]; out[channel][wide_sample] = t; + b += 3*channels; } + } } } }