Further speed up format_input for 24-bit PCM by about 25%

This commit is contained in:
Martijn van Beurden 2023-03-01 20:21:37 +01:00
parent 6884355d87
commit 724a8aa32e
1 changed file with 32 additions and 24 deletions

View File

@@ -2366,50 +2366,58 @@ FLAC__bool format_input(FLAC__int32 *dest[], uint32_t wide_samples, FLAC__bool i
else if(bps == 24) {
if(!is_big_endian) {
if(is_unsigned_samples) {
uint32_t b;
for(b = sample = wide_sample = 0; wide_sample < wide_samples; wide_sample++)
for(channel = 0; channel < channels; channel++, sample++) {
for(channel = 0; channel < channels; channel++) {
uint32_t b = 3*channel;
for(wide_sample = 0; wide_sample < wide_samples; wide_sample++) {
uint32_t t;
t = ubuffer.u8[b++];
t |= (uint32_t)(ubuffer.u8[b++]) << 8;
t |= (uint32_t)(ubuffer.u8[b++]) << 16;
t = ubuffer.u8[b];
t |= (uint32_t)(ubuffer.u8[b+1]) << 8;
t |= (uint32_t)(ubuffer.u8[b+2]) << 16;
out[channel][wide_sample] = (FLAC__int32)t - 0x800000;
b += 3*channels;
}
}
}
else {
uint32_t b;
for(b = sample = wide_sample = 0; wide_sample < wide_samples; wide_sample++)
for(channel = 0; channel < channels; channel++, sample++) {
for(channel = 0; channel < channels; channel++) {
uint32_t b = 3*channel;
for(wide_sample = 0; wide_sample < wide_samples; wide_sample++) {
uint32_t t;
t = ubuffer.u8[b++];
t |= (uint32_t)(ubuffer.u8[b++]) << 8;
t |= (int32_t)(ubuffer.s8[b++]) << 16;
t = ubuffer.u8[b];
t |= (uint32_t)(ubuffer.u8[b+1]) << 8;
t |= (int32_t)(ubuffer.s8[b+2]) << 16;
out[channel][wide_sample] = t;
b += 3*channels;
}
}
}
}
else {
if(is_unsigned_samples) {
uint32_t b;
for(b = sample = wide_sample = 0; wide_sample < wide_samples; wide_sample++)
for(channel = 0; channel < channels; channel++, sample++) {
for(channel = 0; channel < channels; channel++) {
uint32_t b = 3*channel;
for(wide_sample = 0; wide_sample < wide_samples; wide_sample++) {
uint32_t t;
t = ubuffer.u8[b++]; t <<= 8;
t |= ubuffer.u8[b++]; t <<= 8;
t |= ubuffer.u8[b++];
t = ubuffer.u8[b]; t <<= 8;
t |= ubuffer.u8[b+1]; t <<= 8;
t |= ubuffer.u8[b+2];
out[channel][wide_sample] = (FLAC__int32)t - 0x800000;
b += 3*channels;
}
}
}
else {
uint32_t b;
for(b = sample = wide_sample = 0; wide_sample < wide_samples; wide_sample++)
for(channel = 0; channel < channels; channel++, sample++) {
for(channel = 0; channel < channels; channel++) {
uint32_t b = 3*channel;
for(wide_sample = 0; wide_sample < wide_samples; wide_sample++) {
uint32_t t;
t = ubuffer.s8[b++]; t <<= 8;
t |= ubuffer.u8[b++]; t <<= 8;
t |= ubuffer.u8[b++];
t = ubuffer.s8[b]; t <<= 8;
t |= ubuffer.u8[b+1]; t <<= 8;
t |= ubuffer.u8[b+2];
out[channel][wide_sample] = t;
b += 3*channels;
}
}
}
}
}