From 111c3add738587f97f41f8eaea35c6ee0d084ca5 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Sun, 10 Apr 2022 13:23:51 -0400 Subject: [PATCH] audio: Resampler optimizations. - Calculate `j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING` once per loop iteration since we use it multiple times. - Do the left-wing loop in two sections: while `srcframe < 0` and then the remaining calculations when `srcframe >= 0`. This bubbles a conditional out of every iteration of a tight loop, giving us a boost. We could _probably_ do this to the right-wing loop too, but it's less straightforward there. - The real win: Use floats instead of doubles. This almost doubles the speed of the entire function on Intel CPUs, and for embedded things without hardware-level support for doubles, the speedup is enormous. This in theory might reduce audio quality, though, and I had to put a check in place to avoid a division-by-zero that we avoided at higher precision, but this is likely to be worth keeping for at least the Sony PSP and other smaller platforms, if not everyone. --- src/audio/SDL_audiocvt.c | 46 ++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c index 26821312d..6ecc2c9ff 100644 --- a/src/audio/SDL_audiocvt.c +++ b/src/audio/SDL_audiocvt.c @@ -725,46 +725,64 @@ SDL_ResampleAudio(const int chans, const int inrate, const int outrate, const float *inbuf, const int inbuflen, float *outbuf, const int outbuflen) { - const double finrate = (double) inrate; - const double outtimeincr = 1.0 / ((float) outrate); - const double ratio = ((float) outrate) / ((float) inrate); + /* Note that this used to be double, but it looks like we can get by with float in most cases at + almost twice the speed on Intel processors, and orders of magnitude more + on CPUs that need a software fallback for double calculations. */ + typedef float ResampleFloatType; + + const ResampleFloatType finrate = (ResampleFloatType) inrate; + const ResampleFloatType outtimeincr = ((ResampleFloatType) 1.0f) / ((ResampleFloatType) outrate); + const ResampleFloatType ratio = ((float) outrate) / ((float) inrate); const int paddinglen = ResamplerPadding(inrate, outrate); const int framelen = chans * (int)sizeof (float); const int inframes = inbuflen / framelen; const int wantedoutframes = (int) ((inbuflen / framelen) * ratio); /* outbuflen isn't total to write, it's total available. */ const int maxoutframes = outbuflen / framelen; const int outframes = SDL_min(wantedoutframes, maxoutframes); + ResampleFloatType outtime = 0.0f; float *dst = outbuf; - double outtime = 0.0; int i, j, chan; for (i = 0; i < outframes; i++) { const int srcindex = (int) (outtime * inrate); - const double intime = ((double) srcindex) / finrate; - const double innexttime = ((double) (srcindex + 1)) / finrate; - const double interpolation1 = 1.0 - ((innexttime - outtime) / (innexttime - intime)); + const ResampleFloatType intime = ((ResampleFloatType) srcindex) / finrate; + const ResampleFloatType innexttime = ((ResampleFloatType) (srcindex + 1)) / finrate; + const ResampleFloatType indeltatime = innexttime - intime; + const ResampleFloatType interpolation1 = (indeltatime == 0.0f) ? 1.0f : (1.0f - ((innexttime - outtime) / indeltatime)); const int filterindex1 = (int) (interpolation1 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING); - const double interpolation2 = 1.0 - interpolation1; + const ResampleFloatType interpolation2 = 1.0f - interpolation1; const int filterindex2 = (int) (interpolation2 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING); for (chan = 0; chan < chans; chan++) { float outsample = 0.0f; /* do this twice to calculate the sample, once for the "left wing" and then same for the right. */ - /* !!! FIXME: do both wings in one loop */ - for (j = 0; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) { + + /* Left wing! split the "srcframe < 0" condition out into a preloop. */ + for (j = 0; srcindex < j; j++) { + const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING; const int srcframe = srcindex - j; - /* !!! FIXME: we can bubble this conditional out of here by doing a pre loop. */ - const float insample = (srcframe < 0) ? lpadding[((paddinglen + srcframe) * chans) + chan] : inbuf[(srcframe * chans) + chan]; - outsample += (float)(insample * (ResamplerFilter[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation1 * ResamplerFilterDifference[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)]))); + const float insample = lpadding[((paddinglen + srcframe) * chans) + chan]; + outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples]))); + } + + /* Finish the left wing now that srcframe >= 0 */ + for (; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) { + const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING; + const int srcframe = srcindex - j; + const float insample = inbuf[(srcframe * chans) + chan]; + outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples]))); } + /* Do the right wing! */ for (j = 0; (filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) { + const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING; const int srcframe = srcindex + 1 + j; /* !!! FIXME: we can bubble this conditional out of here by doing a post loop. */ const float insample = (srcframe >= inframes) ? rpadding[((srcframe - inframes) * chans) + chan] : inbuf[(srcframe * chans) + chan]; - outsample += (float)(insample * (ResamplerFilter[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation2 * ResamplerFilterDifference[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)]))); + outsample += (float)(insample * (ResamplerFilter[filterindex2 + jsamples] + (interpolation2 * ResamplerFilterDifference[filterindex2 + jsamples]))); } + *(dst++) = outsample; }