AVCodecDecoder: fix timing problems
A combination of two problems caused the timestamps of decoded audio to be wrong. 1) The output buffer is too small to hold the complete input. swresample handles this by buffering the leftover input for use the next time it is called; however, doing this repeatedly makes the buffered data pile up, and our way of computing the output timestamp from the input does not take that buffering into account, so the resulting timestamps are wrong. Moreover, we would need to empty the buffer by calling swr_convert with NULL input in that case. Fix: make sure not to feed more data to swr_convert than it can output into our buffer. This way, no buffering occurs, only the matrixing conversion. 2) When using planar audio, the "frame size" is a bit different. Instead of adding sample size * channel count to a single pointer, we need to add sample size * 1 to each channel buffer. Fix: add "fInputFrameSize", which takes this into account, instead of misusing fOutputFrameSize for the input. Fixes #12460.
This commit is contained in:
parent
45569721aa
commit
da455572ed
@ -112,6 +112,7 @@ AVCodecDecoder::AVCodecDecoder()
|
|||||||
fOutputFrameCount(0),
|
fOutputFrameCount(0),
|
||||||
fOutputFrameRate(1.0),
|
fOutputFrameRate(1.0),
|
||||||
fOutputFrameSize(0),
|
fOutputFrameSize(0),
|
||||||
|
fInputFrameSize(0),
|
||||||
|
|
||||||
fChunkBuffer(NULL),
|
fChunkBuffer(NULL),
|
||||||
fChunkBufferSize(0),
|
fChunkBufferSize(0),
|
||||||
@ -403,6 +404,11 @@ AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat)
|
|||||||
fOutputFrameSize = sampleSize * outputAudioFormat.channel_count;
|
fOutputFrameSize = sampleSize * outputAudioFormat.channel_count;
|
||||||
fOutputFrameCount = outputAudioFormat.buffer_size / fOutputFrameSize;
|
fOutputFrameCount = outputAudioFormat.buffer_size / fOutputFrameSize;
|
||||||
fOutputFrameRate = outputAudioFormat.frame_rate;
|
fOutputFrameRate = outputAudioFormat.frame_rate;
|
||||||
|
if (av_sample_fmt_is_planar(fContext->sample_fmt))
|
||||||
|
fInputFrameSize = sampleSize;
|
||||||
|
else
|
||||||
|
fInputFrameSize = fOutputFrameSize;
|
||||||
|
|
||||||
fRawDecodedAudio->opaque
|
fRawDecodedAudio->opaque
|
||||||
= av_realloc(fRawDecodedAudio->opaque, sizeof(avformat_codec_context));
|
= av_realloc(fRawDecodedAudio->opaque, sizeof(avformat_codec_context));
|
||||||
if (fRawDecodedAudio->opaque == NULL)
|
if (fRawDecodedAudio->opaque == NULL)
|
||||||
@ -925,10 +931,19 @@ AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes()
|
|||||||
ptr[i] = fDecodedDataBuffer->data[i] + fDecodedDataBufferOffset;
|
ptr[i] = fDecodedDataBuffer->data[i] + fDecodedDataBufferOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32 result = swr_convert(fResampleContext, fRawDecodedAudio->data,
|
// When there are more input frames than space in the output buffer,
|
||||||
|
// we could feed everything to swr and it would buffer the extra data.
|
||||||
|
// However, there is no easy way to flush that data without feeding more
|
||||||
|
// input, and it makes our timestamp computations fail.
|
||||||
|
// So, we feed only as many frames as we can get out, and handle the
|
||||||
|
// buffering ourselves.
|
||||||
|
// TODO Ideally, we should try to size our output buffer so that it can
|
||||||
|
// always hold all the output (swr provides helper functions for this)
|
||||||
|
inFrames = frames;
|
||||||
|
frames = swr_convert(fResampleContext, fRawDecodedAudio->data,
|
||||||
outFrames, ptr, inFrames);
|
outFrames, ptr, inFrames);
|
||||||
|
|
||||||
if (result < 0)
|
if (frames < 0)
|
||||||
debugger("resampling failed");
|
debugger("resampling failed");
|
||||||
} else {
|
} else {
|
||||||
memcpy(fRawDecodedAudio->data[0], fDecodedDataBuffer->data[0]
|
memcpy(fRawDecodedAudio->data[0], fDecodedDataBuffer->data[0]
|
||||||
@ -937,7 +952,7 @@ AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes()
|
|||||||
inFrames = frames;
|
inFrames = frames;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t remainingSize = inFrames * fOutputFrameSize;
|
size_t remainingSize = inFrames * fInputFrameSize;
|
||||||
size_t decodedSize = outFrames * fOutputFrameSize;
|
size_t decodedSize = outFrames * fOutputFrameSize;
|
||||||
fDecodedDataBufferSize -= inFrames;
|
fDecodedDataBufferSize -= inFrames;
|
||||||
|
|
||||||
|
@ -131,6 +131,9 @@ private:
|
|||||||
float fOutputFrameRate;
|
float fOutputFrameRate;
|
||||||
int fOutputFrameSize;
|
int fOutputFrameSize;
|
||||||
// sample size * channel count
|
// sample size * channel count
|
||||||
|
int fInputFrameSize;
|
||||||
|
// sample size * channel count
|
||||||
|
// or just sample size for planar formats
|
||||||
|
|
||||||
uint8_t* fChunkBuffer;
|
uint8_t* fChunkBuffer;
|
||||||
size_t fChunkBufferSize;
|
size_t fChunkBufferSize;
|
||||||
|
Loading…
Reference in New Issue
Block a user