FFMPEG Plugin: Automatic detection of audio decoding parameters.

- Kudos to Marcus Overhagen for laying out the general idea of automatic
  detection by sharing some of his dvb code examples with me.
- Automatically detect the audio frame rate, channel count and sample format.
- Share audio sample format conversion code between AVFormatReader and
  AVCodecDecoder.
- Tested with several video and audio files via MediaPlayer.
- Tested also with test case mp3_decoder_test -after- removing the hard coded
  audio decoding parameters. Although the test shows that auto detection is
  working (via stepping through the auto detection code path) the complete test
  is still failing, due to missing implementation of incomplete audio frame
  decoding.
- Add and update the documentation accordingly.
This commit is contained in:
Colin Günther 2014-08-24 00:08:17 +02:00
parent 1a963de4e0
commit ffb0f5db8e
4 changed files with 181 additions and 104 deletions

View File

@ -339,43 +339,23 @@ AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat)
{
TRACE("AVCodecDecoder::_NegotiateAudioOutputFormat()\n");
media_multi_audio_format outputAudioFormat;
outputAudioFormat = media_raw_audio_format::wildcard;
outputAudioFormat.byte_order = B_MEDIA_HOST_ENDIAN;
outputAudioFormat.frame_rate
= fInputFormat.u.encoded_audio.output.frame_rate;
outputAudioFormat.channel_count
= fInputFormat.u.encoded_audio.output.channel_count;
outputAudioFormat.format = fInputFormat.u.encoded_audio.output.format;
outputAudioFormat.buffer_size
= inOutFormat->u.raw_audio.buffer_size;
// Check that format is not still a wild card!
if (outputAudioFormat.format == 0) {
TRACE(" format still a wild-card, assuming B_AUDIO_SHORT.\n");
outputAudioFormat.format = media_raw_audio_format::B_AUDIO_SHORT;
}
size_t sampleSize = outputAudioFormat.format
& media_raw_audio_format::B_AUDIO_SIZE_MASK;
// Check that channel count is not still a wild card!
if (outputAudioFormat.channel_count == 0) {
TRACE(" channel_count still a wild-card, assuming stereo.\n");
outputAudioFormat.channel_count = 2;
}
if (outputAudioFormat.buffer_size == 0) {
outputAudioFormat.buffer_size = 512
* sampleSize * outputAudioFormat.channel_count;
}
inOutFormat->type = B_MEDIA_RAW_AUDIO;
inOutFormat->u.raw_audio = outputAudioFormat;
fContext->bit_rate = (int)fInputFormat.u.encoded_audio.bit_rate;
fContext->frame_size = (int)fInputFormat.u.encoded_audio.frame_size;
ConvertRawAudioFormatToAVSampleFormat(
fInputFormat.u.encoded_audio.output.format, fContext->sample_fmt);
fContext->bit_rate
= static_cast<int>(fInputFormat.u.encoded_audio.bit_rate);
fContext->frame_size
= static_cast<int>(fInputFormat.u.encoded_audio.frame_size);
fContext->sample_rate
= (int)fInputFormat.u.encoded_audio.output.frame_rate;
fContext->channels = outputAudioFormat.channel_count;
= static_cast<int>(fInputFormat.u.encoded_audio.output.frame_rate);
fContext->channels
= static_cast<int>(fInputFormat.u.encoded_audio.output.channel_count);
// Check that channel count is not still a wild card!
if (fContext->channels == 0) {
TRACE(" channel_count still a wild-card, assuming stereo.\n");
fContext->channels = 2;
}
fContext->block_align = fBlockAlign;
fContext->extradata = (uint8_t*)fExtraData;
fContext->extradata = reinterpret_cast<uint8_t*>(fExtraData);
fContext->extradata_size = fExtraDataSize;
// TODO: This probably needs to go away, there is some misconception
@ -385,7 +365,8 @@ AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat)
// the infoBuffer passed to GetStreamInfo(). I think this may be why
// the code below was added.
if (fInputFormat.MetaDataSize() > 0) {
fContext->extradata = (uint8_t*)fInputFormat.MetaData();
fContext->extradata = static_cast<uint8_t*>(
const_cast<void*>(fInputFormat.MetaData()));
fContext->extradata_size = fInputFormat.MetaDataSize();
}
@ -399,18 +380,12 @@ AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat)
avcodec_close(fContext);
}
// open new
int result = avcodec_open2(fContext, fCodec, NULL);
fCodecInitDone = (result >= 0);
fOutputFrameSize = sampleSize * outputAudioFormat.channel_count;
fOutputFrameCount = outputAudioFormat.buffer_size / fOutputFrameSize;
fOutputFrameRate = outputAudioFormat.frame_rate;
TRACE(" bit_rate = %d, sample_rate = %d, channels = %d, init = %d, "
"output frame size: %d, count: %ld, rate: %.2f\n",
fContext->bit_rate, fContext->sample_rate, fContext->channels,
result, fOutputFrameSize, fOutputFrameCount, fOutputFrameRate);
if (avcodec_open2(fContext, fCodec, NULL) >= 0)
fCodecInitDone = true;
else {
TRACE("avcodec_open() failed to init codec!\n");
return B_ERROR;
}
fChunkBuffer = NULL;
fChunkBufferSize = 0;
@ -418,20 +393,54 @@ AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat)
fDecodedDataBufferOffset = 0;
fDecodedDataBufferSize = 0;
_ResetTempPacket();
status_t statusOfDecodingFirstFrameChunk = _DecodeNextAudioFrameChunk();
if (statusOfDecodingFirstFrameChunk != B_OK) {
TRACE("[a] decoding first audio frame chunk failed\n");
return B_ERROR;
}
media_multi_audio_format outputAudioFormat;
outputAudioFormat = media_raw_audio_format::wildcard;
outputAudioFormat.byte_order = B_MEDIA_HOST_ENDIAN;
outputAudioFormat.frame_rate = fContext->sample_rate;
outputAudioFormat.channel_count = fContext->channels;
ConvertAVSampleFormatToRawAudioFormat(fContext->sample_fmt,
outputAudioFormat.format);
// Check that format is not still a wild card!
if (outputAudioFormat.format == 0) {
TRACE(" format still a wild-card, assuming B_AUDIO_SHORT.\n");
outputAudioFormat.format = media_raw_audio_format::B_AUDIO_SHORT;
}
outputAudioFormat.buffer_size = inOutFormat->u.raw_audio.buffer_size;
// Check that buffer_size has a sane value
size_t sampleSize = outputAudioFormat.format
& media_raw_audio_format::B_AUDIO_SIZE_MASK;
if (outputAudioFormat.buffer_size == 0) {
outputAudioFormat.buffer_size = 512 * sampleSize
* outputAudioFormat.channel_count;
}
inOutFormat->type = B_MEDIA_RAW_AUDIO;
inOutFormat->u.raw_audio = outputAudioFormat;
inOutFormat->require_flags = 0;
inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS;
// Initialize variables needed to manage decoding as much audio frames as
// needed to fill the buffer_size.
fOutputFrameSize = sampleSize * outputAudioFormat.channel_count;
fOutputFrameCount = outputAudioFormat.buffer_size / fOutputFrameSize;
fOutputFrameRate = outputAudioFormat.frame_rate;
fRawDecodedAudio->opaque
= av_realloc(fRawDecodedAudio->opaque, sizeof(avformat_codec_context));
if (fRawDecodedAudio->opaque == NULL)
return B_NO_MEMORY;
_ResetTempPacket();
inOutFormat->require_flags = 0;
inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS;
if (!fCodecInitDone) {
TRACE("avcodec_open() failed!\n");
return B_ERROR;
}
TRACE(" bit_rate = %d, sample_rate = %d, channels = %d, init = %d, "
"output frame size: %d, count: %ld, rate: %.2f\n",
fContext->bit_rate, fContext->sample_rate, fContext->channels,
result, fOutputFrameSize, fOutputFrameCount, fOutputFrameRate);
return B_OK;
}
@ -449,10 +458,18 @@ AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat)
// This makes video formats play that encode the video properties in
// the video container (e.g. WMV) and not in the video frames
// themself (e.g. MPEG2).
// Note: Doing this step everytime is OK, because the first call to
// _DecodeNextVideoFrame() will update the essential video format
// Note: Doing this step unconditionally is OK, because the first call
// to _DecodeNextVideoFrame() will update the essential video format
// properties accordingly.
bool codecCanHandleIncompleteFrames
= (fCodec->capabilities & CODEC_CAP_TRUNCATED) != 0;
if (codecCanHandleIncompleteFrames) {
// Expect and handle video frames to be splitted across consecutive
// data chunks.
fContext->flags |= CODEC_FLAG_TRUNCATED;
}
// Make MediaPlayer happy (if not in rgb32 screen depth and no overlay,
// it will only ask for YCbCr, which DrawBitmap doesn't handle, so the
// default colordepth is RGB32).
@ -469,17 +486,6 @@ AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat)
fFormatConversionFunc = 0;
#endif
fContext->extradata = (uint8_t*)fExtraData;
fContext->extradata_size = fExtraDataSize;
bool codecCanHandleIncompleteFrames
= (fCodec->capabilities & CODEC_CAP_TRUNCATED) != 0;
if (codecCanHandleIncompleteFrames) {
// Expect and handle video frames to be splitted across consecutive
// data chunks.
fContext->flags |= CODEC_FLAG_TRUNCATED;
}
// close any previous instance
if (fCodecInitDone) {
fCodecInitDone = false;
@ -518,9 +524,7 @@ AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat)
inOutFormat->type = B_MEDIA_RAW_VIDEO;
inOutFormat->require_flags = 0;
inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS;
inOutFormat->u.raw_video = fInputFormat.u.encoded_video.output;
inOutFormat->u.raw_video.interlace = 1;
// Progressive (non-interlaced) video frames are delivered
inOutFormat->u.raw_video.first_active = fHeader.u.raw_video.first_active_line;
@ -529,7 +533,6 @@ AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat)
inOutFormat->u.raw_video.pixel_height_aspect = fHeader.u.raw_video.pixel_height_aspect;
inOutFormat->u.raw_video.field_rate = fOutputFrameRate;
// Was calculated by first call to _DecodeNextVideoFrame()
inOutFormat->u.raw_video.display.format = fOutputColorSpace;
inOutFormat->u.raw_video.display.line_width = fHeader.u.raw_video.display_line_width;
inOutFormat->u.raw_video.display.line_count = fHeader.u.raw_video.display_line_count;
@ -654,8 +657,7 @@ AVCodecDecoder::_DecodeVideo(void* outBuffer, int64* outFrameCount,
_DecodeNextAudioFrame() will then result in the return of
status code B_LAST_BUFFER_ERROR.
ii TODO: A change in the size of the audio frames.
3. TODO: make the following statement hold true, too:
fHeader is populated with the audio frame properties of the first
3. fHeader is populated with the audio frame properties of the first
audio frame in fDecodedData. Especially the start_time field of
fHeader relates to that first audio frame. Start times of
consecutive audio frames in fDecodedData have to be calculated
@ -686,6 +688,8 @@ AVCodecDecoder::_DecodeNextAudioFrame()
size_t maximumSizeOfDecodedData = fOutputFrameCount * fOutputFrameSize;
fDecodedData
= static_cast<uint8_t*>(malloc(maximumSizeOfDecodedData));
if (fDecodedData == NULL)
return B_NO_MEMORY;
}
_ResetRawDecodedAudio();
@ -1243,6 +1247,9 @@ AVCodecDecoder::_ApplyEssentialVideoContainerPropertiesToContext()
ConvertVideoFrameRateToAVCodecContext(containerProperties.field_rate,
*fContext);
}
fContext->extradata = (uint8_t*)fExtraData;
fContext->extradata_size = fExtraDataSize;
}

View File

@ -1,5 +1,6 @@
/*
* Copyright 2009-2010, Stephan Aßmus <superstippi@gmx.de>
* Copyright 2014, Colin Günther <coling@gmx.de>
* All rights reserved. Distributed under the terms of the GNU L-GPL license.
*/
@ -49,22 +50,6 @@ extern "C" {
static const int64 kNoPTSValue = AV_NOPTS_VALUE;
static uint32
avformat_to_beos_format(SampleFormat format)
{
switch (format) {
case SAMPLE_FMT_U8: return media_raw_audio_format::B_AUDIO_UCHAR;
case SAMPLE_FMT_S16: return media_raw_audio_format::B_AUDIO_SHORT;
case SAMPLE_FMT_S32: return media_raw_audio_format::B_AUDIO_INT;
case SAMPLE_FMT_FLT: return media_raw_audio_format::B_AUDIO_FLOAT;
case SAMPLE_FMT_DBL: return media_raw_audio_format::B_AUDIO_DOUBLE;
default:
break;
}
return 0;
}
static uint32
avformat_to_beos_byte_order(SampleFormat format)
{
@ -1087,8 +1072,8 @@ AVFormatReader::Stream::Init(int32 virtualIndex)
format->u.raw_audio.channel_mask = codecContext->channel_layout;
format->u.raw_audio.byte_order
= avformat_to_beos_byte_order(codecContext->sample_fmt);
format->u.raw_audio.format
= avformat_to_beos_format(codecContext->sample_fmt);
ConvertAVSampleFormatToRawAudioFormat(codecContext->sample_fmt,
format->u.raw_audio.format);
format->u.raw_audio.buffer_size = 0;
// Read one packet and mark it for later re-use. (So our first
@ -1115,8 +1100,8 @@ AVFormatReader::Stream::Init(int32 virtualIndex)
= codecContext->channel_layout;
format->u.encoded_audio.output.byte_order
= avformat_to_beos_byte_order(codecContext->sample_fmt);
format->u.encoded_audio.output.format
= avformat_to_beos_format(codecContext->sample_fmt);
ConvertAVSampleFormatToRawAudioFormat(codecContext->sample_fmt,
format->u.encoded_audio.output.format);
if (codecContext->block_align > 0) {
format->u.encoded_audio.output.buffer_size
= codecContext->block_align;

View File

@ -201,7 +201,7 @@ ConvertAVCodecContextToVideoFrameRate(AVCodecContext& contextIn, float& frameRat
\param frameRateIn Contains Media Kits notation of the video frame rate
that will be converted into FFmpegs notation. Must be greater than
zero.
\param contextOut An AVCodecContext structure of FFmpeg.
\param contextOut An AVCodecContext structure of FFmpeg.
On output contains converted values in the following fields (other
fields stay as they were on input):
- AVCodecContext.time_base.num
@ -218,4 +218,97 @@ ConvertVideoFrameRateToAVCodecContext(float frameRateIn,
contextOut.time_base = av_d2q(1.0 / frameRateIn, 1024);
}
/*! \brief Converts the Media Kits notation of an audio sample format to
FFmpegs notation.
\see ConvertAVSampleFormatToRawAudioFormat() for converting in the other
direction.
\param rawAudioFormatIn Contains Media Kits notation of an audio sample
format that will be converted into FFmpegs notation.
\param sampleFormatOut On output contains FFmpegs notation of the passed
audio sample format. Might return AV_SAMPLE_FMT_NONE if there is no
conversion path.
*/
inline void
ConvertRawAudioFormatToAVSampleFormat(uint32 rawAudioFormatIn,
AVSampleFormat& sampleFormatOut)
{
switch(rawAudioFormatIn) {
case media_raw_audio_format::B_AUDIO_FLOAT:
sampleFormatOut = AV_SAMPLE_FMT_FLT;
return;
case media_raw_audio_format::B_AUDIO_DOUBLE:
sampleFormatOut = AV_SAMPLE_FMT_DBL;
return;
case media_raw_audio_format::B_AUDIO_INT:
sampleFormatOut = AV_SAMPLE_FMT_S32;
return;
case media_raw_audio_format::B_AUDIO_SHORT:
sampleFormatOut = AV_SAMPLE_FMT_S16;
return;
case media_raw_audio_format::B_AUDIO_UCHAR:
sampleFormatOut = AV_SAMPLE_FMT_U8;
return;
default:
// Silence compiler warnings about unhandled enumeration values.
break;
}
sampleFormatOut = AV_SAMPLE_FMT_NONE;
}
/*! \brief Converts FFmpegs notation of an audio sample format to the Media
Kits notation.
\see ConvertAVSampleFormatToRawAudioFormat() for converting in the other
direction.
\param sampleFormatIn Contains FFmpegs notation of an audio sample format
that will be converted into the Media Kits notation.
\param rawAudioFormatOut On output contains Media Kits notation of the
passed audio sample format. Might return 0 if there is no conversion
path.
*/
inline void
ConvertAVSampleFormatToRawAudioFormat(AVSampleFormat sampleFormatIn,
uint32& rawAudioFormatOut)
{
switch(sampleFormatIn) {
case AV_SAMPLE_FMT_FLT:
rawAudioFormatOut = media_raw_audio_format::B_AUDIO_FLOAT;
return;
case AV_SAMPLE_FMT_DBL:
rawAudioFormatOut = media_raw_audio_format::B_AUDIO_DOUBLE;
return;
case AV_SAMPLE_FMT_S32:
rawAudioFormatOut = media_raw_audio_format::B_AUDIO_INT;
return;
case AV_SAMPLE_FMT_S16:
rawAudioFormatOut = media_raw_audio_format::B_AUDIO_SHORT;
return;
case AV_SAMPLE_FMT_U8:
rawAudioFormatOut = media_raw_audio_format::B_AUDIO_UCHAR;
return;
default:
// Silence compiler warnings about unhandled enumeration values.
break;
}
const uint32 kBAudioNone = 0;
rawAudioFormatOut = kBAudioNone;
}
#endif // UTILITIES_H

View File

@ -247,14 +247,6 @@ CreateMp3MediaFormat()
return sNoMp3MediaFormat;
}
// TODO: The following code block can be removed, once the ffmpeg addon can
// determine the codec output parameters from the encoded data.
mp3MediaFormat->u.encoded_audio.output.frame_rate = 48000;
mp3MediaFormat->u.encoded_audio.output.channel_count = 2;
mp3MediaFormat->u.encoded_audio.output.buffer_size = 1024;
mp3MediaFormat->u.encoded_audio.output.format
= media_raw_audio_format::B_AUDIO_SHORT;
return mp3MediaFormat;
}