Implemented audio track encoding. There is something wrong with the PTS
generation for the packets and with how I set the time_base in the AVStream
and AVStream->codec structures. This results in the audio streams of the
written files reporting a much too long duration.


git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@32064 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Stephan Aßmus 2009-08-03 09:35:30 +00:00
parent 33dda791af
commit 3ca4a7b1be
5 changed files with 274 additions and 28 deletions

View File

@ -34,9 +34,13 @@ AVCodecEncoder::AVCodecEncoder(uint32 codecID)
Encoder(),
fCodec(NULL),
fContext(avcodec_alloc_context()),
fCodecInitDone(false),
fFrame(avcodec_alloc_frame()),
fSwsContext(NULL),
fCodecInitDone(false),
fFramesWritten(0),
fChunkBuffer(new(std::nothrow) uint8[kDefaultChunkBufferSize])
{
TRACE("AVCodecEncoder::AVCodecEncoder()\n");
@ -110,7 +114,13 @@ AVCodecEncoder::SetUp(const media_format* inputFormat)
if (inputFormat == NULL)
return B_BAD_VALUE;
if (fCodecInitDone) {
fCodecInitDone = false;
avcodec_close(fContext);
}
fInputFormat = *inputFormat;
fFramesWritten = 0;
if (fInputFormat.type == B_MEDIA_RAW_VIDEO) {
// frame rate
@ -167,6 +177,68 @@ AVCodecEncoder::SetUp(const media_format* inputFormat)
PIX_FMT_RGB32, fContext->width, fContext->height,
fContext->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
} else if (fInputFormat.type == B_MEDIA_RAW_AUDIO) {
// frame rate
fContext->sample_rate = (int)fInputFormat.u.raw_audio.frame_rate;
fContext->time_base.den = (int)fInputFormat.u.raw_audio.frame_rate;
fContext->time_base.num = 1;
// channels
fContext->channels = fInputFormat.u.raw_audio.channel_count;
switch (fInputFormat.u.raw_audio.format) {
case media_raw_audio_format::B_AUDIO_FLOAT:
fContext->sample_fmt = SAMPLE_FMT_FLT;
break;
case media_raw_audio_format::B_AUDIO_DOUBLE:
fContext->sample_fmt = SAMPLE_FMT_DBL;
break;
case media_raw_audio_format::B_AUDIO_INT:
fContext->sample_fmt = SAMPLE_FMT_S32;
break;
case media_raw_audio_format::B_AUDIO_SHORT:
fContext->sample_fmt = SAMPLE_FMT_S16;
break;
case media_raw_audio_format::B_AUDIO_UCHAR:
fContext->sample_fmt = SAMPLE_FMT_U8;
break;
case media_raw_audio_format::B_AUDIO_CHAR:
default:
return B_MEDIA_BAD_FORMAT;
break;
}
if (fInputFormat.u.raw_audio.channel_mask == 0) {
// guess the channel mask...
switch (fInputFormat.u.raw_audio.channel_count) {
default:
case 2:
fContext->channel_layout = CH_LAYOUT_STEREO;
break;
case 1:
fContext->channel_layout = CH_LAYOUT_MONO;
break;
case 3:
fContext->channel_layout = CH_LAYOUT_SURROUND;
break;
case 4:
fContext->channel_layout = CH_LAYOUT_QUAD;
break;
case 5:
fContext->channel_layout = CH_LAYOUT_5POINT0;
break;
case 6:
fContext->channel_layout = CH_LAYOUT_5POINT1;
break;
case 8:
fContext->channel_layout = CH_LAYOUT_7POINT1;
break;
case 10:
fContext->channel_layout = CH_LAYOUT_7POINT1_WIDE;
break;
}
} else {
// The bits match 1:1 for media_multi_channels and FFmpeg defines.
fContext->channel_layout = fInputFormat.u.raw_audio.channel_mask;
}
} else {
return B_NOT_SUPPORTED;
}
@ -221,13 +293,80 @@ AVCodecEncoder::Encode(const void* buffer, int64 frameCount,
status_t
AVCodecEncoder::_EncodeAudio(const void* buffer, int64 frameCount,
AVCodecEncoder::_EncodeAudio(const void* _buffer, int64 frameCount,
media_encode_info* info)
{
TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", buffer, frameCount,
TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", _buffer, frameCount,
info);
return B_NOT_SUPPORTED;
if (fChunkBuffer == NULL)
return B_NO_MEMORY;
status_t ret = B_OK;
const uint8* buffer = reinterpret_cast<const uint8*>(_buffer);
size_t inputSampleSize = fInputFormat.u.raw_audio.format
& media_raw_audio_format::B_AUDIO_SIZE_MASK;
size_t inputFrameSize = inputSampleSize
* fInputFormat.u.raw_audio.channel_count;
size_t outSampleSize = av_get_bits_per_sample_format(
fContext->sample_fmt) / 8;
size_t outSize = outSampleSize * fContext->channels;
TRACE(" sampleSize: %ld/%ld, frameSize: %ld/%ld\n",
inputSampleSize, inputFrameSize, outSampleSize, outSize);
size_t bufferSize = frameCount * inputFrameSize;
bufferSize = min_c(bufferSize, kDefaultChunkBufferSize);
while (frameCount > 0) {
if (frameCount < fContext->frame_size) {
TRACE(" ERROR: too few frames left! (left: %lld, needed: %d)\n",
frameCount, fContext->frame_size);
// TODO: Handle this some way. Maybe use an av_fifo to buffer data?
return B_ERROR;
}
int chunkFrames = fContext->frame_size;
TRACE(" frames left: %lld, chunk frames: %d\n",
frameCount, chunkFrames);
// Encode one audio chunk/frame.
int usedBytes = avcodec_encode_audio(fContext, fChunkBuffer,
bufferSize, reinterpret_cast<const short*>(buffer));
if (usedBytes < 0) {
TRACE(" avcodec_encode_video() failed: %d\n", usedBytes);
return B_ERROR;
}
// Setup media_encode_info, most important is the time stamp.
info->start_time = (bigtime_t)(fFramesWritten * 1000000LL
/ fInputFormat.u.raw_audio.frame_rate);
// Write the chunk
ret = WriteChunk(fChunkBuffer, usedBytes, info);
if (ret != B_OK)
break;
size_t framesWritten = usedBytes / inputFrameSize;
if (chunkFrames == 1) {
// For PCM data:
framesWritten = usedBytes / inputFrameSize;
} else {
// For encoded audio:
framesWritten = chunkFrames * inputFrameSize;
}
// Skip to next chunk of buffer.
fFramesWritten += framesWritten;
frameCount -= framesWritten;
buffer += usedBytes;
}
return ret;
}
@ -268,6 +407,10 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount,
return B_ERROR;
}
// Setup media_encode_info, most important is the time stamp.
info->start_time = (bigtime_t)(fFramesWritten * 1000000LL
/ fInputFormat.u.raw_video.field_rate);
// Write the chunk
ret = WriteChunk(fChunkBuffer, usedBytes, info);
if (ret != B_OK)
@ -276,6 +419,7 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount,
// Skip to the next frame (but usually, there is only one to encode
// for video).
frameCount--;
fFramesWritten++;
buffer = (const void*)((const uint8*)buffer + bufferSize);
}

View File

@ -52,14 +52,14 @@ private:
// TODO: Refactor common base class from AVCodec[De|En]Coder!
AVCodec* fCodec;
AVCodecContext* fContext;
bool fCodecInitDone;
AVPicture fSrcFrame;
AVPicture fDstFrame;
AVFrame* fFrame;
SwsContext* fSwsContext;
uint32 fAVCodecID;
bool fCodecInitDone;
int64 fFramesWritten;
uint8* fChunkBuffer;
};

View File

@ -26,7 +26,7 @@ extern "C" {
#include "gfx_util.h"
//#define TRACE_AVFORMAT_READER
#define TRACE_AVFORMAT_READER
#ifdef TRACE_AVFORMAT_READER
# define TRACE printf
# define TRACE_IO(a...)
@ -674,7 +674,7 @@ AVFormatReader::StreamCookie::GetStreamInfo(int64* frameCount,
*duration = (bigtime_t)(1000000LL * fStream->duration
* fStream->time_base.num / fStream->time_base.den);
TRACE(" stream duration: %lld, time_base %.4f (%d/%d)\n",
*duration, av_q2d(fStream->time_base),
fStream->duration, av_q2d(fStream->time_base),
fStream->time_base.num, fStream->time_base.den);
} else if ((int64)fContext->duration != kNoPTSValue) {
*duration = (bigtime_t)(1000000LL * fContext->duration / AV_TIME_BASE);
@ -844,6 +844,8 @@ AVFormatReader::StreamCookie::GetNextChunk(const void** chunkBuffer,
mediaHeader->destination = -1;
mediaHeader->time_source = -1;
mediaHeader->size_used = fPacket.size;
//TRACE(" PTS: %lld (time_base.num: %d, .den: %d)\n",
//fPacket.pts, fStream->time_base.num, fStream->time_base.den);
mediaHeader->start_time = (bigtime_t)(1000000.0 * fPacket.pts
/ av_q2d(fStream->time_base));
mediaHeader->file_pos = fPacket.pos;

View File

@ -30,7 +30,7 @@ extern "C" {
#ifdef TRACE_AVFORMAT_WRITER
# define TRACE printf
# define TRACE_IO(a...)
# define TRACE_PACKET(a...)
# define TRACE_PACKET printf
#else
# define TRACE(a...)
# define TRACE_IO(a...)
@ -75,6 +75,7 @@ private:
// Since different threads may write to the target,
// we need to protect the file position and I/O by a lock.
BLocker* fStreamLock;
int64 fChunksWritten;
};
@ -84,7 +85,8 @@ AVFormatWriter::StreamCookie::StreamCookie(AVFormatContext* context,
:
fContext(context),
fStream(NULL),
fStreamLock(streamLock)
fStreamLock(streamLock),
fChunksWritten(0)
{
av_new_packet(&fPacket, 0);
}
@ -118,6 +120,10 @@ AVFormatWriter::StreamCookie::Init(const media_format* format,
// frame rate
fStream->codec->time_base.den = (int)format->u.raw_video.field_rate;
fStream->codec->time_base.num = 1;
fStream->r_frame_rate.den = (int)format->u.raw_video.field_rate;
fStream->r_frame_rate.num = 1;
fStream->time_base.den = (int)format->u.raw_video.field_rate;
fStream->time_base.num = 1;
// video size
fStream->codec->width = format->u.raw_video.display.line_width;
fStream->codec->height = format->u.raw_video.display.line_count;
@ -138,8 +144,74 @@ AVFormatWriter::StreamCookie::Init(const media_format* format,
fStream->codec->pix_fmt = PIX_FMT_YUV420P;
} else if (format->type == B_MEDIA_RAW_AUDIO) {
avcodec_get_context_defaults2(fStream->codec, CODEC_TYPE_AUDIO);
// TODO: ...
// channels
fStream->codec->channels = format->u.raw_audio.channel_count;
switch (format->u.raw_audio.format) {
case media_raw_audio_format::B_AUDIO_FLOAT:
fStream->codec->sample_fmt = SAMPLE_FMT_FLT;
break;
case media_raw_audio_format::B_AUDIO_DOUBLE:
fStream->codec->sample_fmt = SAMPLE_FMT_DBL;
break;
case media_raw_audio_format::B_AUDIO_INT:
fStream->codec->sample_fmt = SAMPLE_FMT_S32;
break;
case media_raw_audio_format::B_AUDIO_SHORT:
fStream->codec->sample_fmt = SAMPLE_FMT_S16;
break;
case media_raw_audio_format::B_AUDIO_UCHAR:
fStream->codec->sample_fmt = SAMPLE_FMT_U8;
break;
case media_raw_audio_format::B_AUDIO_CHAR:
default:
return B_MEDIA_BAD_FORMAT;
break;
}
if (format->u.raw_audio.channel_mask == 0) {
// guess the channel mask...
switch (format->u.raw_audio.channel_count) {
default:
case 2:
fStream->codec->channel_layout = CH_LAYOUT_STEREO;
break;
case 1:
fStream->codec->channel_layout = CH_LAYOUT_MONO;
break;
case 3:
fStream->codec->channel_layout = CH_LAYOUT_SURROUND;
break;
case 4:
fStream->codec->channel_layout = CH_LAYOUT_QUAD;
break;
case 5:
fStream->codec->channel_layout = CH_LAYOUT_5POINT0;
break;
case 6:
fStream->codec->channel_layout = CH_LAYOUT_5POINT1;
break;
case 8:
fStream->codec->channel_layout = CH_LAYOUT_7POINT1;
break;
case 10:
fStream->codec->channel_layout = CH_LAYOUT_7POINT1_WIDE;
break;
}
} else {
// The bits match 1:1 for media_multi_channels and FFmpeg defines.
fStream->codec->channel_layout = format->u.raw_audio.channel_mask;
}
// frame rate
fStream->codec->sample_rate = (int)format->u.raw_audio.frame_rate;
fStream->codec->time_base.den = (int)format->u.raw_audio.frame_rate;
fStream->codec->time_base.num = 1;
fStream->time_base.den = (int)format->u.raw_audio.frame_rate;
fStream->time_base.num = 1;
}
TRACE(" stream->time_base: (%d/%d), codec->time_base: (%d/%d))\n",
fStream->time_base.num, fStream->time_base.den,
fStream->codec->time_base.num, fStream->codec->time_base.den);
// TODO: This is a hack for now! Use avcodec_find_encoder_by_name()
// or something similar...
@ -153,8 +225,8 @@ status_t
AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer,
size_t chunkSize, media_encode_info* encodeInfo)
{
TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld)\n",
chunkBuffer, chunkSize);
TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld, "
"start_time: %lld)\n", chunkBuffer, chunkSize, encodeInfo->start_time);
BAutolock _(fStreamLock);
@ -164,6 +236,19 @@ AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer,
fPacket.data = const_cast<uint8_t*>((const uint8_t*)chunkBuffer);
fPacket.size = chunkSize;
fPacket.pts = (encodeInfo->start_time
* fStream->time_base.den / fStream->time_base.num) / 1000000;
TRACE_PACKET(" PTS: %lld (stream->time_base: (%d/%d), "
"codec->time_base: (%d/%d))\n", fPacket.pts,
fStream->time_base.num, fStream->time_base.den,
fStream->codec->time_base.num, fStream->codec->time_base.den);
// From ffmpeg.c::do_audio_out():
// if (enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE)
// fPacket.pts = av_rescale_q(enc->coded_frame->pts,
// enc->time_base, ost->st->time_base);
#if 0
// TODO: Eventually, we need to write interleaved packets, but
// maybe we are only supposed to use this if we have actually
@ -280,12 +365,27 @@ AVFormatWriter::CommitHeader()
if (fHeaderWritten)
return B_NOT_ALLOWED;
for (unsigned i = 0; i < fContext->nb_streams; i++) {
AVStream* stream = fContext->streams[i];
TRACE(" stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n",
i, stream->time_base.num, stream->time_base.den,
stream->codec->time_base.num, stream->codec->time_base.den);
}
int result = av_write_header(fContext);
if (result < 0)
TRACE(" av_write_header(): %d\n", result);
else
fHeaderWritten = true;
TRACE(" wrote header\n");
for (unsigned i = 0; i < fContext->nb_streams; i++) {
AVStream* stream = fContext->streams[i];
TRACE(" stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n",
i, stream->time_base.num, stream->time_base.den,
stream->codec->time_base.num, stream->codec->time_base.den);
}
return result == 0 ? B_OK : B_ERROR;
}
@ -383,8 +483,8 @@ status_t
AVFormatWriter::WriteChunk(void* _cookie, const void* chunkBuffer,
size_t chunkSize, media_encode_info* encodeInfo)
{
TRACE("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer, chunkSize,
encodeInfo);
TRACE_PACKET("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer,
chunkSize, encodeInfo);
StreamCookie* cookie = reinterpret_cast<StreamCookie*>(_cookie);
return cookie->WriteChunk(chunkBuffer, chunkSize, encodeInfo);

View File

@ -48,18 +48,18 @@ const EncoderDescription gEncoderTable[] = {
B_MEDIA_RAW_VIDEO,
B_MEDIA_ENCODED_VIDEO
},
// {
// {
// "MP3 Audio",
// "mp3",
// 0,
// CODEC_ID_MP3,
// { 0 }
// },
// B_ANY_FORMAT_FAMILY,
// B_MEDIA_RAW_AUDIO,
// B_MEDIA_ENCODED_AUDIO
// }
{
{
"Raw Audio",
"pcm",
0,
CODEC_ID_PCM_S16LE,
{ 0 }
},
B_ANY_FORMAT_FAMILY,
B_MEDIA_RAW_AUDIO,
B_MEDIA_ENCODED_AUDIO
}
};
const size_t gEncoderCount = sizeof(gEncoderTable) / sizeof(EncoderDescription);