Implemented audio track encoding. There is something wrong with the PTS
generation for the packets and with how I set the time_base in the AVStream and AVStream->codec structures. As a result, the audio streams of the written files report a much too long duration. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@32064 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
parent
33dda791af
commit
3ca4a7b1be
|
@ -34,9 +34,13 @@ AVCodecEncoder::AVCodecEncoder(uint32 codecID)
|
|||
Encoder(),
|
||||
fCodec(NULL),
|
||||
fContext(avcodec_alloc_context()),
|
||||
fCodecInitDone(false),
|
||||
|
||||
fFrame(avcodec_alloc_frame()),
|
||||
fSwsContext(NULL),
|
||||
fCodecInitDone(false),
|
||||
|
||||
fFramesWritten(0),
|
||||
|
||||
fChunkBuffer(new(std::nothrow) uint8[kDefaultChunkBufferSize])
|
||||
{
|
||||
TRACE("AVCodecEncoder::AVCodecEncoder()\n");
|
||||
|
@ -110,7 +114,13 @@ AVCodecEncoder::SetUp(const media_format* inputFormat)
|
|||
if (inputFormat == NULL)
|
||||
return B_BAD_VALUE;
|
||||
|
||||
if (fCodecInitDone) {
|
||||
fCodecInitDone = false;
|
||||
avcodec_close(fContext);
|
||||
}
|
||||
|
||||
fInputFormat = *inputFormat;
|
||||
fFramesWritten = 0;
|
||||
|
||||
if (fInputFormat.type == B_MEDIA_RAW_VIDEO) {
|
||||
// frame rate
|
||||
|
@ -167,6 +177,68 @@ AVCodecEncoder::SetUp(const media_format* inputFormat)
|
|||
PIX_FMT_RGB32, fContext->width, fContext->height,
|
||||
fContext->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
|
||||
|
||||
} else if (fInputFormat.type == B_MEDIA_RAW_AUDIO) {
|
||||
// frame rate
|
||||
fContext->sample_rate = (int)fInputFormat.u.raw_audio.frame_rate;
|
||||
fContext->time_base.den = (int)fInputFormat.u.raw_audio.frame_rate;
|
||||
fContext->time_base.num = 1;
|
||||
// channels
|
||||
fContext->channels = fInputFormat.u.raw_audio.channel_count;
|
||||
switch (fInputFormat.u.raw_audio.format) {
|
||||
case media_raw_audio_format::B_AUDIO_FLOAT:
|
||||
fContext->sample_fmt = SAMPLE_FMT_FLT;
|
||||
break;
|
||||
case media_raw_audio_format::B_AUDIO_DOUBLE:
|
||||
fContext->sample_fmt = SAMPLE_FMT_DBL;
|
||||
break;
|
||||
case media_raw_audio_format::B_AUDIO_INT:
|
||||
fContext->sample_fmt = SAMPLE_FMT_S32;
|
||||
break;
|
||||
case media_raw_audio_format::B_AUDIO_SHORT:
|
||||
fContext->sample_fmt = SAMPLE_FMT_S16;
|
||||
break;
|
||||
case media_raw_audio_format::B_AUDIO_UCHAR:
|
||||
fContext->sample_fmt = SAMPLE_FMT_U8;
|
||||
break;
|
||||
|
||||
case media_raw_audio_format::B_AUDIO_CHAR:
|
||||
default:
|
||||
return B_MEDIA_BAD_FORMAT;
|
||||
break;
|
||||
}
|
||||
if (fInputFormat.u.raw_audio.channel_mask == 0) {
|
||||
// guess the channel mask...
|
||||
switch (fInputFormat.u.raw_audio.channel_count) {
|
||||
default:
|
||||
case 2:
|
||||
fContext->channel_layout = CH_LAYOUT_STEREO;
|
||||
break;
|
||||
case 1:
|
||||
fContext->channel_layout = CH_LAYOUT_MONO;
|
||||
break;
|
||||
case 3:
|
||||
fContext->channel_layout = CH_LAYOUT_SURROUND;
|
||||
break;
|
||||
case 4:
|
||||
fContext->channel_layout = CH_LAYOUT_QUAD;
|
||||
break;
|
||||
case 5:
|
||||
fContext->channel_layout = CH_LAYOUT_5POINT0;
|
||||
break;
|
||||
case 6:
|
||||
fContext->channel_layout = CH_LAYOUT_5POINT1;
|
||||
break;
|
||||
case 8:
|
||||
fContext->channel_layout = CH_LAYOUT_7POINT1;
|
||||
break;
|
||||
case 10:
|
||||
fContext->channel_layout = CH_LAYOUT_7POINT1_WIDE;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// The bits match 1:1 for media_multi_channels and FFmpeg defines.
|
||||
fContext->channel_layout = fInputFormat.u.raw_audio.channel_mask;
|
||||
}
|
||||
} else {
|
||||
return B_NOT_SUPPORTED;
|
||||
}
|
||||
|
@ -221,13 +293,80 @@ AVCodecEncoder::Encode(const void* buffer, int64 frameCount,
|
|||
|
||||
|
||||
status_t
|
||||
AVCodecEncoder::_EncodeAudio(const void* buffer, int64 frameCount,
|
||||
AVCodecEncoder::_EncodeAudio(const void* _buffer, int64 frameCount,
|
||||
media_encode_info* info)
|
||||
{
|
||||
TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", buffer, frameCount,
|
||||
TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", _buffer, frameCount,
|
||||
info);
|
||||
|
||||
return B_NOT_SUPPORTED;
|
||||
if (fChunkBuffer == NULL)
|
||||
return B_NO_MEMORY;
|
||||
|
||||
status_t ret = B_OK;
|
||||
|
||||
const uint8* buffer = reinterpret_cast<const uint8*>(_buffer);
|
||||
|
||||
size_t inputSampleSize = fInputFormat.u.raw_audio.format
|
||||
& media_raw_audio_format::B_AUDIO_SIZE_MASK;
|
||||
size_t inputFrameSize = inputSampleSize
|
||||
* fInputFormat.u.raw_audio.channel_count;
|
||||
|
||||
size_t outSampleSize = av_get_bits_per_sample_format(
|
||||
fContext->sample_fmt) / 8;
|
||||
size_t outSize = outSampleSize * fContext->channels;
|
||||
TRACE(" sampleSize: %ld/%ld, frameSize: %ld/%ld\n",
|
||||
inputSampleSize, inputFrameSize, outSampleSize, outSize);
|
||||
|
||||
size_t bufferSize = frameCount * inputFrameSize;
|
||||
bufferSize = min_c(bufferSize, kDefaultChunkBufferSize);
|
||||
|
||||
while (frameCount > 0) {
|
||||
if (frameCount < fContext->frame_size) {
|
||||
TRACE(" ERROR: too few frames left! (left: %lld, needed: %d)\n",
|
||||
frameCount, fContext->frame_size);
|
||||
// TODO: Handle this some way. Maybe use an av_fifo to buffer data?
|
||||
return B_ERROR;
|
||||
}
|
||||
|
||||
int chunkFrames = fContext->frame_size;
|
||||
|
||||
TRACE(" frames left: %lld, chunk frames: %d\n",
|
||||
frameCount, chunkFrames);
|
||||
|
||||
// Encode one audio chunk/frame.
|
||||
int usedBytes = avcodec_encode_audio(fContext, fChunkBuffer,
|
||||
bufferSize, reinterpret_cast<const short*>(buffer));
|
||||
|
||||
if (usedBytes < 0) {
|
||||
TRACE(" avcodec_encode_video() failed: %d\n", usedBytes);
|
||||
return B_ERROR;
|
||||
}
|
||||
|
||||
// Setup media_encode_info, most important is the time stamp.
|
||||
info->start_time = (bigtime_t)(fFramesWritten * 1000000LL
|
||||
/ fInputFormat.u.raw_audio.frame_rate);
|
||||
|
||||
// Write the chunk
|
||||
ret = WriteChunk(fChunkBuffer, usedBytes, info);
|
||||
if (ret != B_OK)
|
||||
break;
|
||||
|
||||
size_t framesWritten = usedBytes / inputFrameSize;
|
||||
if (chunkFrames == 1) {
|
||||
// For PCM data:
|
||||
framesWritten = usedBytes / inputFrameSize;
|
||||
} else {
|
||||
// For encoded audio:
|
||||
framesWritten = chunkFrames * inputFrameSize;
|
||||
}
|
||||
|
||||
// Skip to next chunk of buffer.
|
||||
fFramesWritten += framesWritten;
|
||||
frameCount -= framesWritten;
|
||||
buffer += usedBytes;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
@ -268,6 +407,10 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount,
|
|||
return B_ERROR;
|
||||
}
|
||||
|
||||
// Setup media_encode_info, most important is the time stamp.
|
||||
info->start_time = (bigtime_t)(fFramesWritten * 1000000LL
|
||||
/ fInputFormat.u.raw_video.field_rate);
|
||||
|
||||
// Write the chunk
|
||||
ret = WriteChunk(fChunkBuffer, usedBytes, info);
|
||||
if (ret != B_OK)
|
||||
|
@ -276,6 +419,7 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount,
|
|||
// Skip to the next frame (but usually, there is only one to encode
|
||||
// for video).
|
||||
frameCount--;
|
||||
fFramesWritten++;
|
||||
buffer = (const void*)((const uint8*)buffer + bufferSize);
|
||||
}
|
||||
|
||||
|
|
|
@ -52,14 +52,14 @@ private:
|
|||
// TODO: Refactor common base class from AVCodec[De|En]Coder!
|
||||
AVCodec* fCodec;
|
||||
AVCodecContext* fContext;
|
||||
bool fCodecInitDone;
|
||||
|
||||
AVPicture fSrcFrame;
|
||||
AVPicture fDstFrame;
|
||||
AVFrame* fFrame;
|
||||
SwsContext* fSwsContext;
|
||||
|
||||
uint32 fAVCodecID;
|
||||
|
||||
bool fCodecInitDone;
|
||||
int64 fFramesWritten;
|
||||
|
||||
uint8* fChunkBuffer;
|
||||
};
|
||||
|
|
|
@ -26,7 +26,7 @@ extern "C" {
|
|||
#include "gfx_util.h"
|
||||
|
||||
|
||||
//#define TRACE_AVFORMAT_READER
|
||||
#define TRACE_AVFORMAT_READER
|
||||
#ifdef TRACE_AVFORMAT_READER
|
||||
# define TRACE printf
|
||||
# define TRACE_IO(a...)
|
||||
|
@ -674,7 +674,7 @@ AVFormatReader::StreamCookie::GetStreamInfo(int64* frameCount,
|
|||
*duration = (bigtime_t)(1000000LL * fStream->duration
|
||||
* fStream->time_base.num / fStream->time_base.den);
|
||||
TRACE(" stream duration: %lld, time_base %.4f (%d/%d)\n",
|
||||
*duration, av_q2d(fStream->time_base),
|
||||
fStream->duration, av_q2d(fStream->time_base),
|
||||
fStream->time_base.num, fStream->time_base.den);
|
||||
} else if ((int64)fContext->duration != kNoPTSValue) {
|
||||
*duration = (bigtime_t)(1000000LL * fContext->duration / AV_TIME_BASE);
|
||||
|
@ -844,6 +844,8 @@ AVFormatReader::StreamCookie::GetNextChunk(const void** chunkBuffer,
|
|||
mediaHeader->destination = -1;
|
||||
mediaHeader->time_source = -1;
|
||||
mediaHeader->size_used = fPacket.size;
|
||||
//TRACE(" PTS: %lld (time_base.num: %d, .den: %d)\n",
|
||||
//fPacket.pts, fStream->time_base.num, fStream->time_base.den);
|
||||
mediaHeader->start_time = (bigtime_t)(1000000.0 * fPacket.pts
|
||||
/ av_q2d(fStream->time_base));
|
||||
mediaHeader->file_pos = fPacket.pos;
|
||||
|
|
|
@ -30,7 +30,7 @@ extern "C" {
|
|||
#ifdef TRACE_AVFORMAT_WRITER
|
||||
# define TRACE printf
|
||||
# define TRACE_IO(a...)
|
||||
# define TRACE_PACKET(a...)
|
||||
# define TRACE_PACKET printf
|
||||
#else
|
||||
# define TRACE(a...)
|
||||
# define TRACE_IO(a...)
|
||||
|
@ -75,6 +75,7 @@ private:
|
|||
// Since different threads may write to the target,
|
||||
// we need to protect the file position and I/O by a lock.
|
||||
BLocker* fStreamLock;
|
||||
int64 fChunksWritten;
|
||||
};
|
||||
|
||||
|
||||
|
@ -84,7 +85,8 @@ AVFormatWriter::StreamCookie::StreamCookie(AVFormatContext* context,
|
|||
:
|
||||
fContext(context),
|
||||
fStream(NULL),
|
||||
fStreamLock(streamLock)
|
||||
fStreamLock(streamLock),
|
||||
fChunksWritten(0)
|
||||
{
|
||||
av_new_packet(&fPacket, 0);
|
||||
}
|
||||
|
@ -118,6 +120,10 @@ AVFormatWriter::StreamCookie::Init(const media_format* format,
|
|||
// frame rate
|
||||
fStream->codec->time_base.den = (int)format->u.raw_video.field_rate;
|
||||
fStream->codec->time_base.num = 1;
|
||||
fStream->r_frame_rate.den = (int)format->u.raw_video.field_rate;
|
||||
fStream->r_frame_rate.num = 1;
|
||||
fStream->time_base.den = (int)format->u.raw_video.field_rate;
|
||||
fStream->time_base.num = 1;
|
||||
// video size
|
||||
fStream->codec->width = format->u.raw_video.display.line_width;
|
||||
fStream->codec->height = format->u.raw_video.display.line_count;
|
||||
|
@ -138,8 +144,74 @@ AVFormatWriter::StreamCookie::Init(const media_format* format,
|
|||
fStream->codec->pix_fmt = PIX_FMT_YUV420P;
|
||||
} else if (format->type == B_MEDIA_RAW_AUDIO) {
|
||||
avcodec_get_context_defaults2(fStream->codec, CODEC_TYPE_AUDIO);
|
||||
// TODO: ...
|
||||
// channels
|
||||
fStream->codec->channels = format->u.raw_audio.channel_count;
|
||||
switch (format->u.raw_audio.format) {
|
||||
case media_raw_audio_format::B_AUDIO_FLOAT:
|
||||
fStream->codec->sample_fmt = SAMPLE_FMT_FLT;
|
||||
break;
|
||||
case media_raw_audio_format::B_AUDIO_DOUBLE:
|
||||
fStream->codec->sample_fmt = SAMPLE_FMT_DBL;
|
||||
break;
|
||||
case media_raw_audio_format::B_AUDIO_INT:
|
||||
fStream->codec->sample_fmt = SAMPLE_FMT_S32;
|
||||
break;
|
||||
case media_raw_audio_format::B_AUDIO_SHORT:
|
||||
fStream->codec->sample_fmt = SAMPLE_FMT_S16;
|
||||
break;
|
||||
case media_raw_audio_format::B_AUDIO_UCHAR:
|
||||
fStream->codec->sample_fmt = SAMPLE_FMT_U8;
|
||||
break;
|
||||
|
||||
case media_raw_audio_format::B_AUDIO_CHAR:
|
||||
default:
|
||||
return B_MEDIA_BAD_FORMAT;
|
||||
break;
|
||||
}
|
||||
if (format->u.raw_audio.channel_mask == 0) {
|
||||
// guess the channel mask...
|
||||
switch (format->u.raw_audio.channel_count) {
|
||||
default:
|
||||
case 2:
|
||||
fStream->codec->channel_layout = CH_LAYOUT_STEREO;
|
||||
break;
|
||||
case 1:
|
||||
fStream->codec->channel_layout = CH_LAYOUT_MONO;
|
||||
break;
|
||||
case 3:
|
||||
fStream->codec->channel_layout = CH_LAYOUT_SURROUND;
|
||||
break;
|
||||
case 4:
|
||||
fStream->codec->channel_layout = CH_LAYOUT_QUAD;
|
||||
break;
|
||||
case 5:
|
||||
fStream->codec->channel_layout = CH_LAYOUT_5POINT0;
|
||||
break;
|
||||
case 6:
|
||||
fStream->codec->channel_layout = CH_LAYOUT_5POINT1;
|
||||
break;
|
||||
case 8:
|
||||
fStream->codec->channel_layout = CH_LAYOUT_7POINT1;
|
||||
break;
|
||||
case 10:
|
||||
fStream->codec->channel_layout = CH_LAYOUT_7POINT1_WIDE;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// The bits match 1:1 for media_multi_channels and FFmpeg defines.
|
||||
fStream->codec->channel_layout = format->u.raw_audio.channel_mask;
|
||||
}
|
||||
// frame rate
|
||||
fStream->codec->sample_rate = (int)format->u.raw_audio.frame_rate;
|
||||
fStream->codec->time_base.den = (int)format->u.raw_audio.frame_rate;
|
||||
fStream->codec->time_base.num = 1;
|
||||
fStream->time_base.den = (int)format->u.raw_audio.frame_rate;
|
||||
fStream->time_base.num = 1;
|
||||
}
|
||||
|
||||
TRACE(" stream->time_base: (%d/%d), codec->time_base: (%d/%d))\n",
|
||||
fStream->time_base.num, fStream->time_base.den,
|
||||
fStream->codec->time_base.num, fStream->codec->time_base.den);
|
||||
|
||||
// TODO: This is a hack for now! Use avcodec_find_encoder_by_name()
|
||||
// or something similar...
|
||||
|
@ -153,8 +225,8 @@ status_t
|
|||
AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer,
|
||||
size_t chunkSize, media_encode_info* encodeInfo)
|
||||
{
|
||||
TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld)\n",
|
||||
chunkBuffer, chunkSize);
|
||||
TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld, "
|
||||
"start_time: %lld)\n", chunkBuffer, chunkSize, encodeInfo->start_time);
|
||||
|
||||
BAutolock _(fStreamLock);
|
||||
|
||||
|
@ -164,6 +236,19 @@ AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer,
|
|||
fPacket.data = const_cast<uint8_t*>((const uint8_t*)chunkBuffer);
|
||||
fPacket.size = chunkSize;
|
||||
|
||||
fPacket.pts = (encodeInfo->start_time
|
||||
* fStream->time_base.den / fStream->time_base.num) / 1000000;
|
||||
TRACE_PACKET(" PTS: %lld (stream->time_base: (%d/%d), "
|
||||
"codec->time_base: (%d/%d))\n", fPacket.pts,
|
||||
fStream->time_base.num, fStream->time_base.den,
|
||||
fStream->codec->time_base.num, fStream->codec->time_base.den);
|
||||
|
||||
// From ffmpeg.c::do_audio_out():
|
||||
// if (enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE)
|
||||
// fPacket.pts = av_rescale_q(enc->coded_frame->pts,
|
||||
// enc->time_base, ost->st->time_base);
|
||||
|
||||
|
||||
#if 0
|
||||
// TODO: Eventually, we need to write interleaved packets, but
|
||||
// maybe we are only supposed to use this if we have actually
|
||||
|
@ -280,12 +365,27 @@ AVFormatWriter::CommitHeader()
|
|||
if (fHeaderWritten)
|
||||
return B_NOT_ALLOWED;
|
||||
|
||||
for (unsigned i = 0; i < fContext->nb_streams; i++) {
|
||||
AVStream* stream = fContext->streams[i];
|
||||
TRACE(" stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n",
|
||||
i, stream->time_base.num, stream->time_base.den,
|
||||
stream->codec->time_base.num, stream->codec->time_base.den);
|
||||
}
|
||||
|
||||
int result = av_write_header(fContext);
|
||||
if (result < 0)
|
||||
TRACE(" av_write_header(): %d\n", result);
|
||||
else
|
||||
fHeaderWritten = true;
|
||||
|
||||
TRACE(" wrote header\n");
|
||||
for (unsigned i = 0; i < fContext->nb_streams; i++) {
|
||||
AVStream* stream = fContext->streams[i];
|
||||
TRACE(" stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n",
|
||||
i, stream->time_base.num, stream->time_base.den,
|
||||
stream->codec->time_base.num, stream->codec->time_base.den);
|
||||
}
|
||||
|
||||
return result == 0 ? B_OK : B_ERROR;
|
||||
}
|
||||
|
||||
|
@ -383,8 +483,8 @@ status_t
|
|||
AVFormatWriter::WriteChunk(void* _cookie, const void* chunkBuffer,
|
||||
size_t chunkSize, media_encode_info* encodeInfo)
|
||||
{
|
||||
TRACE("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer, chunkSize,
|
||||
encodeInfo);
|
||||
TRACE_PACKET("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer,
|
||||
chunkSize, encodeInfo);
|
||||
|
||||
StreamCookie* cookie = reinterpret_cast<StreamCookie*>(_cookie);
|
||||
return cookie->WriteChunk(chunkBuffer, chunkSize, encodeInfo);
|
||||
|
|
|
@ -48,18 +48,18 @@ const EncoderDescription gEncoderTable[] = {
|
|||
B_MEDIA_RAW_VIDEO,
|
||||
B_MEDIA_ENCODED_VIDEO
|
||||
},
|
||||
// {
|
||||
// {
|
||||
// "MP3 Audio",
|
||||
// "mp3",
|
||||
// 0,
|
||||
// CODEC_ID_MP3,
|
||||
// { 0 }
|
||||
// },
|
||||
// B_ANY_FORMAT_FAMILY,
|
||||
// B_MEDIA_RAW_AUDIO,
|
||||
// B_MEDIA_ENCODED_AUDIO
|
||||
// }
|
||||
{
|
||||
{
|
||||
"Raw Audio",
|
||||
"pcm",
|
||||
0,
|
||||
CODEC_ID_PCM_S16LE,
|
||||
{ 0 }
|
||||
},
|
||||
B_ANY_FORMAT_FAMILY,
|
||||
B_MEDIA_RAW_AUDIO,
|
||||
B_MEDIA_ENCODED_AUDIO
|
||||
}
|
||||
};
|
||||
|
||||
const size_t gEncoderCount = sizeof(gEncoderTable) / sizeof(EncoderDescription);
|
||||
|
|
Loading…
Reference in New Issue