From 3ca4a7b1beb866ba584816103f66031d22b7c003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Mon, 3 Aug 2009 09:35:30 +0000 Subject: [PATCH] Implemented audio track encoding. There is something wrong with the PTS generation for the packets and how I set the time_base in the AVStream and AVStream->codec structures. This causes the audio streams of the written files to report a much too long duration. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@32064 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- .../media/plugins/ffmpeg/AVCodecEncoder.cpp | 152 +++++++++++++++++- .../media/plugins/ffmpeg/AVCodecEncoder.h | 6 +- .../media/plugins/ffmpeg/AVFormatReader.cpp | 6 +- .../media/plugins/ffmpeg/AVFormatWriter.cpp | 114 ++++++++++++- .../media/plugins/ffmpeg/EncoderTable.cpp | 24 +-- 5 files changed, 274 insertions(+), 28 deletions(-) diff --git a/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.cpp b/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.cpp index b38036d359..d7680beb45 100644 --- a/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.cpp +++ b/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.cpp @@ -34,9 +34,13 @@ AVCodecEncoder::AVCodecEncoder(uint32 codecID) Encoder(), fCodec(NULL), fContext(avcodec_alloc_context()), + fCodecInitDone(false), + fFrame(avcodec_alloc_frame()), fSwsContext(NULL), - fCodecInitDone(false), + + fFramesWritten(0), + fChunkBuffer(new(std::nothrow) uint8[kDefaultChunkBufferSize]) { TRACE("AVCodecEncoder::AVCodecEncoder()\n"); @@ -110,7 +114,13 @@ AVCodecEncoder::SetUp(const media_format* inputFormat) if (inputFormat == NULL) return B_BAD_VALUE; + if (fCodecInitDone) { + fCodecInitDone = false; + avcodec_close(fContext); + } + fInputFormat = *inputFormat; + fFramesWritten = 0; if (fInputFormat.type == B_MEDIA_RAW_VIDEO) { // frame rate @@ -167,6 +177,68 @@ AVCodecEncoder::SetUp(const media_format* inputFormat) PIX_FMT_RGB32, fContext->width, fContext->height, fContext->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL); + } else 
if (fInputFormat.type == B_MEDIA_RAW_AUDIO) { + // frame rate + fContext->sample_rate = (int)fInputFormat.u.raw_audio.frame_rate; + fContext->time_base.den = (int)fInputFormat.u.raw_audio.frame_rate; + fContext->time_base.num = 1; + // channels + fContext->channels = fInputFormat.u.raw_audio.channel_count; + switch (fInputFormat.u.raw_audio.format) { + case media_raw_audio_format::B_AUDIO_FLOAT: + fContext->sample_fmt = SAMPLE_FMT_FLT; + break; + case media_raw_audio_format::B_AUDIO_DOUBLE: + fContext->sample_fmt = SAMPLE_FMT_DBL; + break; + case media_raw_audio_format::B_AUDIO_INT: + fContext->sample_fmt = SAMPLE_FMT_S32; + break; + case media_raw_audio_format::B_AUDIO_SHORT: + fContext->sample_fmt = SAMPLE_FMT_S16; + break; + case media_raw_audio_format::B_AUDIO_UCHAR: + fContext->sample_fmt = SAMPLE_FMT_U8; + break; + + case media_raw_audio_format::B_AUDIO_CHAR: + default: + return B_MEDIA_BAD_FORMAT; + break; + } + if (fInputFormat.u.raw_audio.channel_mask == 0) { + // guess the channel mask... + switch (fInputFormat.u.raw_audio.channel_count) { + default: + case 2: + fContext->channel_layout = CH_LAYOUT_STEREO; + break; + case 1: + fContext->channel_layout = CH_LAYOUT_MONO; + break; + case 3: + fContext->channel_layout = CH_LAYOUT_SURROUND; + break; + case 4: + fContext->channel_layout = CH_LAYOUT_QUAD; + break; + case 5: + fContext->channel_layout = CH_LAYOUT_5POINT0; + break; + case 6: + fContext->channel_layout = CH_LAYOUT_5POINT1; + break; + case 8: + fContext->channel_layout = CH_LAYOUT_7POINT1; + break; + case 10: + fContext->channel_layout = CH_LAYOUT_7POINT1_WIDE; + break; + } + } else { + // The bits match 1:1 for media_multi_channels and FFmpeg defines. 
+ fContext->channel_layout = fInputFormat.u.raw_audio.channel_mask; + } } else { return B_NOT_SUPPORTED; } @@ -221,13 +293,80 @@ AVCodecEncoder::Encode(const void* buffer, int64 frameCount, status_t -AVCodecEncoder::_EncodeAudio(const void* buffer, int64 frameCount, +AVCodecEncoder::_EncodeAudio(const void* _buffer, int64 frameCount, media_encode_info* info) { - TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", buffer, frameCount, + TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", _buffer, frameCount, info); - return B_NOT_SUPPORTED; + if (fChunkBuffer == NULL) + return B_NO_MEMORY; + + status_t ret = B_OK; + + const uint8* buffer = reinterpret_cast(_buffer); + + size_t inputSampleSize = fInputFormat.u.raw_audio.format + & media_raw_audio_format::B_AUDIO_SIZE_MASK; + size_t inputFrameSize = inputSampleSize + * fInputFormat.u.raw_audio.channel_count; + + size_t outSampleSize = av_get_bits_per_sample_format( + fContext->sample_fmt) / 8; + size_t outSize = outSampleSize * fContext->channels; + TRACE(" sampleSize: %ld/%ld, frameSize: %ld/%ld\n", + inputSampleSize, inputFrameSize, outSampleSize, outSize); + + size_t bufferSize = frameCount * inputFrameSize; + bufferSize = min_c(bufferSize, kDefaultChunkBufferSize); + + while (frameCount > 0) { + if (frameCount < fContext->frame_size) { + TRACE(" ERROR: too few frames left! (left: %lld, needed: %d)\n", + frameCount, fContext->frame_size); + // TODO: Handle this some way. Maybe use an av_fifo to buffer data? + return B_ERROR; + } + + int chunkFrames = fContext->frame_size; + + TRACE(" frames left: %lld, chunk frames: %d\n", + frameCount, chunkFrames); + + // Encode one audio chunk/frame. + int usedBytes = avcodec_encode_audio(fContext, fChunkBuffer, + bufferSize, reinterpret_cast(buffer)); + + if (usedBytes < 0) { + TRACE(" avcodec_encode_video() failed: %d\n", usedBytes); + return B_ERROR; + } + + // Setup media_encode_info, most important is the time stamp. 
+ info->start_time = (bigtime_t)(fFramesWritten * 1000000LL + / fInputFormat.u.raw_audio.frame_rate); + + // Write the chunk + ret = WriteChunk(fChunkBuffer, usedBytes, info); + if (ret != B_OK) + break; + + size_t framesWritten = usedBytes / inputFrameSize; + if (chunkFrames == 1) { + // For PCM data: + framesWritten = usedBytes / inputFrameSize; + } else { + // For encoded audio: + framesWritten = chunkFrames * inputFrameSize; + } + + // Skip to next chunk of buffer. + fFramesWritten += framesWritten; + frameCount -= framesWritten; + buffer += usedBytes; + } + + return ret; } @@ -268,6 +407,10 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount, return B_ERROR; } + // Setup media_encode_info, most important is the time stamp. + info->start_time = (bigtime_t)(fFramesWritten * 1000000LL + / fInputFormat.u.raw_video.field_rate); + // Write the chunk ret = WriteChunk(fChunkBuffer, usedBytes, info); if (ret != B_OK) @@ -276,6 +419,7 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount, // Skip to the next frame (but usually, there is only one to encode // for video). frameCount--; + fFramesWritten++; buffer = (const void*)((const uint8*)buffer + bufferSize); } diff --git a/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.h b/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.h index af53872575..fe7b94eb90 100644 --- a/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.h +++ b/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.h @@ -52,14 +52,14 @@ private: // TODO: Refactor common base class from AVCodec[De|En]Coder! 
AVCodec* fCodec; AVCodecContext* fContext; + bool fCodecInitDone; + AVPicture fSrcFrame; AVPicture fDstFrame; AVFrame* fFrame; SwsContext* fSwsContext; - uint32 fAVCodecID; - - bool fCodecInitDone; + int64 fFramesWritten; uint8* fChunkBuffer; }; diff --git a/src/add-ons/media/plugins/ffmpeg/AVFormatReader.cpp b/src/add-ons/media/plugins/ffmpeg/AVFormatReader.cpp index aed98da727..79299c5a1c 100644 --- a/src/add-ons/media/plugins/ffmpeg/AVFormatReader.cpp +++ b/src/add-ons/media/plugins/ffmpeg/AVFormatReader.cpp @@ -26,7 +26,7 @@ extern "C" { #include "gfx_util.h" -//#define TRACE_AVFORMAT_READER +#define TRACE_AVFORMAT_READER #ifdef TRACE_AVFORMAT_READER # define TRACE printf # define TRACE_IO(a...) @@ -674,7 +674,7 @@ AVFormatReader::StreamCookie::GetStreamInfo(int64* frameCount, *duration = (bigtime_t)(1000000LL * fStream->duration * fStream->time_base.num / fStream->time_base.den); TRACE(" stream duration: %lld, time_base %.4f (%d/%d)\n", - *duration, av_q2d(fStream->time_base), + fStream->duration, av_q2d(fStream->time_base), fStream->time_base.num, fStream->time_base.den); } else if ((int64)fContext->duration != kNoPTSValue) { *duration = (bigtime_t)(1000000LL * fContext->duration / AV_TIME_BASE); @@ -844,6 +844,8 @@ AVFormatReader::StreamCookie::GetNextChunk(const void** chunkBuffer, mediaHeader->destination = -1; mediaHeader->time_source = -1; mediaHeader->size_used = fPacket.size; +//TRACE(" PTS: %lld (time_base.num: %d, .den: %d)\n", +//fPacket.pts, fStream->time_base.num, fStream->time_base.den); mediaHeader->start_time = (bigtime_t)(1000000.0 * fPacket.pts / av_q2d(fStream->time_base)); mediaHeader->file_pos = fPacket.pos; diff --git a/src/add-ons/media/plugins/ffmpeg/AVFormatWriter.cpp b/src/add-ons/media/plugins/ffmpeg/AVFormatWriter.cpp index 6fa0c9066f..b311a28371 100644 --- a/src/add-ons/media/plugins/ffmpeg/AVFormatWriter.cpp +++ b/src/add-ons/media/plugins/ffmpeg/AVFormatWriter.cpp @@ -30,7 +30,7 @@ extern "C" { #ifdef TRACE_AVFORMAT_WRITER # 
define TRACE printf # define TRACE_IO(a...) -# define TRACE_PACKET(a...) +# define TRACE_PACKET printf #else # define TRACE(a...) # define TRACE_IO(a...) @@ -75,6 +75,7 @@ private: // Since different threads may write to the target, // we need to protect the file position and I/O by a lock. BLocker* fStreamLock; + int64 fChunksWritten; }; @@ -84,7 +85,8 @@ AVFormatWriter::StreamCookie::StreamCookie(AVFormatContext* context, : fContext(context), fStream(NULL), - fStreamLock(streamLock) + fStreamLock(streamLock), + fChunksWritten(0) { av_new_packet(&fPacket, 0); } @@ -118,6 +120,10 @@ AVFormatWriter::StreamCookie::Init(const media_format* format, // frame rate fStream->codec->time_base.den = (int)format->u.raw_video.field_rate; fStream->codec->time_base.num = 1; + fStream->r_frame_rate.den = (int)format->u.raw_video.field_rate; + fStream->r_frame_rate.num = 1; + fStream->time_base.den = (int)format->u.raw_video.field_rate; + fStream->time_base.num = 1; // video size fStream->codec->width = format->u.raw_video.display.line_width; fStream->codec->height = format->u.raw_video.display.line_count; @@ -138,9 +144,75 @@ AVFormatWriter::StreamCookie::Init(const media_format* format, fStream->codec->pix_fmt = PIX_FMT_YUV420P; } else if (format->type == B_MEDIA_RAW_AUDIO) { avcodec_get_context_defaults2(fStream->codec, CODEC_TYPE_AUDIO); - // TODO: ... 
+ // channels + fStream->codec->channels = format->u.raw_audio.channel_count; + switch (format->u.raw_audio.format) { + case media_raw_audio_format::B_AUDIO_FLOAT: + fStream->codec->sample_fmt = SAMPLE_FMT_FLT; + break; + case media_raw_audio_format::B_AUDIO_DOUBLE: + fStream->codec->sample_fmt = SAMPLE_FMT_DBL; + break; + case media_raw_audio_format::B_AUDIO_INT: + fStream->codec->sample_fmt = SAMPLE_FMT_S32; + break; + case media_raw_audio_format::B_AUDIO_SHORT: + fStream->codec->sample_fmt = SAMPLE_FMT_S16; + break; + case media_raw_audio_format::B_AUDIO_UCHAR: + fStream->codec->sample_fmt = SAMPLE_FMT_U8; + break; + + case media_raw_audio_format::B_AUDIO_CHAR: + default: + return B_MEDIA_BAD_FORMAT; + break; + } + if (format->u.raw_audio.channel_mask == 0) { + // guess the channel mask... + switch (format->u.raw_audio.channel_count) { + default: + case 2: + fStream->codec->channel_layout = CH_LAYOUT_STEREO; + break; + case 1: + fStream->codec->channel_layout = CH_LAYOUT_MONO; + break; + case 3: + fStream->codec->channel_layout = CH_LAYOUT_SURROUND; + break; + case 4: + fStream->codec->channel_layout = CH_LAYOUT_QUAD; + break; + case 5: + fStream->codec->channel_layout = CH_LAYOUT_5POINT0; + break; + case 6: + fStream->codec->channel_layout = CH_LAYOUT_5POINT1; + break; + case 8: + fStream->codec->channel_layout = CH_LAYOUT_7POINT1; + break; + case 10: + fStream->codec->channel_layout = CH_LAYOUT_7POINT1_WIDE; + break; + } + } else { + // The bits match 1:1 for media_multi_channels and FFmpeg defines. 
+ fStream->codec->channel_layout = format->u.raw_audio.channel_mask; + } + // frame rate + fStream->codec->sample_rate = (int)format->u.raw_audio.frame_rate; + fStream->codec->time_base.den = (int)format->u.raw_audio.frame_rate; + fStream->codec->time_base.num = 1; + fStream->time_base.den = (int)format->u.raw_audio.frame_rate; + fStream->time_base.num = 1; } + TRACE(" stream->time_base: (%d/%d), codec->time_base: (%d/%d))\n", + fStream->time_base.num, fStream->time_base.den, + fStream->codec->time_base.num, fStream->codec->time_base.den); + // TODO: This is a hack for now! Use avcodec_find_encoder_by_name() // or something similar... fStream->codec->codec_id = (CodecID)codecInfo->sub_id; @@ -153,8 +225,8 @@ status_t AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer, size_t chunkSize, media_encode_info* encodeInfo) { - TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld)\n", - chunkBuffer, chunkSize); + TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld, " + "start_time: %lld)\n", chunkBuffer, chunkSize, encodeInfo->start_time); BAutolock _(fStreamLock); @@ -164,6 +236,19 @@ AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer, fPacket.data = const_cast((const uint8_t*)chunkBuffer); fPacket.size = chunkSize; + fPacket.pts = (encodeInfo->start_time + * fStream->time_base.den / fStream->time_base.num) / 1000000; + TRACE_PACKET(" PTS: %lld (stream->time_base: (%d/%d), " + "codec->time_base: (%d/%d))\n", fPacket.pts, + fStream->time_base.num, fStream->time_base.den, + fStream->codec->time_base.num, fStream->codec->time_base.den); + +// From ffmpeg.c::do_audio_out(): +// if (enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE) +// fPacket.pts = av_rescale_q(enc->coded_frame->pts, +// enc->time_base, ost->st->time_base); + + #if 0 // TODO: Eventually, we need to write interleaved packets, but // maybe we are only supposed to use this if we have actually @@ -280,12 +365,27 @@ AVFormatWriter::CommitHeader() if 
(fHeaderWritten) return B_NOT_ALLOWED; + for (unsigned i = 0; i < fContext->nb_streams; i++) { + AVStream* stream = fContext->streams[i]; + TRACE(" stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n", + i, stream->time_base.num, stream->time_base.den, + stream->codec->time_base.num, stream->codec->time_base.den); + } + int result = av_write_header(fContext); if (result < 0) TRACE(" av_write_header(): %d\n", result); else fHeaderWritten = true; + TRACE(" wrote header\n"); + for (unsigned i = 0; i < fContext->nb_streams; i++) { + AVStream* stream = fContext->streams[i]; + TRACE(" stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n", + i, stream->time_base.num, stream->time_base.den, + stream->codec->time_base.num, stream->codec->time_base.den); + } + return result == 0 ? B_OK : B_ERROR; } @@ -383,8 +483,8 @@ status_t AVFormatWriter::WriteChunk(void* _cookie, const void* chunkBuffer, size_t chunkSize, media_encode_info* encodeInfo) { - TRACE("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer, chunkSize, - encodeInfo); + TRACE_PACKET("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer, + chunkSize, encodeInfo); StreamCookie* cookie = reinterpret_cast(_cookie); return cookie->WriteChunk(chunkBuffer, chunkSize, encodeInfo); diff --git a/src/add-ons/media/plugins/ffmpeg/EncoderTable.cpp b/src/add-ons/media/plugins/ffmpeg/EncoderTable.cpp index 572693f98f..be7db21cad 100644 --- a/src/add-ons/media/plugins/ffmpeg/EncoderTable.cpp +++ b/src/add-ons/media/plugins/ffmpeg/EncoderTable.cpp @@ -48,18 +48,18 @@ const EncoderDescription gEncoderTable[] = { B_MEDIA_RAW_VIDEO, B_MEDIA_ENCODED_VIDEO }, -// { -// { -// "MP3 Audio", -// "mp3", -// 0, -// CODEC_ID_MP3, -// { 0 } -// }, -// B_ANY_FORMAT_FAMILY, -// B_MEDIA_RAW_AUDIO, -// B_MEDIA_ENCODED_AUDIO -// } + { + { + "Raw Audio", + "pcm", + 0, + CODEC_ID_PCM_S16LE, + { 0 } + }, + B_ANY_FORMAT_FAMILY, + B_MEDIA_RAW_AUDIO, + B_MEDIA_ENCODED_AUDIO + } }; const size_t gEncoderCount = sizeof(gEncoderTable) / 
sizeof(EncoderDescription);