From 3ca4a7b1beb866ba584816103f66031d22b7c003 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= <superstippi@gmx.de>
Date: Mon, 3 Aug 2009 09:35:30 +0000
Subject: [PATCH] Implemented audio track encoding. There is something wrong
 with the PTS generation for the packets and with how I set the time_base in
 the AVStream and AVStream->codec structures. As a result, the audio streams
 of the written files report a much too long duration.

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@32064 a95241bf-73f2-0310-859d-f6bbb57e9c96
---
 .../media/plugins/ffmpeg/AVCodecEncoder.cpp   | 152 +++++++++++++++++-
 .../media/plugins/ffmpeg/AVCodecEncoder.h     |   6 +-
 .../media/plugins/ffmpeg/AVFormatReader.cpp   |   6 +-
 .../media/plugins/ffmpeg/AVFormatWriter.cpp   | 114 ++++++++++++-
 .../media/plugins/ffmpeg/EncoderTable.cpp     |  24 +--
 5 files changed, 274 insertions(+), 28 deletions(-)

diff --git a/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.cpp b/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.cpp
index b38036d359..d7680beb45 100644
--- a/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.cpp
+++ b/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.cpp
@@ -34,9 +34,13 @@ AVCodecEncoder::AVCodecEncoder(uint32 codecID)
 	Encoder(),
 	fCodec(NULL),
 	fContext(avcodec_alloc_context()),
+	fCodecInitDone(false),
+
 	fFrame(avcodec_alloc_frame()),
 	fSwsContext(NULL),
-	fCodecInitDone(false),
+
+	fFramesWritten(0),
+
 	fChunkBuffer(new(std::nothrow) uint8[kDefaultChunkBufferSize])
 {
 	TRACE("AVCodecEncoder::AVCodecEncoder()\n");
@@ -110,7 +114,13 @@ AVCodecEncoder::SetUp(const media_format* inputFormat)
 	if (inputFormat == NULL)
 		return B_BAD_VALUE;
 
+	if (fCodecInitDone) {
+		fCodecInitDone = false;
+		avcodec_close(fContext);
+	}
+
 	fInputFormat = *inputFormat;
+	fFramesWritten = 0;
 
 	if (fInputFormat.type == B_MEDIA_RAW_VIDEO) {
 		// frame rate
@@ -167,6 +177,68 @@ AVCodecEncoder::SetUp(const media_format* inputFormat)
 			PIX_FMT_RGB32, fContext->width, fContext->height,
 			fContext->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
 
+	} else if (fInputFormat.type == B_MEDIA_RAW_AUDIO) {
+		// frame rate
+		fContext->sample_rate = (int)fInputFormat.u.raw_audio.frame_rate;
+		fContext->time_base.den = (int)fInputFormat.u.raw_audio.frame_rate;
+		fContext->time_base.num = 1;
+		// channels
+		fContext->channels = fInputFormat.u.raw_audio.channel_count;
+		switch (fInputFormat.u.raw_audio.format) {
+			case media_raw_audio_format::B_AUDIO_FLOAT:
+				fContext->sample_fmt = SAMPLE_FMT_FLT;
+				break;
+			case media_raw_audio_format::B_AUDIO_DOUBLE:
+				fContext->sample_fmt = SAMPLE_FMT_DBL;
+				break;
+			case media_raw_audio_format::B_AUDIO_INT:
+				fContext->sample_fmt = SAMPLE_FMT_S32;
+				break;
+			case media_raw_audio_format::B_AUDIO_SHORT:
+				fContext->sample_fmt = SAMPLE_FMT_S16;
+				break;
+			case media_raw_audio_format::B_AUDIO_UCHAR:
+				fContext->sample_fmt = SAMPLE_FMT_U8;
+				break;
+
+			case media_raw_audio_format::B_AUDIO_CHAR:
+			default:
+				return B_MEDIA_BAD_FORMAT;
+				break;
+		}
+		if (fInputFormat.u.raw_audio.channel_mask == 0) {
+			// guess the channel mask...
+			switch (fInputFormat.u.raw_audio.channel_count) {
+				default:
+				case 2:
+					fContext->channel_layout = CH_LAYOUT_STEREO;
+					break;
+				case 1:
+					fContext->channel_layout = CH_LAYOUT_MONO;
+					break;
+				case 3:
+					fContext->channel_layout = CH_LAYOUT_SURROUND;
+					break;
+				case 4:
+					fContext->channel_layout = CH_LAYOUT_QUAD;
+					break;
+				case 5:
+					fContext->channel_layout = CH_LAYOUT_5POINT0;
+					break;
+				case 6:
+					fContext->channel_layout = CH_LAYOUT_5POINT1;
+					break;
+				case 8:
+					fContext->channel_layout = CH_LAYOUT_7POINT1;
+					break;
+				case 10:
+					fContext->channel_layout = CH_LAYOUT_7POINT1_WIDE;
+					break;
+			}
+		} else {
+			// The bits match 1:1 for media_multi_channels and FFmpeg defines.
+			fContext->channel_layout = fInputFormat.u.raw_audio.channel_mask;
+		}
 	} else {
 		return B_NOT_SUPPORTED;
 	}
@@ -221,13 +293,80 @@ AVCodecEncoder::Encode(const void* buffer, int64 frameCount,
 
 
 status_t
-AVCodecEncoder::_EncodeAudio(const void* buffer, int64 frameCount,
+AVCodecEncoder::_EncodeAudio(const void* _buffer, int64 frameCount,
 	media_encode_info* info)
 {
-	TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", buffer, frameCount,
+	TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", _buffer, frameCount,
 		info);
 
-	return B_NOT_SUPPORTED;
+	// Encodes the raw audio in _buffer chunk by chunk and passes every encoded
+	// chunk to WriteChunk(), with info->start_time derived from fFramesWritten.
+	// Returns B_OK once all frames are consumed, an error code otherwise.
+	if (fChunkBuffer == NULL)
+		return B_NO_MEMORY;
+
+	const uint8* buffer = reinterpret_cast<const uint8*>(_buffer);
+
+	size_t inputSampleSize = fInputFormat.u.raw_audio.format
+		& media_raw_audio_format::B_AUDIO_SIZE_MASK;
+	size_t inputFrameSize = inputSampleSize
+		* fInputFormat.u.raw_audio.channel_count;
+	if (inputFrameSize == 0)
+		return B_MEDIA_BAD_FORMAT;
+
+	while (frameCount > 0) {
+		if (frameCount < fContext->frame_size) {
+			TRACE("  ERROR: too few frames left! (left: %lld, needed: %d)\n",
+				frameCount, fContext->frame_size);
+			// TODO: Handle this some way. Maybe use an av_fifo to buffer data?
+			return B_ERROR;
+		}
+
+		int chunkFrames = fContext->frame_size;
+		TRACE("  frames left: %lld, chunk frames: %d\n",
+			frameCount, chunkFrames);
+
+		// frame_size <= 1 means PCM: the encoder then derives the sample count
+		// from bufferSize, so offer no more than the whole frames still left.
+		size_t bufferSize = kDefaultChunkBufferSize;
+		if (chunkFrames <= 1) {
+			int64 maxFrames = kDefaultChunkBufferSize / inputFrameSize;
+			bufferSize = (size_t)min_c(frameCount, maxFrames)
+				* inputFrameSize;
+		}
+
+		// Encode one audio chunk/frame.
+		int usedBytes = avcodec_encode_audio(fContext, fChunkBuffer,
+			bufferSize, reinterpret_cast<const short*>(buffer));
+		if (usedBytes < 0) {
+			TRACE("  avcodec_encode_audio() failed: %d\n", usedBytes);
+			return B_ERROR;
+		}
+
+		// Setup media_encode_info, most important is the time stamp.
+		info->start_time = (bigtime_t)(fFramesWritten * 1000000LL
+			/ fInputFormat.u.raw_audio.frame_rate);
+
+		status_t ret = WriteChunk(fChunkBuffer, usedBytes, info);
+		if (ret != B_OK)
+			return ret;
+
+		// Count frames, not bytes. For PCM this assumes the encoded sample size
+		// equals the input sample size (TODO confirm for non-16-bit input).
+		int64 framesWritten = chunkFrames;
+		if (chunkFrames <= 1)
+			framesWritten = usedBytes / inputFrameSize;
+		// No progress would loop forever on the same input, so give up.
+		if (framesWritten <= 0)
+			return B_ERROR;
+
+		// Skip to next chunk of buffer.
+		fFramesWritten += framesWritten;
+		frameCount -= framesWritten;
+		buffer += framesWritten * inputFrameSize;
+	}
+
+	return B_OK;
 }
 
 
@@ -268,6 +407,10 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount,
 			return B_ERROR;
 		}
 
+		// Setup media_encode_info, most important is the time stamp.
+		info->start_time = (bigtime_t)(fFramesWritten * 1000000LL
+			/ fInputFormat.u.raw_video.field_rate);
+
 		// Write the chunk
 		ret = WriteChunk(fChunkBuffer, usedBytes, info);
 		if (ret != B_OK)
@@ -276,6 +419,7 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount,
 		// Skip to the next frame (but usually, there is only one to encode
 		// for video).
 		frameCount--;
+		fFramesWritten++;
 		buffer = (const void*)((const uint8*)buffer + bufferSize);
 	}
 
diff --git a/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.h b/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.h
index af53872575..fe7b94eb90 100644
--- a/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.h
+++ b/src/add-ons/media/plugins/ffmpeg/AVCodecEncoder.h
@@ -52,14 +52,14 @@ private:
 			// TODO: Refactor common base class from AVCodec[De|En]Coder!
 			AVCodec*			fCodec;
 			AVCodecContext*		fContext;
+			bool				fCodecInitDone;
+
 			AVPicture			fSrcFrame;
 			AVPicture			fDstFrame;
 			AVFrame*			fFrame;
 			SwsContext*			fSwsContext;
 
-			uint32				fAVCodecID;
-
-			bool				fCodecInitDone;
+			int64				fFramesWritten;
 
 			uint8*				fChunkBuffer;
 };
diff --git a/src/add-ons/media/plugins/ffmpeg/AVFormatReader.cpp b/src/add-ons/media/plugins/ffmpeg/AVFormatReader.cpp
index aed98da727..79299c5a1c 100644
--- a/src/add-ons/media/plugins/ffmpeg/AVFormatReader.cpp
+++ b/src/add-ons/media/plugins/ffmpeg/AVFormatReader.cpp
@@ -26,7 +26,7 @@ extern "C" {
 #include "gfx_util.h"
 
 
-//#define TRACE_AVFORMAT_READER
+#define TRACE_AVFORMAT_READER
 #ifdef TRACE_AVFORMAT_READER
 #	define TRACE printf
 #	define TRACE_IO(a...)
@@ -674,7 +674,7 @@ AVFormatReader::StreamCookie::GetStreamInfo(int64* frameCount,
 		*duration = (bigtime_t)(1000000LL * fStream->duration
 			* fStream->time_base.num / fStream->time_base.den);
 		TRACE("  stream duration: %lld, time_base %.4f (%d/%d)\n",
-			*duration, av_q2d(fStream->time_base),
+			fStream->duration, av_q2d(fStream->time_base),
 			fStream->time_base.num, fStream->time_base.den);
 	} else if ((int64)fContext->duration != kNoPTSValue) {
 		*duration = (bigtime_t)(1000000LL * fContext->duration / AV_TIME_BASE);
@@ -844,6 +844,8 @@ AVFormatReader::StreamCookie::GetNextChunk(const void** chunkBuffer,
 		mediaHeader->destination = -1;
 		mediaHeader->time_source = -1;
 		mediaHeader->size_used = fPacket.size;
+//TRACE("  PTS: %lld (time_base.num: %d, .den: %d)\n",
+//fPacket.pts, fStream->time_base.num, fStream->time_base.den);
 		mediaHeader->start_time = (bigtime_t)(1000000.0 * fPacket.pts
 			/ av_q2d(fStream->time_base));
 		mediaHeader->file_pos = fPacket.pos;
diff --git a/src/add-ons/media/plugins/ffmpeg/AVFormatWriter.cpp b/src/add-ons/media/plugins/ffmpeg/AVFormatWriter.cpp
index 6fa0c9066f..b311a28371 100644
--- a/src/add-ons/media/plugins/ffmpeg/AVFormatWriter.cpp
+++ b/src/add-ons/media/plugins/ffmpeg/AVFormatWriter.cpp
@@ -30,7 +30,7 @@ extern "C" {
 #ifdef TRACE_AVFORMAT_WRITER
 #	define TRACE printf
 #	define TRACE_IO(a...)
-#	define TRACE_PACKET(a...)
+#	define TRACE_PACKET printf
 #else
 #	define TRACE(a...)
 #	define TRACE_IO(a...)
@@ -75,6 +75,7 @@ private:
 			// Since different threads may write to the target,
 			// we need to protect the file position and I/O by a lock.
 			BLocker*			fStreamLock;
+			int64				fChunksWritten;
 };
 
 
@@ -84,7 +85,8 @@ AVFormatWriter::StreamCookie::StreamCookie(AVFormatContext* context,
 	:
 	fContext(context),
 	fStream(NULL),
-	fStreamLock(streamLock)
+	fStreamLock(streamLock),
+	fChunksWritten(0)
 {
 	av_new_packet(&fPacket, 0);
 }
@@ -118,6 +120,10 @@ AVFormatWriter::StreamCookie::Init(const media_format* format,
 		// frame rate
 		fStream->codec->time_base.den = (int)format->u.raw_video.field_rate;
 		fStream->codec->time_base.num = 1;
+		fStream->r_frame_rate.den = (int)format->u.raw_video.field_rate;
+		fStream->r_frame_rate.num = 1;
+		fStream->time_base.den = (int)format->u.raw_video.field_rate;
+		fStream->time_base.num = 1;
 		// video size
 		fStream->codec->width = format->u.raw_video.display.line_width;
 		fStream->codec->height = format->u.raw_video.display.line_count;
@@ -138,9 +144,75 @@ AVFormatWriter::StreamCookie::Init(const media_format* format,
 		fStream->codec->pix_fmt = PIX_FMT_YUV420P;
 	} else if (format->type == B_MEDIA_RAW_AUDIO) {
 		avcodec_get_context_defaults2(fStream->codec, CODEC_TYPE_AUDIO);
-		// TODO: ...
+		// channels
+		fStream->codec->channels = format->u.raw_audio.channel_count;
+		switch (format->u.raw_audio.format) {
+			case media_raw_audio_format::B_AUDIO_FLOAT:
+				fStream->codec->sample_fmt = SAMPLE_FMT_FLT;
+				break;
+			case media_raw_audio_format::B_AUDIO_DOUBLE:
+				fStream->codec->sample_fmt = SAMPLE_FMT_DBL;
+				break;
+			case media_raw_audio_format::B_AUDIO_INT:
+				fStream->codec->sample_fmt = SAMPLE_FMT_S32;
+				break;
+			case media_raw_audio_format::B_AUDIO_SHORT:
+				fStream->codec->sample_fmt = SAMPLE_FMT_S16;
+				break;
+			case media_raw_audio_format::B_AUDIO_UCHAR:
+				fStream->codec->sample_fmt = SAMPLE_FMT_U8;
+				break;
+
+			case media_raw_audio_format::B_AUDIO_CHAR:
+			default:
+				return B_MEDIA_BAD_FORMAT;
+				break;
+		}
+		if (format->u.raw_audio.channel_mask == 0) {
+			// guess the channel mask...
+			switch (format->u.raw_audio.channel_count) {
+				default:
+				case 2:
+					fStream->codec->channel_layout = CH_LAYOUT_STEREO;
+					break;
+				case 1:
+					fStream->codec->channel_layout = CH_LAYOUT_MONO;
+					break;
+				case 3:
+					fStream->codec->channel_layout = CH_LAYOUT_SURROUND;
+					break;
+				case 4:
+					fStream->codec->channel_layout = CH_LAYOUT_QUAD;
+					break;
+				case 5:
+					fStream->codec->channel_layout = CH_LAYOUT_5POINT0;
+					break;
+				case 6:
+					fStream->codec->channel_layout = CH_LAYOUT_5POINT1;
+					break;
+				case 8:
+					fStream->codec->channel_layout = CH_LAYOUT_7POINT1;
+					break;
+				case 10:
+					fStream->codec->channel_layout = CH_LAYOUT_7POINT1_WIDE;
+					break;
+			}
+		} else {
+			// The bits match 1:1 for media_multi_channels and FFmpeg defines.
+			fStream->codec->channel_layout = format->u.raw_audio.channel_mask;
+		}
+		// frame rate
+		fStream->codec->sample_rate = (int)format->u.raw_audio.frame_rate;
+		fStream->codec->time_base.den = (int)format->u.raw_audio.frame_rate;
+		fStream->codec->time_base.num = 1;
+		fStream->time_base.den = (int)format->u.raw_audio.frame_rate;
+		fStream->time_base.num = 1;
 	}
 
+	TRACE("  stream->time_base: (%d/%d), codec->time_base: (%d/%d))\n",
+		fStream->time_base.num, fStream->time_base.den,
+		fStream->codec->time_base.num, fStream->codec->time_base.den);
+
 	// TODO: This is a hack for now! Use avcodec_find_encoder_by_name()
 	// or something similar...
 	fStream->codec->codec_id = (CodecID)codecInfo->sub_id;
@@ -153,8 +225,8 @@ status_t
 AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer,
 	size_t chunkSize, media_encode_info* encodeInfo)
 {
-	TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld)\n",
-		chunkBuffer, chunkSize);
+	TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld, "
+		"start_time: %lld)\n", chunkBuffer, chunkSize, encodeInfo->start_time);
 
 	BAutolock _(fStreamLock);
 
@@ -164,6 +236,19 @@ AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer,
 	fPacket.data = const_cast<uint8_t*>((const uint8_t*)chunkBuffer);
 	fPacket.size = chunkSize;
 
+	fPacket.pts = (encodeInfo->start_time
+		* fStream->time_base.den / fStream->time_base.num) / 1000000;
+	TRACE_PACKET("  PTS: %lld  (stream->time_base: (%d/%d), "
+		"codec->time_base: (%d/%d))\n", fPacket.pts,
+		fStream->time_base.num, fStream->time_base.den,
+		fStream->codec->time_base.num, fStream->codec->time_base.den);
+
+// From ffmpeg.c::do_audio_out():
+//	if (enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE)
+//		fPacket.pts = av_rescale_q(enc->coded_frame->pts,
+//		enc->time_base, ost->st->time_base);
+
+
 #if 0
 	// TODO: Eventually, we need to write interleaved packets, but
 	// maybe we are only supposed to use this if we have actually
@@ -280,12 +365,27 @@ AVFormatWriter::CommitHeader()
 	if (fHeaderWritten)
 		return B_NOT_ALLOWED;
 
+	for (unsigned i = 0; i < fContext->nb_streams; i++) {
+		AVStream* stream = fContext->streams[i];
+		TRACE("  stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n",
+			i, stream->time_base.num, stream->time_base.den,
+			stream->codec->time_base.num, stream->codec->time_base.den);
+	}
+
 	int result = av_write_header(fContext);
 	if (result < 0)
 		TRACE("  av_write_header(): %d\n", result);
 	else
 		fHeaderWritten = true;
 
+	TRACE("  wrote header\n");
+	for (unsigned i = 0; i < fContext->nb_streams; i++) {
+		AVStream* stream = fContext->streams[i];
+		TRACE("  stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n",
+			i, stream->time_base.num, stream->time_base.den,
+			stream->codec->time_base.num, stream->codec->time_base.den);
+	}
+
 	return result == 0 ? B_OK : B_ERROR;
 }
 
@@ -383,8 +483,8 @@ status_t
 AVFormatWriter::WriteChunk(void* _cookie, const void* chunkBuffer,
 	size_t chunkSize, media_encode_info* encodeInfo)
 {
-	TRACE("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer, chunkSize,
-		encodeInfo);
+	TRACE_PACKET("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer,
+		chunkSize, encodeInfo);
 
 	StreamCookie* cookie = reinterpret_cast<StreamCookie*>(_cookie);
 	return cookie->WriteChunk(chunkBuffer, chunkSize, encodeInfo);
diff --git a/src/add-ons/media/plugins/ffmpeg/EncoderTable.cpp b/src/add-ons/media/plugins/ffmpeg/EncoderTable.cpp
index 572693f98f..be7db21cad 100644
--- a/src/add-ons/media/plugins/ffmpeg/EncoderTable.cpp
+++ b/src/add-ons/media/plugins/ffmpeg/EncoderTable.cpp
@@ -48,18 +48,18 @@ const EncoderDescription gEncoderTable[] = {
 		B_MEDIA_RAW_VIDEO,
 		B_MEDIA_ENCODED_VIDEO
 	},
-//	{
-//		{
-//			"MP3 Audio",
-//			"mp3",
-//			0,
-//			CODEC_ID_MP3,
-//			{ 0 }
-//		},
-//		B_ANY_FORMAT_FAMILY,
-//		B_MEDIA_RAW_AUDIO,
-//		B_MEDIA_ENCODED_AUDIO
-//	}
+	{
+		{
+			"Raw Audio",
+			"pcm",
+			0,
+			CODEC_ID_PCM_S16LE,
+			{ 0 }
+		},
+		B_ANY_FORMAT_FAMILY,
+		B_MEDIA_RAW_AUDIO,
+		B_MEDIA_ENCODED_AUDIO
+	}
 };
 
 const size_t gEncoderCount = sizeof(gEncoderTable) / sizeof(EncoderDescription);