Implemented audio track encoding. There is something wrong with the PTS
generation for the packets and with how I set the time_base in the AVStream
and AVStream->codec structures. This results in the audio streams of the
written files reporting a much too long duration.


git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@32064 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Stephan Aßmus 2009-08-03 09:35:30 +00:00
parent 33dda791af
commit 3ca4a7b1be
5 changed files with 274 additions and 28 deletions

View File

@ -34,9 +34,13 @@ AVCodecEncoder::AVCodecEncoder(uint32 codecID)
Encoder(),
fCodec(NULL),
fContext(avcodec_alloc_context()),
fCodecInitDone(false),
fFrame(avcodec_alloc_frame()),
fSwsContext(NULL),
fCodecInitDone(false),
fFramesWritten(0),
fChunkBuffer(new(std::nothrow) uint8[kDefaultChunkBufferSize])
{
TRACE("AVCodecEncoder::AVCodecEncoder()\n");
@ -110,7 +114,13 @@ AVCodecEncoder::SetUp(const media_format* inputFormat)
if (inputFormat == NULL)
return B_BAD_VALUE;
if (fCodecInitDone) {
fCodecInitDone = false;
avcodec_close(fContext);
}
fInputFormat = *inputFormat;
fFramesWritten = 0;
if (fInputFormat.type == B_MEDIA_RAW_VIDEO) {
// frame rate
@ -167,6 +177,68 @@ AVCodecEncoder::SetUp(const media_format* inputFormat)
PIX_FMT_RGB32, fContext->width, fContext->height,
fContext->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL);
} else if (fInputFormat.type == B_MEDIA_RAW_AUDIO) {
// frame rate
fContext->sample_rate = (int)fInputFormat.u.raw_audio.frame_rate;
fContext->time_base.den = (int)fInputFormat.u.raw_audio.frame_rate;
fContext->time_base.num = 1;
// channels
fContext->channels = fInputFormat.u.raw_audio.channel_count;
switch (fInputFormat.u.raw_audio.format) {
case media_raw_audio_format::B_AUDIO_FLOAT:
fContext->sample_fmt = SAMPLE_FMT_FLT;
break;
case media_raw_audio_format::B_AUDIO_DOUBLE:
fContext->sample_fmt = SAMPLE_FMT_DBL;
break;
case media_raw_audio_format::B_AUDIO_INT:
fContext->sample_fmt = SAMPLE_FMT_S32;
break;
case media_raw_audio_format::B_AUDIO_SHORT:
fContext->sample_fmt = SAMPLE_FMT_S16;
break;
case media_raw_audio_format::B_AUDIO_UCHAR:
fContext->sample_fmt = SAMPLE_FMT_U8;
break;
case media_raw_audio_format::B_AUDIO_CHAR:
default:
return B_MEDIA_BAD_FORMAT;
break;
}
if (fInputFormat.u.raw_audio.channel_mask == 0) {
// guess the channel mask...
switch (fInputFormat.u.raw_audio.channel_count) {
default:
case 2:
fContext->channel_layout = CH_LAYOUT_STEREO;
break;
case 1:
fContext->channel_layout = CH_LAYOUT_MONO;
break;
case 3:
fContext->channel_layout = CH_LAYOUT_SURROUND;
break;
case 4:
fContext->channel_layout = CH_LAYOUT_QUAD;
break;
case 5:
fContext->channel_layout = CH_LAYOUT_5POINT0;
break;
case 6:
fContext->channel_layout = CH_LAYOUT_5POINT1;
break;
case 8:
fContext->channel_layout = CH_LAYOUT_7POINT1;
break;
case 10:
fContext->channel_layout = CH_LAYOUT_7POINT1_WIDE;
break;
}
} else {
// The bits match 1:1 for media_multi_channels and FFmpeg defines.
fContext->channel_layout = fInputFormat.u.raw_audio.channel_mask;
}
} else {
return B_NOT_SUPPORTED;
}
@ -221,13 +293,80 @@ AVCodecEncoder::Encode(const void* buffer, int64 frameCount,
status_t
AVCodecEncoder::_EncodeAudio(const void* buffer, int64 frameCount,
AVCodecEncoder::_EncodeAudio(const void* _buffer, int64 frameCount,
media_encode_info* info)
{
TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", buffer, frameCount,
TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", _buffer, frameCount,
info);
return B_NOT_SUPPORTED;
if (fChunkBuffer == NULL)
return B_NO_MEMORY;
status_t ret = B_OK;
const uint8* buffer = reinterpret_cast<const uint8*>(_buffer);
size_t inputSampleSize = fInputFormat.u.raw_audio.format
& media_raw_audio_format::B_AUDIO_SIZE_MASK;
size_t inputFrameSize = inputSampleSize
* fInputFormat.u.raw_audio.channel_count;
size_t outSampleSize = av_get_bits_per_sample_format(
fContext->sample_fmt) / 8;
size_t outSize = outSampleSize * fContext->channels;
TRACE(" sampleSize: %ld/%ld, frameSize: %ld/%ld\n",
inputSampleSize, inputFrameSize, outSampleSize, outSize);
size_t bufferSize = frameCount * inputFrameSize;
bufferSize = min_c(bufferSize, kDefaultChunkBufferSize);
while (frameCount > 0) {
if (frameCount < fContext->frame_size) {
TRACE(" ERROR: too few frames left! (left: %lld, needed: %d)\n",
frameCount, fContext->frame_size);
// TODO: Handle this some way. Maybe use an av_fifo to buffer data?
return B_ERROR;
}
int chunkFrames = fContext->frame_size;
TRACE(" frames left: %lld, chunk frames: %d\n",
frameCount, chunkFrames);
// Encode one audio chunk/frame.
int usedBytes = avcodec_encode_audio(fContext, fChunkBuffer,
bufferSize, reinterpret_cast<const short*>(buffer));
if (usedBytes < 0) {
TRACE(" avcodec_encode_video() failed: %d\n", usedBytes);
return B_ERROR;
}
// Setup media_encode_info, most important is the time stamp.
info->start_time = (bigtime_t)(fFramesWritten * 1000000LL
/ fInputFormat.u.raw_audio.frame_rate);
// Write the chunk
ret = WriteChunk(fChunkBuffer, usedBytes, info);
if (ret != B_OK)
break;
size_t framesWritten = usedBytes / inputFrameSize;
if (chunkFrames == 1) {
// For PCM data:
framesWritten = usedBytes / inputFrameSize;
} else {
// For encoded audio:
framesWritten = chunkFrames * inputFrameSize;
}
// Skip to next chunk of buffer.
fFramesWritten += framesWritten;
frameCount -= framesWritten;
buffer += usedBytes;
}
return ret;
}
@ -268,6 +407,10 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount,
return B_ERROR;
}
// Setup media_encode_info, most important is the time stamp.
info->start_time = (bigtime_t)(fFramesWritten * 1000000LL
/ fInputFormat.u.raw_video.field_rate);
// Write the chunk
ret = WriteChunk(fChunkBuffer, usedBytes, info);
if (ret != B_OK)
@ -276,6 +419,7 @@ AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount,
// Skip to the next frame (but usually, there is only one to encode
// for video).
frameCount--;
fFramesWritten++;
buffer = (const void*)((const uint8*)buffer + bufferSize);
}

View File

@ -52,14 +52,14 @@ private:
// TODO: Refactor common base class from AVCodec[De|En]Coder!
AVCodec* fCodec;
AVCodecContext* fContext;
bool fCodecInitDone;
AVPicture fSrcFrame;
AVPicture fDstFrame;
AVFrame* fFrame;
SwsContext* fSwsContext;
uint32 fAVCodecID;
bool fCodecInitDone;
int64 fFramesWritten;
uint8* fChunkBuffer;
};

View File

@ -26,7 +26,7 @@ extern "C" {
#include "gfx_util.h"
//#define TRACE_AVFORMAT_READER
#define TRACE_AVFORMAT_READER
#ifdef TRACE_AVFORMAT_READER
# define TRACE printf
# define TRACE_IO(a...)
@ -674,7 +674,7 @@ AVFormatReader::StreamCookie::GetStreamInfo(int64* frameCount,
*duration = (bigtime_t)(1000000LL * fStream->duration
* fStream->time_base.num / fStream->time_base.den);
TRACE(" stream duration: %lld, time_base %.4f (%d/%d)\n",
*duration, av_q2d(fStream->time_base),
fStream->duration, av_q2d(fStream->time_base),
fStream->time_base.num, fStream->time_base.den);
} else if ((int64)fContext->duration != kNoPTSValue) {
*duration = (bigtime_t)(1000000LL * fContext->duration / AV_TIME_BASE);
@ -844,6 +844,8 @@ AVFormatReader::StreamCookie::GetNextChunk(const void** chunkBuffer,
mediaHeader->destination = -1;
mediaHeader->time_source = -1;
mediaHeader->size_used = fPacket.size;
//TRACE(" PTS: %lld (time_base.num: %d, .den: %d)\n",
//fPacket.pts, fStream->time_base.num, fStream->time_base.den);
mediaHeader->start_time = (bigtime_t)(1000000.0 * fPacket.pts
/ av_q2d(fStream->time_base));
mediaHeader->file_pos = fPacket.pos;

View File

@ -30,7 +30,7 @@ extern "C" {
#ifdef TRACE_AVFORMAT_WRITER
# define TRACE printf
# define TRACE_IO(a...)
# define TRACE_PACKET(a...)
# define TRACE_PACKET printf
#else
# define TRACE(a...)
# define TRACE_IO(a...)
@ -75,6 +75,7 @@ private:
// Since different threads may write to the target,
// we need to protect the file position and I/O by a lock.
BLocker* fStreamLock;
int64 fChunksWritten;
};
@ -84,7 +85,8 @@ AVFormatWriter::StreamCookie::StreamCookie(AVFormatContext* context,
:
fContext(context),
fStream(NULL),
fStreamLock(streamLock)
fStreamLock(streamLock),
fChunksWritten(0)
{
av_new_packet(&fPacket, 0);
}
@ -118,6 +120,10 @@ AVFormatWriter::StreamCookie::Init(const media_format* format,
// frame rate
fStream->codec->time_base.den = (int)format->u.raw_video.field_rate;
fStream->codec->time_base.num = 1;
fStream->r_frame_rate.den = (int)format->u.raw_video.field_rate;
fStream->r_frame_rate.num = 1;
fStream->time_base.den = (int)format->u.raw_video.field_rate;
fStream->time_base.num = 1;
// video size
fStream->codec->width = format->u.raw_video.display.line_width;
fStream->codec->height = format->u.raw_video.display.line_count;
@ -138,8 +144,74 @@ AVFormatWriter::StreamCookie::Init(const media_format* format,
fStream->codec->pix_fmt = PIX_FMT_YUV420P;
} else if (format->type == B_MEDIA_RAW_AUDIO) {
avcodec_get_context_defaults2(fStream->codec, CODEC_TYPE_AUDIO);
// TODO: ...
// channels
fStream->codec->channels = format->u.raw_audio.channel_count;
switch (format->u.raw_audio.format) {
case media_raw_audio_format::B_AUDIO_FLOAT:
fStream->codec->sample_fmt = SAMPLE_FMT_FLT;
break;
case media_raw_audio_format::B_AUDIO_DOUBLE:
fStream->codec->sample_fmt = SAMPLE_FMT_DBL;
break;
case media_raw_audio_format::B_AUDIO_INT:
fStream->codec->sample_fmt = SAMPLE_FMT_S32;
break;
case media_raw_audio_format::B_AUDIO_SHORT:
fStream->codec->sample_fmt = SAMPLE_FMT_S16;
break;
case media_raw_audio_format::B_AUDIO_UCHAR:
fStream->codec->sample_fmt = SAMPLE_FMT_U8;
break;
case media_raw_audio_format::B_AUDIO_CHAR:
default:
return B_MEDIA_BAD_FORMAT;
break;
}
if (format->u.raw_audio.channel_mask == 0) {
// guess the channel mask...
switch (format->u.raw_audio.channel_count) {
default:
case 2:
fStream->codec->channel_layout = CH_LAYOUT_STEREO;
break;
case 1:
fStream->codec->channel_layout = CH_LAYOUT_MONO;
break;
case 3:
fStream->codec->channel_layout = CH_LAYOUT_SURROUND;
break;
case 4:
fStream->codec->channel_layout = CH_LAYOUT_QUAD;
break;
case 5:
fStream->codec->channel_layout = CH_LAYOUT_5POINT0;
break;
case 6:
fStream->codec->channel_layout = CH_LAYOUT_5POINT1;
break;
case 8:
fStream->codec->channel_layout = CH_LAYOUT_7POINT1;
break;
case 10:
fStream->codec->channel_layout = CH_LAYOUT_7POINT1_WIDE;
break;
}
} else {
// The bits match 1:1 for media_multi_channels and FFmpeg defines.
fStream->codec->channel_layout = format->u.raw_audio.channel_mask;
}
// frame rate
fStream->codec->sample_rate = (int)format->u.raw_audio.frame_rate;
fStream->codec->time_base.den = (int)format->u.raw_audio.frame_rate;
fStream->codec->time_base.num = 1;
fStream->time_base.den = (int)format->u.raw_audio.frame_rate;
fStream->time_base.num = 1;
}
TRACE(" stream->time_base: (%d/%d), codec->time_base: (%d/%d))\n",
fStream->time_base.num, fStream->time_base.den,
fStream->codec->time_base.num, fStream->codec->time_base.den);
// TODO: This is a hack for now! Use avcodec_find_encoder_by_name()
// or something similar...
@ -153,8 +225,8 @@ status_t
AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer,
size_t chunkSize, media_encode_info* encodeInfo)
{
TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld)\n",
chunkBuffer, chunkSize);
TRACE_PACKET("AVFormatWriter::StreamCookie::WriteChunk(%p, %ld, "
"start_time: %lld)\n", chunkBuffer, chunkSize, encodeInfo->start_time);
BAutolock _(fStreamLock);
@ -164,6 +236,19 @@ AVFormatWriter::StreamCookie::WriteChunk(const void* chunkBuffer,
fPacket.data = const_cast<uint8_t*>((const uint8_t*)chunkBuffer);
fPacket.size = chunkSize;
fPacket.pts = (encodeInfo->start_time
* fStream->time_base.den / fStream->time_base.num) / 1000000;
TRACE_PACKET(" PTS: %lld (stream->time_base: (%d/%d), "
"codec->time_base: (%d/%d))\n", fPacket.pts,
fStream->time_base.num, fStream->time_base.den,
fStream->codec->time_base.num, fStream->codec->time_base.den);
// From ffmpeg.c::do_audio_out():
// if (enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE)
// fPacket.pts = av_rescale_q(enc->coded_frame->pts,
// enc->time_base, ost->st->time_base);
#if 0
// TODO: Eventually, we need to write interleaved packets, but
// maybe we are only supposed to use this if we have actually
@ -280,12 +365,27 @@ AVFormatWriter::CommitHeader()
if (fHeaderWritten)
return B_NOT_ALLOWED;
for (unsigned i = 0; i < fContext->nb_streams; i++) {
AVStream* stream = fContext->streams[i];
TRACE(" stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n",
i, stream->time_base.num, stream->time_base.den,
stream->codec->time_base.num, stream->codec->time_base.den);
}
int result = av_write_header(fContext);
if (result < 0)
TRACE(" av_write_header(): %d\n", result);
else
fHeaderWritten = true;
TRACE(" wrote header\n");
for (unsigned i = 0; i < fContext->nb_streams; i++) {
AVStream* stream = fContext->streams[i];
TRACE(" stream[%u] time_base: (%d/%d), codec->time_base: (%d/%d)\n",
i, stream->time_base.num, stream->time_base.den,
stream->codec->time_base.num, stream->codec->time_base.den);
}
return result == 0 ? B_OK : B_ERROR;
}
@ -383,8 +483,8 @@ status_t
AVFormatWriter::WriteChunk(void* _cookie, const void* chunkBuffer,
size_t chunkSize, media_encode_info* encodeInfo)
{
TRACE("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer, chunkSize,
encodeInfo);
TRACE_PACKET("AVFormatWriter::WriteChunk(%p, %ld, %p)\n", chunkBuffer,
chunkSize, encodeInfo);
StreamCookie* cookie = reinterpret_cast<StreamCookie*>(_cookie);
return cookie->WriteChunk(chunkBuffer, chunkSize, encodeInfo);

View File

@ -48,18 +48,18 @@ const EncoderDescription gEncoderTable[] = {
B_MEDIA_RAW_VIDEO,
B_MEDIA_ENCODED_VIDEO
},
// {
// {
// "MP3 Audio",
// "mp3",
// 0,
// CODEC_ID_MP3,
// { 0 }
// },
// B_ANY_FORMAT_FAMILY,
// B_MEDIA_RAW_AUDIO,
// B_MEDIA_ENCODED_AUDIO
// }
{
{
"Raw Audio",
"pcm",
0,
CODEC_ID_PCM_S16LE,
{ 0 }
},
B_ANY_FORMAT_FAMILY,
B_MEDIA_RAW_AUDIO,
B_MEDIA_ENCODED_AUDIO
}
};
const size_t gEncoderCount = sizeof(gEncoderTable) / sizeof(EncoderDescription);