diff --git a/libfreerdp/CMakeLists.txt b/libfreerdp/CMakeLists.txt
index 84925220b..653e2c6ae 100644
--- a/libfreerdp/CMakeLists.txt
+++ b/libfreerdp/CMakeLists.txt
@@ -96,6 +96,18 @@ macro (freerdp_compile_options_add)
 	set (LIBFREERDP_COMPILE_OPTIONS ${LIBFREERDP_COMPILE_OPTIONS} PARENT_SCOPE)
 endmacro()
 
+option(WITH_FDK_AAC "Enable FDK_AAC support" OFF) # opt-in, nothing below runs unless enabled
+if (WITH_FDK_AAC)
+	find_package(PkgConfig REQUIRED) # provides pkg_check_modules()
+	pkg_check_modules(FDK_AAC REQUIRED fdk-aac) # REQUIRED: configure stops if fdk-aac is not found
+
+	add_definitions(-DWITH_FDK_AAC) # directory-scoped: also reaches subdirectories added later
+	include_directories(${FDK_AAC_INCLUDE_DIRS})
+
+	link_directories(${FDK_AAC_LIBRARY_DIRS}) # FDK_AAC_LIBRARIES holds bare names, so a search path is needed
+	freerdp_library_add(${FDK_AAC_LIBRARIES}) # NOTE(review): presumably extends the libfreerdp link list - macro body not visible here
+endif()
+
 set(OPUS_DEFAULT OFF)
 if (NOT WITH_DSP_FFMPEG)
 	find_package(Opus)
@@ -160,8 +172,8 @@ foreach(${MODULE_PREFIX}_SUBMODULE ${${MODULE_PREFIX}_SUBMODULES})
     add_subdirectory(${${MODULE_PREFIX}_SUBMODULE})
 endforeach()
 
-if (NOT WITH_DSP_FFMPEG AND NOT WITH_FAAC)
-    message(WARNING "Compiling without WITH_DSP_FFMPEG and WITH_FAAC, AAC encoder support disabled")
+if (NOT WITH_DSP_FFMPEG AND NOT WITH_FAAC AND NOT WITH_FDK_AAC)
+	message(WARNING "Compiling without WITH_DSP_FFMPEG, WITH_FAAC and WITH_FDK_AAC. AAC encoder support disabled")
 endif ()
 
 add_subdirectory(codec)
diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt
index 1ad61cc4a..24fb1450e 100644
--- a/libfreerdp/codec/CMakeLists.txt
+++ b/libfreerdp/codec/CMakeLists.txt
@@ -111,6 +111,14 @@ if(LAME_FOUND)
     include_directories(${LAME_INCLUDE_DIRS})
 endif()
 
+if (WITH_FDK_AAC) # the FDK-AAC backend sources are only added when the option is enabled
+	list(APPEND CODEC_SRCS
+		dsp_fdk_impl.c
+		dsp_fdk_impl.h
+		dsp_fdk_aac.c
+		dsp_fdk_aac.h)
+endif()
+
 if(FAAD2_FOUND)
 	list(APPEND CODEC_LIBS ${FAAD2_LIBRARIES})
     include_directories(${FAAD2_INCLUDE_DIRS})
diff --git a/libfreerdp/codec/dsp.c b/libfreerdp/codec/dsp.c
index 841f185ff..a291ee687 100644
--- a/libfreerdp/codec/dsp.c
+++ b/libfreerdp/codec/dsp.c
@@ -30,6 +30,12 @@
 #include <freerdp/log.h>
 #include <freerdp/codec/dsp.h>
 
+#include "dsp.h"
+
+#if defined(WITH_FDK_AAC)
+#include "dsp_fdk_aac.h"
+#endif
+
 #if !defined(WITH_DSP_FFMPEG)
 #if defined(WITH_GSM)
 #include <gsm/gsm.h>
@@ -84,14 +90,9 @@ typedef union
 
 struct S_FREERDP_DSP_CONTEXT
 {
-	BOOL encoder;
+	FREERDP_DSP_COMMON_CONTEXT common;
 
 	ADPCM adpcm;
-	AUDIO_FORMAT format;
-
-	wStream* channelmix;
-	wStream* resample;
-	wStream* buffer;
 
 #if defined(WITH_GSM)
 	gsm gsm;
@@ -161,36 +162,36 @@ static BOOL freerdp_dsp_channel_mix(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	bpp = srcFormat->wBitsPerSample > 8 ? 2 : 1;
 	samples = size / bpp / srcFormat->nChannels;
 
-	if (context->format.nChannels == srcFormat->nChannels)
+	if (context->common.format.nChannels == srcFormat->nChannels)
 	{
 		*data = src;
 		*length = size;
 		return TRUE;
 	}
 
-	Stream_SetPosition(context->channelmix, 0);
+	Stream_SetPosition(context->common.channelmix, 0);
 
 	/* Destination has more channels than source */
-	if (context->format.nChannels > srcFormat->nChannels)
+	if (context->common.format.nChannels > srcFormat->nChannels)
 	{
 		switch (srcFormat->nChannels)
 		{
 			case 1:
-				if (!Stream_EnsureCapacity(context->channelmix, size * 2))
+				if (!Stream_EnsureCapacity(context->common.channelmix, size * 2))
 					return FALSE;
 
 				for (size_t x = 0; x < samples; x++)
 				{
 					for (size_t y = 0; y < bpp; y++)
-						Stream_Write_UINT8(context->channelmix, src[x * bpp + y]);
+						Stream_Write_UINT8(context->common.channelmix, src[x * bpp + y]);
 
 					for (size_t y = 0; y < bpp; y++)
-						Stream_Write_UINT8(context->channelmix, src[x * bpp + y]);
+						Stream_Write_UINT8(context->common.channelmix, src[x * bpp + y]);
 				}
 
-				Stream_SealLength(context->channelmix);
-				*data = Stream_Buffer(context->channelmix);
-				*length = Stream_Length(context->channelmix);
+				Stream_SealLength(context->common.channelmix);
+				*data = Stream_Buffer(context->common.channelmix);
+				*length = Stream_Length(context->common.channelmix);
 				return TRUE;
 
 			case 2:  /* We only support stereo, so we can not handle this case. */
@@ -203,7 +204,7 @@ static BOOL freerdp_dsp_channel_mix(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	switch (srcFormat->nChannels)
 	{
 		case 2:
-			if (!Stream_EnsureCapacity(context->channelmix, size / 2))
+			if (!Stream_EnsureCapacity(context->common.channelmix, size / 2))
 				return FALSE;
 
 			/* Simply drop second channel.
@@ -211,12 +212,12 @@ static BOOL freerdp_dsp_channel_mix(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 			for (size_t x = 0; x < samples; x++)
 			{
 				for (size_t y = 0; y < bpp; y++)
-					Stream_Write_UINT8(context->channelmix, src[2 * x * bpp + y]);
+					Stream_Write_UINT8(context->common.channelmix, src[2 * x * bpp + y]);
 			}
 
-			Stream_SealLength(context->channelmix);
-			*data = Stream_Buffer(context->channelmix);
-			*length = Stream_Length(context->channelmix);
+			Stream_SealLength(context->common.channelmix);
+			*data = Stream_Buffer(context->common.channelmix);
+			*length = Stream_Length(context->common.channelmix);
 			return TRUE;
 
 		case 1:  /* Invalid, do we want to use a 0 channel sound? */
@@ -262,7 +263,7 @@ static BOOL freerdp_dsp_resample(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	format.wFormatTag = WAVE_FORMAT_UNKNOWN;
 	format.wBitsPerSample = 0;
 
-	if (audio_format_compatible(&format, &context->format))
+	if (audio_format_compatible(&format, &context->common.format))
 	{
 		*data = src;
 		*length = size;
@@ -271,25 +272,27 @@ static BOOL freerdp_dsp_resample(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 
 #if defined(WITH_SOXR)
 	srcBytesPerFrame = (srcFormat->wBitsPerSample > 8) ? 2 : 1;
-	dstBytesPerFrame = (context->format.wBitsPerSample > 8) ? 2 : 1;
+	dstBytesPerFrame = (context->common.format.wBitsPerSample > 8) ? 2 : 1;
 	srcChannels = srcFormat->nChannels;
-	dstChannels = context->format.nChannels;
+	dstChannels = context->common.format.nChannels;
 	sbytes = srcChannels * srcBytesPerFrame;
 	sframes = size / sbytes;
 	rbytes = dstBytesPerFrame * dstChannels;
 	/* Integer rounding correct division */
-	rframes = (sframes * context->format.nSamplesPerSec + (srcFormat->nSamplesPerSec + 1) / 2) /
-	          srcFormat->nSamplesPerSec;
+	rframes =
+	    (sframes * context->common.format.nSamplesPerSec + (srcFormat->nSamplesPerSec + 1) / 2) /
+	    srcFormat->nSamplesPerSec;
 	rsize = rframes * rbytes;
 
-	if (!Stream_EnsureCapacity(context->resample, rsize))
+	if (!Stream_EnsureCapacity(context->common.resample, rsize))
 		return FALSE;
 
-	error = soxr_process(context->sox, src, sframes, &idone, Stream_Buffer(context->resample),
-	                     Stream_Capacity(context->resample) / rbytes, &odone);
-	Stream_SetLength(context->resample, odone * rbytes);
-	*data = Stream_Buffer(context->resample);
-	*length = Stream_Length(context->resample);
+	error =
+	    soxr_process(context->sox, src, sframes, &idone, Stream_Buffer(context->common.resample),
+	                 Stream_Capacity(context->common.resample) / rbytes, &odone);
+	Stream_SetLength(context->common.resample, odone * rbytes);
+	*data = Stream_Buffer(context->common.resample);
+	*length = Stream_Length(context->common.resample);
 	return (error == 0) ? TRUE : FALSE;
 #else
 	WLog_ERR(TAG, "Missing resample support, recompile -DWITH_SOXR=ON or -DWITH_DSP_FFMPEG=ON");
@@ -364,8 +367,8 @@ static BOOL freerdp_dsp_decode_ima_adpcm(FREERDP_DSP_CONTEXT* WINPR_RESTRICT con
 	UINT16 decoded;
 	size_t out_size = size * 4;
 	UINT32 channel;
-	const UINT32 block_size = context->format.nBlockAlign;
-	const UINT32 channels = context->format.nChannels;
+	const UINT32 block_size = context->common.format.nBlockAlign;
+	const UINT32 channels = context->common.format.nChannels;
 
 	if (!Stream_EnsureCapacity(out, out_size))
 		return FALSE;
@@ -508,20 +511,20 @@ static BOOL freerdp_dsp_decode_mp3(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	if (!context || !src || !out)
 		return FALSE;
 
-	buffer_size = 2 * context->format.nChannels * context->format.nSamplesPerSec;
+	buffer_size = 2 * context->common.format.nChannels * context->common.format.nSamplesPerSec;
 
-	if (!Stream_EnsureCapacity(context->buffer, 2 * buffer_size))
+	if (!Stream_EnsureCapacity(context->common.buffer, 2 * buffer_size))
 		return FALSE;
 
-	pcm_l = (short*)Stream_Buffer(context->buffer);
-	pcm_r = (short*)Stream_Buffer(context->buffer) + buffer_size;
+	pcm_l = (short*)Stream_Buffer(context->common.buffer);
+	pcm_r = (short*)Stream_Buffer(context->common.buffer) + buffer_size;
 	rc = hip_decode(context->hip, (unsigned char*)/* API is not modifying content */ src, size,
 	                pcm_l, pcm_r);
 
 	if (rc <= 0)
 		return FALSE;
 
-	if (!Stream_EnsureRemainingCapacity(out, (size_t)rc * context->format.nChannels * 2))
+	if (!Stream_EnsureRemainingCapacity(out, (size_t)rc * context->common.format.nChannels * 2))
 		return FALSE;
 
 	for (size_t x = 0; x < rc; x++)
@@ -543,13 +546,14 @@ static BOOL freerdp_dsp_encode_mp3(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	if (!context || !src || !out)
 		return FALSE;
 
-	samples_per_channel = size / context->format.nChannels / context->format.wBitsPerSample / 8;
+	samples_per_channel =
+	    size / context->common.format.nChannels / context->common.format.wBitsPerSample / 8;
 
 	/* Ensure worst case buffer size for mp3 stream taken from LAME header */
 	if (!Stream_EnsureRemainingCapacity(out, 5 / 4 * samples_per_channel + 7200))
 		return FALSE;
 
-	samples_per_channel = size / 2 /* size of a sample */ / context->format.nChannels;
+	samples_per_channel = size / 2 /* size of a sample */ / context->common.format.nChannels;
 	rc = lame_encode_buffer_interleaved(context->lame, (short*)src, samples_per_channel,
 	                                    Stream_Pointer(out), Stream_GetRemainingCapacity(out));
 
@@ -574,27 +578,27 @@ static BOOL freerdp_dsp_encode_faac(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	if (!context || !src || !out)
 		return FALSE;
 
-	bpp = context->format.wBitsPerSample / 8;
+	bpp = context->common.format.wBitsPerSample / 8;
 	nrSamples = size / bpp;
 
-	if (!Stream_EnsureRemainingCapacity(context->buffer, nrSamples * sizeof(int16_t)))
+	if (!Stream_EnsureRemainingCapacity(context->common.buffer, nrSamples * sizeof(int16_t)))
 		return FALSE;
 
 	for (size_t x = 0; x < nrSamples; x++)
 	{
-		Stream_Write_INT16(context->buffer, inSamples[x]);
-		if (Stream_GetPosition(context->buffer) / bpp >= context->faacInputSamples)
+		Stream_Write_INT16(context->common.buffer, inSamples[x]);
+		if (Stream_GetPosition(context->common.buffer) / bpp >= context->faacInputSamples)
 		{
 			if (!Stream_EnsureRemainingCapacity(out, context->faacMaxOutputBytes))
 				return FALSE;
-			rc = faacEncEncode(context->faac, (int32_t*)Stream_Buffer(context->buffer),
+			rc = faacEncEncode(context->faac, (int32_t*)Stream_Buffer(context->common.buffer),
 			                   context->faacInputSamples, Stream_Pointer(out),
 			                   Stream_GetRemainingCapacity(out));
 			if (rc < 0)
 				return FALSE;
 			if (rc > 0)
 				Stream_Seek(out, (size_t)rc);
-			Stream_SetPosition(context->buffer, 0);
+			Stream_SetPosition(context->common.buffer, 0);
 		}
 	}
 
@@ -614,15 +618,15 @@ static BOOL freerdp_dsp_decode_opus(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 		return FALSE;
 
 	/* Max packet duration is 120ms (5760 at 48KHz) */
-	max_size = OPUS_MAX_FRAMES * context->format.nChannels * sizeof(int16_t);
-	if (!Stream_EnsureRemainingCapacity(context->buffer, max_size))
+	max_size = OPUS_MAX_FRAMES * context->common.format.nChannels * sizeof(int16_t);
+	if (!Stream_EnsureRemainingCapacity(context->common.buffer, max_size))
 		return FALSE;
 
 	frames = opus_decode(context->opus_decoder, src, size, Stream_Pointer(out), OPUS_MAX_FRAMES, 0);
 	if (frames < 0)
 		return FALSE;
 
-	Stream_Seek(out, frames * context->format.nChannels * sizeof(int16_t));
+	Stream_Seek(out, frames * context->common.format.nChannels * sizeof(int16_t));
 
 	return TRUE;
 }
@@ -635,17 +639,17 @@ static BOOL freerdp_dsp_encode_opus(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 		return FALSE;
 
 	/* Max packet duration is 120ms (5760 at 48KHz) */
-	const size_t max_size = OPUS_MAX_FRAMES * context->format.nChannels * sizeof(int16_t);
-	if (!Stream_EnsureRemainingCapacity(context->buffer, max_size))
+	const size_t max_size = OPUS_MAX_FRAMES * context->common.format.nChannels * sizeof(int16_t);
+	if (!Stream_EnsureRemainingCapacity(context->common.buffer, max_size))
 		return FALSE;
 
-	const int src_frames = size / sizeof(opus_int16) / context->format.nChannels;
+	const int src_frames = size / sizeof(opus_int16) / context->common.format.nChannels;
 	const opus_int16* src_data = (const opus_int16*)src;
 	const int frames =
 	    opus_encode(context->opus_encoder, src_data, src_frames, Stream_Pointer(out), max_size);
 	if (frames < 0)
 		return FALSE;
-	return Stream_SafeSeek(out, frames * context->format.nChannels * sizeof(int16_t));
+	return Stream_SafeSeek(out, frames * context->common.format.nChannels * sizeof(int16_t));
 }
 #endif
 
@@ -677,10 +681,10 @@ static BOOL freerdp_dsp_decode_faad(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 		if (err != 0)
 			return FALSE;
 
-		if (channels != context->format.nChannels)
+		if (channels != context->common.format.nChannels)
 			return FALSE;
 
-		if (samplerate != context->format.nSamplesPerSec)
+		if (samplerate != context->common.format.nSamplesPerSec)
 			return FALSE;
 
 		context->faadSetup = TRUE;
@@ -695,8 +699,8 @@ static BOOL freerdp_dsp_decode_faad(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 		} cnv;
 		size_t outSize;
 		void* sample_buffer;
-		outSize = context->format.nSamplesPerSec * context->format.nChannels *
-		          context->format.wBitsPerSample / 8;
+		outSize = context->common.format.nSamplesPerSec * context->common.format.nChannels *
+		          context->common.format.wBitsPerSample / 8;
 
 		if (!Stream_EnsureRemainingCapacity(out, outSize))
 			return FALSE;
@@ -715,7 +719,7 @@ static BOOL freerdp_dsp_decode_faad(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 		if (info.samples == 0)
 			continue;
 
-		Stream_Seek(out, info.samples * context->format.wBitsPerSample / 8);
+		Stream_Seek(out, info.samples * context->common.format.wBitsPerSample / 8);
 	}
 
 	return TRUE;
@@ -811,31 +815,35 @@ static BOOL freerdp_dsp_encode_ima_adpcm(FREERDP_DSP_CONTEXT* WINPR_RESTRICT con
 
 	if (!Stream_EnsureRemainingCapacity(out, size))
 		return FALSE;
+	if (!Stream_EnsureRemainingCapacity(context->common.buffer, size + 64))
+		return FALSE;
 
-	align = (context->format.nChannels > 1) ? 32 : 4;
+	align = (context->common.format.nChannels > 1) ? 32 : 4;
 
 	while (size >= align)
 	{
-		if (Stream_GetPosition(context->buffer) % context->format.nBlockAlign == 0)
+		if (Stream_GetPosition(context->common.buffer) % context->common.format.nBlockAlign == 0)
 		{
-			Stream_Write_UINT8(context->buffer, context->adpcm.ima.last_sample[0] & 0xFF);
-			Stream_Write_UINT8(context->buffer, (context->adpcm.ima.last_sample[0] >> 8) & 0xFF);
-			Stream_Write_UINT8(context->buffer, (BYTE)context->adpcm.ima.last_step[0]);
-			Stream_Write_UINT8(context->buffer, 0);
+			Stream_Write_UINT8(context->common.buffer, context->adpcm.ima.last_sample[0] & 0xFF);
+			Stream_Write_UINT8(context->common.buffer,
+			                   (context->adpcm.ima.last_sample[0] >> 8) & 0xFF);
+			Stream_Write_UINT8(context->common.buffer, (BYTE)context->adpcm.ima.last_step[0]);
+			Stream_Write_UINT8(context->common.buffer, 0);
 
-			if (context->format.nChannels > 1)
+			if (context->common.format.nChannels > 1)
 			{
-				Stream_Write_UINT8(context->buffer, context->adpcm.ima.last_sample[1] & 0xFF);
-				Stream_Write_UINT8(context->buffer,
+				Stream_Write_UINT8(context->common.buffer,
+				                   context->adpcm.ima.last_sample[1] & 0xFF);
+				Stream_Write_UINT8(context->common.buffer,
 				                   (context->adpcm.ima.last_sample[1] >> 8) & 0xFF);
-				Stream_Write_UINT8(context->buffer, (BYTE)context->adpcm.ima.last_step[1]);
-				Stream_Write_UINT8(context->buffer, 0);
+				Stream_Write_UINT8(context->common.buffer, (BYTE)context->adpcm.ima.last_step[1]);
+				Stream_Write_UINT8(context->common.buffer, 0);
 			}
 		}
 
-		if (context->format.nChannels > 1)
+		if (context->common.format.nChannels > 1)
 		{
-			BYTE* dst = Stream_Pointer(context->buffer);
+			BYTE* dst = Stream_Pointer(context->common.buffer);
 			ZeroMemory(dst, 8);
 
 			for (size_t i = 0; i < 16; i++)
@@ -847,7 +855,7 @@ static BOOL freerdp_dsp_encode_ima_adpcm(FREERDP_DSP_CONTEXT* WINPR_RESTRICT con
 				                                          << ima_stereo_encode_map[i].byte_shift;
 			}
 
-			if (!Stream_SafeSeek(context->buffer, 8))
+			if (!Stream_SafeSeek(context->common.buffer, 8))
 				return FALSE;
 			size -= 32;
 		}
@@ -859,15 +867,15 @@ static BOOL freerdp_dsp_encode_ima_adpcm(FREERDP_DSP_CONTEXT* WINPR_RESTRICT con
 			sample = (INT16)(((UINT16)(*src)) | (((UINT16)(*(src + 1))) << 8));
 			src += 2;
 			encoded |= dsp_encode_ima_adpcm_sample(&context->adpcm, 0, sample) << 4;
-			Stream_Write_UINT8(context->buffer, encoded);
+			Stream_Write_UINT8(context->common.buffer, encoded);
 			size -= 4;
 		}
 
-		if (Stream_GetPosition(context->buffer) >= context->adpcm.ima.packet_size)
+		if (Stream_GetPosition(context->common.buffer) >= context->adpcm.ima.packet_size)
 		{
-			BYTE* bsrc = Stream_Buffer(context->buffer);
+			BYTE* bsrc = Stream_Buffer(context->common.buffer);
 			Stream_Write(out, bsrc, context->adpcm.ima.packet_size);
-			Stream_SetPosition(context->buffer, 0);
+			Stream_SetPosition(context->common.buffer, 0);
 		}
 	}
 
@@ -919,8 +927,8 @@ static BOOL freerdp_dsp_decode_ms_adpcm(FREERDP_DSP_CONTEXT* WINPR_RESTRICT cont
 {
 	BYTE sample;
 	const size_t out_size = size * 4;
-	const UINT32 channels = context->format.nChannels;
-	const UINT32 block_size = context->format.nBlockAlign;
+	const UINT32 channels = context->common.format.nChannels;
+	const UINT32 block_size = context->common.format.nBlockAlign;
 
 	if (!Stream_EnsureCapacity(out, out_size))
 		return FALSE;
@@ -1037,7 +1045,7 @@ static BOOL freerdp_dsp_encode_ms_adpcm(FREERDP_DSP_CONTEXT* WINPR_RESTRICT cont
 {
 	size_t start;
 	INT32 sample;
-	const size_t step = 8 + ((context->format.nChannels > 1) ? 4 : 0);
+	const size_t step = 8 + ((context->common.format.nChannels > 1) ? 4 : 0);
 
 	if (!Stream_EnsureRemainingCapacity(out, size))
 		return FALSE;
@@ -1053,9 +1061,9 @@ static BOOL freerdp_dsp_encode_ms_adpcm(FREERDP_DSP_CONTEXT* WINPR_RESTRICT cont
 	while (size >= step)
 	{
 		BYTE val;
-		if ((Stream_GetPosition(out) - start) % context->format.nBlockAlign == 0)
+		if ((Stream_GetPosition(out) - start) % context->common.format.nBlockAlign == 0)
 		{
-			if (context->format.nChannels > 1)
+			if (context->common.format.nChannels > 1)
 			{
 				Stream_Write_UINT8(out, context->adpcm.ms.predictor[0]);
 				Stream_Write_UINT8(out, context->adpcm.ms.predictor[1]);
@@ -1102,7 +1110,7 @@ static BOOL freerdp_dsp_encode_ms_adpcm(FREERDP_DSP_CONTEXT* WINPR_RESTRICT cont
 
 		Stream_Read_UINT8(out, val);
 		val += freerdp_dsp_encode_ms_adpcm_sample(&context->adpcm, sample,
-		                                          context->format.nChannels > 1 ? 1 : 0);
+		                                          context->common.format.nChannels > 1 ? 1 : 0);
 		Stream_Write_UINT8(out, val);
 		size -= 4;
 	}
@@ -1122,22 +1130,10 @@ FREERDP_DSP_CONTEXT* freerdp_dsp_context_new(BOOL encoder)
 	if (!context)
 		return NULL;
 
-	context->channelmix = Stream_New(NULL, 4096);
+	/* Stream allocation can fail: bail out exactly like the inline code this call replaces */
+	if (!freerdp_dsp_common_context_init(&context->common, encoder))
+		goto fail;
 
-	if (!context->channelmix)
-		goto fail;
-
-	context->resample = Stream_New(NULL, 4096);
-
-	if (!context->resample)
-		goto fail;
-
-	context->buffer = Stream_New(NULL, 4096);
-
-	if (!context->buffer)
-		goto fail;
-
-	context->encoder = encoder;
 #if defined(WITH_GSM)
 	context->gsm = gsm_create();
 
@@ -1197,15 +1191,14 @@ void freerdp_dsp_context_free(FREERDP_DSP_CONTEXT* context)
 
 	if (context)
 	{
-		Stream_Free(context->channelmix, TRUE);
-		Stream_Free(context->resample, TRUE);
-		Stream_Free(context->buffer, TRUE);
+		freerdp_dsp_common_context_uninit(&context->common);
+
 #if defined(WITH_GSM)
 		gsm_destroy(context->gsm);
 #endif
 #if defined(WITH_LAME)
 
-		if (context->encoder)
+		if (context->common.encoder)
 			lame_close(context->lame);
 		else
 			hip_decode_exit(context->hip);
@@ -1221,7 +1214,7 @@ void freerdp_dsp_context_free(FREERDP_DSP_CONTEXT* context)
 #endif
 #if defined(WITH_FAAD2)
 
-		if (!context->encoder)
+		if (!context->common.encoder)
 			NeAACDecClose(context->faad);
 
 #endif
@@ -1244,6 +1237,18 @@ BOOL freerdp_dsp_encode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
                         const AUDIO_FORMAT* WINPR_RESTRICT srcFormat,
                         const BYTE* WINPR_RESTRICT data, size_t length, wStream* WINPR_RESTRICT out)
 {
+#if defined(WITH_FDK_AAC)
+	FREERDP_DSP_COMMON_CONTEXT* ctx = (FREERDP_DSP_COMMON_CONTEXT*)context;
+	WINPR_ASSERT(ctx);
+	switch (ctx->format.wFormatTag)
+	{
+		case WAVE_FORMAT_AAC_MS:
+			return fdk_aac_dsp_encode(ctx, srcFormat, data, length, out);
+		default:
+			break;
+	}
+#endif
+
 #if defined(WITH_DSP_FFMPEG)
 	return freerdp_dsp_ffmpeg_encode(context, srcFormat, data, length, out);
 #else
@@ -1251,7 +1256,7 @@ BOOL freerdp_dsp_encode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	size_t resampleLength;
 	AUDIO_FORMAT format;
 
-	if (!context || !context->encoder || !srcFormat || !data || !out)
+	if (!context || !context->common.encoder || !srcFormat || !data || !out)
 		return FALSE;
 
 	format = *srcFormat;
@@ -1259,12 +1264,12 @@ BOOL freerdp_dsp_encode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	if (!freerdp_dsp_channel_mix(context, data, length, srcFormat, &resampleData, &resampleLength))
 		return FALSE;
 
-	format.nChannels = context->format.nChannels;
+	format.nChannels = context->common.format.nChannels;
 
 	if (!freerdp_dsp_resample(context, resampleData, resampleLength, &format, &data, &length))
 		return FALSE;
 
-	switch (context->format.wFormatTag)
+	switch (context->common.format.wFormatTag)
 	{
 		case WAVE_FORMAT_PCM:
 			if (!Stream_EnsureRemainingCapacity(out, length))
@@ -1310,14 +1315,26 @@ BOOL freerdp_dsp_decode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
                         const AUDIO_FORMAT* WINPR_RESTRICT srcFormat,
                         const BYTE* WINPR_RESTRICT data, size_t length, wStream* WINPR_RESTRICT out)
 {
+#if defined(WITH_FDK_AAC)
+	FREERDP_DSP_COMMON_CONTEXT* ctx = (FREERDP_DSP_COMMON_CONTEXT*)context;
+	WINPR_ASSERT(ctx);
+	switch (ctx->format.wFormatTag)
+	{
+		case WAVE_FORMAT_AAC_MS:
+			return fdk_aac_dsp_decode(ctx, srcFormat, data, length, out);
+		default:
+			break;
+	}
+#endif
+
 #if defined(WITH_DSP_FFMPEG)
 	return freerdp_dsp_ffmpeg_decode(context, srcFormat, data, length, out);
 #else
 
-	if (!context || context->encoder || !srcFormat || !data || !out)
+	if (!context || context->common.encoder || !srcFormat || !data || !out)
 		return FALSE;
 
-	switch (context->format.wFormatTag)
+	switch (context->common.format.wFormatTag)
 	{
 		case WAVE_FORMAT_PCM:
 			if (!Stream_EnsureRemainingCapacity(out, length))
@@ -1361,6 +1378,17 @@ BOOL freerdp_dsp_decode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 
 BOOL freerdp_dsp_supports_format(const AUDIO_FORMAT* WINPR_RESTRICT format, BOOL encode)
 {
+#if defined(WITH_FDK_AAC)
+	switch (format->wFormatTag)
+	{
+		case WAVE_FORMAT_AAC_MS:
+			return TRUE;
+		default:
+			break;
+	}
+
+#endif
+
 #if defined(WITH_DSP_FFMPEG)
 	return freerdp_dsp_ffmpeg_supports_format(format, encode);
 #else
@@ -1428,6 +1456,17 @@ BOOL freerdp_dsp_context_reset(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
                                const AUDIO_FORMAT* WINPR_RESTRICT targetFormat,
                                UINT32 FramesPerPacket)
 {
+#if defined(WITH_FDK_AAC)
+	WINPR_ASSERT(targetFormat);
+	if (targetFormat->wFormatTag == WAVE_FORMAT_AAC_MS)
+	{
+		FREERDP_DSP_COMMON_CONTEXT* ctx = (FREERDP_DSP_COMMON_CONTEXT*)context;
+		fdk_aac_dsp_uninit(ctx);
+		ctx->format = *targetFormat;
+		return fdk_aac_dsp_init(ctx, FramesPerPacket);
+	}
+#endif
+
 #if defined(WITH_DSP_FFMPEG)
 	return freerdp_dsp_ffmpeg_context_reset(context, targetFormat);
 #else
@@ -1435,33 +1474,35 @@ BOOL freerdp_dsp_context_reset(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	if (!context || !targetFormat)
 		return FALSE;
 
-	context->format = *targetFormat;
+	context->common.format = *targetFormat;
 
-	if (context->format.wFormatTag == WAVE_FORMAT_DVI_ADPCM)
+	if (context->common.format.wFormatTag == WAVE_FORMAT_DVI_ADPCM)
 	{
-		size_t min_frame_data =
-		    1ull * context->format.wBitsPerSample * context->format.nChannels * FramesPerPacket;
-		size_t data_per_block = (context->format.nBlockAlign - 4 * context->format.nChannels) * 8;
+		size_t min_frame_data = 1ull * context->common.format.wBitsPerSample *
+		                        context->common.format.nChannels * FramesPerPacket;
+		size_t data_per_block =
+		    (context->common.format.nBlockAlign - 4 * context->common.format.nChannels) * 8;
 		size_t nb_block_per_packet = min_frame_data / data_per_block;
 
 		if (min_frame_data % data_per_block)
 			nb_block_per_packet++;
 
-		context->adpcm.ima.packet_size = nb_block_per_packet * context->format.nBlockAlign;
-		Stream_EnsureCapacity(context->buffer, context->adpcm.ima.packet_size);
-		Stream_SetPosition(context->buffer, 0);
+		context->adpcm.ima.packet_size = nb_block_per_packet * context->common.format.nBlockAlign;
+		Stream_EnsureCapacity(context->common.buffer, context->adpcm.ima.packet_size);
+		Stream_SetPosition(context->common.buffer, 0);
 	}
 
 #if defined(WITH_OPUS)
 
-	if (opus_is_valid_samplerate(&context->format))
+	if (opus_is_valid_samplerate(&context->common.format))
 	{
-		if (!context->encoder)
+		if (!context->common.encoder)
 		{
 			int opus_error = OPUS_OK;
 
-			context->opus_decoder = opus_decoder_create(context->format.nSamplesPerSec,
-			                                            context->format.nChannels, &opus_error);
+			context->opus_decoder =
+			    opus_decoder_create(context->common.format.nSamplesPerSec,
+			                        context->common.format.nChannels, &opus_error);
 			if (opus_error != OPUS_OK)
 				return FALSE;
 		}
@@ -1469,14 +1510,15 @@ BOOL freerdp_dsp_context_reset(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 		{
 			int opus_error = OPUS_OK;
 
-			context->opus_encoder =
-			    opus_encoder_create(context->format.nSamplesPerSec, context->format.nChannels,
-			                        OPUS_APPLICATION_VOIP, &opus_error);
+			context->opus_encoder = opus_encoder_create(context->common.format.nSamplesPerSec,
+			                                            context->common.format.nChannels,
+			                                            OPUS_APPLICATION_VOIP, &opus_error);
 			if (opus_error != OPUS_OK)
 				return FALSE;
 
-			opus_error = opus_encoder_ctl(context->opus_encoder,
-			                              OPUS_SET_BITRATE(context->format.nAvgBytesPerSec * 8));
+			opus_error =
+			    opus_encoder_ctl(context->opus_encoder,
+			                     OPUS_SET_BITRATE(context->common.format.nAvgBytesPerSec * 8));
 			if (opus_error != OPUS_OK)
 				return FALSE;
 		}
@@ -1516,8 +1558,9 @@ BOOL freerdp_dsp_context_reset(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 		soxr_io_spec_t iospec = soxr_io_spec(SOXR_INT16, SOXR_INT16);
 		soxr_error_t error;
 		soxr_delete(context->sox);
-		context->sox = soxr_create(context->format.nSamplesPerSec, targetFormat->nSamplesPerSec,
-		                           targetFormat->nChannels, &error, &iospec, NULL, NULL);
+		context->sox =
+		    soxr_create(context->common.format.nSamplesPerSec, targetFormat->nSamplesPerSec,
+		                targetFormat->nChannels, &error, &iospec, NULL, NULL);
 
 		if (!context->sox || (error != 0))
 			return FALSE;
@@ -1526,3 +1569,63 @@ BOOL freerdp_dsp_context_reset(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	return TRUE;
 #endif
 }
+
+/**
+ * Initialise the stream buffers and the direction flag shared by all DSP backends.
+ *
+ * @param context The common context to set up, must not be NULL
+ * @param encode  TRUE for an encoder context, FALSE for a decoder context
+ *
+ * @return TRUE on success. FALSE if a stream could not be allocated; in that
+ *         case every stream allocated so far has been released again.
+ */
+BOOL freerdp_dsp_common_context_init(FREERDP_DSP_COMMON_CONTEXT* context, BOOL encode)
+{
+	WINPR_ASSERT(context);
+
+	/* Start from a defined state: the failure path hands all three pointers to
+	 * Stream_Free, which must never see leftovers from uninitialised memory. */
+	context->buffer = NULL;
+	context->channelmix = NULL;
+	context->resample = NULL;
+	context->encoder = encode;
+
+	context->buffer = Stream_New(NULL, 1024);
+	if (!context->buffer)
+		goto fail;
+
+	context->channelmix = Stream_New(NULL, 1024);
+	if (!context->channelmix)
+		goto fail;
+
+	context->resample = Stream_New(NULL, 1024);
+	if (!context->resample)
+		goto fail;
+
+	return TRUE;
+
+fail:
+	freerdp_dsp_common_context_uninit(context);
+	return FALSE;
+}
+
+/**
+ * Release the streams owned by a common DSP context.
+ *
+ * The pointers are reset to NULL afterwards, so a repeated call only passes
+ * NULL to Stream_Free instead of stale pointers.
+ *
+ * @param context The common context to tear down, must not be NULL
+ */
+void freerdp_dsp_common_context_uninit(FREERDP_DSP_COMMON_CONTEXT* context)
+{
+	WINPR_ASSERT(context);
+
+	Stream_Free(context->buffer, TRUE);
+	Stream_Free(context->channelmix, TRUE);
+	Stream_Free(context->resample, TRUE);
+
+	context->buffer = NULL;
+	context->channelmix = NULL;
+	context->resample = NULL;
+}
diff --git a/libfreerdp/codec/dsp.h b/libfreerdp/codec/dsp.h
index 1325c31e2..c6d2e51a6 100644
--- a/libfreerdp/codec/dsp.h
+++ b/libfreerdp/codec/dsp.h
@@ -25,10 +25,22 @@
 #include <freerdp/codec/audio.h>
 #include <freerdp/codec/dsp.h>
 
-struct S_FREERDP_DSP_COMMON_CONTEXT
+typedef struct
 {
-	wStream* buffer;
-	wStream* resample;
-};
+	ALIGN64 AUDIO_FORMAT format; /* target format, stored by freerdp_dsp_context_reset */
+	ALIGN64 BOOL encoder;        /* TRUE for an encoder context, FALSE for a decoder */
+	ALIGN64 wStream* buffer;     /* scratch stream used by the codec implementations */
+	ALIGN64 wStream* resample;   /* receives the output of freerdp_dsp_resample */
+	ALIGN64 wStream* channelmix; /* receives the output of freerdp_dsp_channel_mix */
+#if defined(WITH_FDK_AAC)
+	ALIGN64 BOOL fdkSetup;        /* checked by fdk_aac_dsp_encode before configuring */
+	ALIGN64 void* fdkAacInstance; /* opaque handle handed to fdk_aac_dsp_impl_config */
+	ALIGN64 size_t buffersize;    /* passed by address to fdk_aac_dsp_impl_config */
+	ALIGN64 unsigned frames_per_packet;
+#endif
+} FREERDP_DSP_COMMON_CONTEXT; /* first member of FREERDP_DSP_CONTEXT, dsp.c casts between them */
+
+BOOL freerdp_dsp_common_context_init(FREERDP_DSP_COMMON_CONTEXT* context, BOOL encode);
+void freerdp_dsp_common_context_uninit(FREERDP_DSP_COMMON_CONTEXT* context);
 
 #endif /* FREERDP_LIB_CODEC_DSP_H */
diff --git a/libfreerdp/codec/dsp_fdk_aac.c b/libfreerdp/codec/dsp_fdk_aac.c
new file mode 100644
index 000000000..61cb80ed9
--- /dev/null
+++ b/libfreerdp/codec/dsp_fdk_aac.c
@@ -0,0 +1,154 @@
+/**
+ * FreeRDP: A Remote Desktop Protocol Implementation
+ * Digital Sound Processing
+ *
+ * Copyright 2022 Armin Novak <anovak@thincast.com>
+ * Copyright 2022 Thincast Technologies GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp_fdk_aac.h"
+#include "dsp_fdk_impl.h"
+
+#include <freerdp/log.h>
+#define TAG FREERDP_TAG("dsp.fdk")
+
+static void write_log(unsigned log_level, const char* fmt, ...)
+{ /* printf-style sink passed to the fdk_aac_dsp_impl_* functions as their log callback */
+	wLog* log = WLog_Get(TAG);
+
+	if (WLog_IsLevelActive(log, log_level)) /* format only when the level is enabled */
+	{
+		char buffer[1024] = { 0 }; /* vsnprintf cuts longer messages to this size */
+
+		va_list ap;
+		va_start(ap, fmt);
+		vsnprintf(buffer, sizeof(buffer), fmt, ap);
+		va_end(ap);
+
+		WLog_PrintMessage(log, WLOG_MESSAGE_TEXT, log_level, __LINE__, __FILE__, __FUNCTION__, "%s",
+		                  buffer); /* __LINE__ etc. expand here, i.e. they name write_log itself */
+	}
+}
+
+BOOL fdk_aac_dsp_encode(FREERDP_DSP_COMMON_CONTEXT* context, const AUDIO_FORMAT* srcFormat,
+                        const BYTE* data, size_t length, wStream* out)
+{
+	/* Encode PCM input with the FDK encoder and append the result to out.
+	 * Returns FALSE for non-PCM input or when setup, buffer growth or encoding fails. */
+	ssize_t produced = -1;
+	WINPR_ASSERT(context);
+	WINPR_ASSERT(srcFormat);
+
+	if (srcFormat->wFormatTag != WAVE_FORMAT_PCM)
+	{
+		WLog_WARN(TAG, "Feeding %s format data to encoder function, but require %s",
+		          audio_format_get_tag_string(srcFormat->wFormatTag),
+		          audio_format_get_tag_string(WAVE_FORMAT_PCM));
+		return FALSE;
+	}
+
+	/* The instance is configured lazily on the first call after fdk_aac_dsp_init. */
+	if (!context->fdkSetup)
+	{
+		const ssize_t status = fdk_aac_dsp_impl_config(
+		    context->fdkAacInstance, &context->buffersize, context->encoder,
+		    context->format.nSamplesPerSec, context->format.nChannels,
+		    context->format.nAvgBytesPerSec, context->frames_per_packet, write_log);
+		if (status < 0)
+			return FALSE;
+		context->fdkSetup = TRUE;
+	}
+
+	if (!Stream_EnsureRemainingCapacity(out, context->buffersize))
+		return FALSE;
+
+	produced = fdk_aac_dsp_impl_encode(context->fdkAacInstance, data, length, Stream_Pointer(out),
+	                                   Stream_GetRemainingCapacity(out), write_log);
+	if (produced < 0)
+		return FALSE;
+	Stream_Seek(out, (size_t)produced);
+	return TRUE;
+}
+
+BOOL fdk_aac_dsp_decode(FREERDP_DSP_COMMON_CONTEXT* context, const AUDIO_FORMAT* srcFormat,
+                        const BYTE* data, size_t length, wStream* out)
+{
+	/* Decode AAC input and append the PCM output to out; returns FALSE on any failure. */
+	WINPR_ASSERT(context);
+	WINPR_ASSERT(srcFormat);
+
+	if (srcFormat->wFormatTag != WAVE_FORMAT_AAC_MS)
+	{
+		WLog_WARN(TAG, "Feeding %s format data to decoder function, but require %s",
+		          audio_format_get_tag_string(srcFormat->wFormatTag),
+		          audio_format_get_tag_string(WAVE_FORMAT_AAC_MS));
+		return FALSE;
+	}
+
+	if (!context->fdkSetup)
+	{
+		ssize_t rc = fdk_aac_dsp_impl_config(
+		    context->fdkAacInstance, &context->buffersize, context->encoder,
+		    context->format.nSamplesPerSec, context->format.nChannels,
+		    context->format.nAvgBytesPerSec, context->frames_per_packet, write_log);
+		if (rc < 0)
+			return FALSE;
+		context->fdkSetup = TRUE;
+	}
+
+	size_t offset = 0; /* bytes of data the decoder has already taken */
+	ssize_t rest = 0;
+	do
+	{
+		/* Offer only the unconsumed tail; re-sending data from the start duplicates input. */
+		rest = fdk_aac_dsp_impl_decode_fill(context->fdkAacInstance, data ? &data[offset] : NULL,
+		                                    length - offset, write_log);
+		if ((rest < 0) || ((size_t)rest > length - offset))
+		{
+			WLog_WARN(TAG, "DecodeFill() failed");
+			return FALSE;
+		}
+		offset = length - (size_t)rest;
+		ssize_t ret = -1;
+		do
+		{
+			const size_t expect = context->buffersize;
+			if (!Stream_EnsureRemainingCapacity(out, expect))
+				return FALSE;
+			ret = fdk_aac_dsp_impl_decode_read(context->fdkAacInstance, Stream_Pointer(out), expect,
+			                                   write_log);
+			if (ret < 0)
+				return FALSE;
+			Stream_Seek(out, (size_t)ret);
+		} while (ret > 0);
+	} while (rest > 0);
+	return TRUE;
+}
+
+void fdk_aac_dsp_uninit(FREERDP_DSP_COMMON_CONTEXT* context)
+{
+	WINPR_ASSERT(context);
+	/* Closes the encoder or decoder (chosen by context->encoder) and NULLs fdkAacInstance. */
+	fdk_aac_dsp_impl_uninit(&context->fdkAacInstance, context->encoder, write_log);
+}
+
+BOOL fdk_aac_dsp_init(FREERDP_DSP_COMMON_CONTEXT* context, size_t frames_per_packet)
+{ /* Opens the FDK instance for context->encoder; returns the impl's nonzero-on-success result */
+	WINPR_ASSERT(context);
+	context->fdkSetup = FALSE; /* configuration is done on the first encode/decode call */
+	WINPR_ASSERT(frames_per_packet <= UINT_MAX); /* stored as unsigned below */
+	context->frames_per_packet = (unsigned)frames_per_packet;
+	return fdk_aac_dsp_impl_init(&context->fdkAacInstance, context->encoder, write_log);
+}
diff --git a/libfreerdp/codec/dsp_fdk_aac.h b/libfreerdp/codec/dsp_fdk_aac.h
new file mode 100644
index 000000000..fc633a982
--- /dev/null
+++ b/libfreerdp/codec/dsp_fdk_aac.h
@@ -0,0 +1,38 @@
+/**
+ * FreeRDP: A Remote Desktop Protocol Implementation
+ * Digital Sound Processing
+ *
+ * Copyright 2022 Armin Novak <anovak@thincast.com>
+ * Copyright 2022 Thincast Technologies GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FREERDP_DSP_FDK_AAC_H_
+#define FREERDP_DSP_FDK_AAC_H_
+
+#include <winpr/stream.h>
+#include <freerdp/codec/audio.h>
+
+#include "dsp.h"
+
+BOOL fdk_aac_dsp_init(FREERDP_DSP_COMMON_CONTEXT* context, size_t frames_per_packet);
+void fdk_aac_dsp_uninit(FREERDP_DSP_COMMON_CONTEXT* context);
+
+BOOL fdk_aac_dsp_encode(FREERDP_DSP_COMMON_CONTEXT* context, const AUDIO_FORMAT* srcFormat,
+                        const BYTE* data, size_t length, wStream* out);
+
+BOOL fdk_aac_dsp_decode(FREERDP_DSP_COMMON_CONTEXT* context, const AUDIO_FORMAT* srcFormat,
+                        const BYTE* data, size_t length, wStream* out);
+
+#endif
diff --git a/libfreerdp/codec/dsp_fdk_impl.c b/libfreerdp/codec/dsp_fdk_impl.c
new file mode 100644
index 000000000..28ca3d10b
--- /dev/null
+++ b/libfreerdp/codec/dsp_fdk_impl.c
@@ -0,0 +1,597 @@
+/**
+ * FreeRDP: A Remote Desktop Protocol Implementation
+ * Digital Sound Processing
+ *
+ * Copyright 2022 Armin Novak <anovak@thincast.com>
+ * Copyright 2022 Thincast Technologies GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <limits.h>
+#include <inttypes.h>
+
+#include <fdk-aac/aacdecoder_lib.h>
+#include <fdk-aac/aacenc_lib.h>
+
+#include "dsp_fdk_impl.h"
+
+#define WLOG_TRACE 0
+#define WLOG_DEBUG 1
+#define WLOG_INFO 2
+#define WLOG_WARN 3
+#define WLOG_ERROR 4
+#define WLOG_FATAL 5
+
+static const char* enc_err_str(AACENC_ERROR err)
+{ /* Returns the enumerator name of err for log output; "AACENC_UNKNOWN" for unlisted values */
+	switch (err)
+	{
+		case AACENC_OK:
+			return "AACENC_OK";
+		case AACENC_INVALID_HANDLE:
+			return "AACENC_INVALID_HANDLE";
+		case AACENC_MEMORY_ERROR:
+			return "AACENC_MEMORY_ERROR";
+		case AACENC_UNSUPPORTED_PARAMETER:
+			return "AACENC_UNSUPPORTED_PARAMETER";
+		case AACENC_INVALID_CONFIG:
+			return "AACENC_INVALID_CONFIG";
+		case AACENC_INIT_ERROR:
+			return "AACENC_INIT_ERROR";
+		case AACENC_INIT_AAC_ERROR:
+			return "AACENC_INIT_AAC_ERROR";
+		case AACENC_INIT_SBR_ERROR:
+			return "AACENC_INIT_SBR_ERROR";
+		case AACENC_INIT_TP_ERROR:
+			return "AACENC_INIT_TP_ERROR";
+		case AACENC_INIT_META_ERROR:
+			return "AACENC_INIT_META_ERROR";
+#ifdef AACENC_INIT_MPS_ERROR /* NOTE(review): only effective if this is a macro - confirm */
+		case AACENC_INIT_MPS_ERROR:
+			return "AACENC_INIT_MPS_ERROR";
+#endif
+		case AACENC_ENCODE_ERROR:
+			return "AACENC_ENCODE_ERROR";
+		case AACENC_ENCODE_EOF:
+			return "AACENC_ENCODE_EOF";
+		default:
+			return "AACENC_UNKNOWN";
+	}
+}
+
+static const char* dec_err_str(AAC_DECODER_ERROR err)
+{ /* Returns the enumerator name of err for log output; a generic text for unlisted values */
+	switch (err)
+	{
+		case AAC_DEC_OK:
+			return "AAC_DEC_OK";
+		case AAC_DEC_OUT_OF_MEMORY:
+			return "AAC_DEC_OUT_OF_MEMORY";
+		case AAC_DEC_UNKNOWN:
+			return "AAC_DEC_UNKNOWN";
+		case aac_dec_sync_error_start:
+			return "aac_dec_sync_error_start";
+		case AAC_DEC_TRANSPORT_SYNC_ERROR:
+			return "AAC_DEC_TRANSPORT_SYNC_ERROR";
+		case AAC_DEC_NOT_ENOUGH_BITS:
+			return "AAC_DEC_NOT_ENOUGH_BITS";
+		case aac_dec_sync_error_end:
+			return "aac_dec_sync_error_end";
+		case aac_dec_init_error_start:
+			return "aac_dec_init_error_start";
+		case AAC_DEC_INVALID_HANDLE:
+			return "AAC_DEC_INVALID_HANDLE";
+		case AAC_DEC_UNSUPPORTED_FORMAT:
+			return "AAC_DEC_UNSUPPORTED_FORMAT";
+		case AAC_DEC_UNSUPPORTED_ER_FORMAT:
+			return "AAC_DEC_UNSUPPORTED_ER_FORMAT";
+		case AAC_DEC_UNSUPPORTED_EPCONFIG:
+			return "AAC_DEC_UNSUPPORTED_EPCONFIG";
+		case AAC_DEC_UNSUPPORTED_MULTILAYER:
+			return "AAC_DEC_UNSUPPORTED_MULTILAYER";
+		case AAC_DEC_UNSUPPORTED_CHANNELCONFIG:
+			return "AAC_DEC_UNSUPPORTED_CHANNELCONFIG";
+		case AAC_DEC_UNSUPPORTED_SAMPLINGRATE:
+			return "AAC_DEC_UNSUPPORTED_SAMPLINGRATE";
+		case AAC_DEC_INVALID_SBR_CONFIG:
+			return "AAC_DEC_INVALID_SBR_CONFIG";
+		case AAC_DEC_SET_PARAM_FAIL:
+			return "AAC_DEC_SET_PARAM_FAIL";
+		case AAC_DEC_NEED_TO_RESTART:
+			return "AAC_DEC_NEED_TO_RESTART";
+		case AAC_DEC_OUTPUT_BUFFER_TOO_SMALL:
+			return "AAC_DEC_OUTPUT_BUFFER_TOO_SMALL";
+		case aac_dec_init_error_end:
+			return "aac_dec_init_error_end";
+		case aac_dec_decode_error_start:
+			return "aac_dec_decode_error_start";
+		case AAC_DEC_TRANSPORT_ERROR:
+			return "AAC_DEC_TRANSPORT_ERROR";
+		case AAC_DEC_PARSE_ERROR:
+			return "AAC_DEC_PARSE_ERROR";
+		case AAC_DEC_UNSUPPORTED_EXTENSION_PAYLOAD:
+			return "AAC_DEC_UNSUPPORTED_EXTENSION_PAYLOAD";
+		case AAC_DEC_DECODE_FRAME_ERROR:
+			return "AAC_DEC_DECODE_FRAME_ERROR";
+		case AAC_DEC_CRC_ERROR:
+			return "AAC_DEC_CRC_ERROR";
+		case AAC_DEC_INVALID_CODE_BOOK:
+			return "AAC_DEC_INVALID_CODE_BOOK";
+		case AAC_DEC_UNSUPPORTED_PREDICTION:
+			return "AAC_DEC_UNSUPPORTED_PREDICTION";
+		case AAC_DEC_UNSUPPORTED_CCE:
+			return "AAC_DEC_UNSUPPORTED_CCE";
+		case AAC_DEC_UNSUPPORTED_LFE:
+			return "AAC_DEC_UNSUPPORTED_LFE";
+		case AAC_DEC_UNSUPPORTED_GAIN_CONTROL_DATA:
+			return "AAC_DEC_UNSUPPORTED_GAIN_CONTROL_DATA";
+		case AAC_DEC_UNSUPPORTED_SBA:
+			return "AAC_DEC_UNSUPPORTED_SBA";
+		case AAC_DEC_TNS_READ_ERROR:
+			return "AAC_DEC_TNS_READ_ERROR";
+		case AAC_DEC_RVLC_ERROR:
+			return "AAC_DEC_RVLC_ERROR";
+		case aac_dec_decode_error_end:
+			return "aac_dec_decode_error_end";
+		case aac_dec_anc_data_error_start:
+			return "aac_dec_anc_data_error_start";
+		case AAC_DEC_ANC_DATA_ERROR:
+			return "AAC_DEC_ANC_DATA_ERROR";
+		case AAC_DEC_TOO_SMALL_ANC_BUFFER:
+			return "AAC_DEC_TOO_SMALL_ANC_BUFFER";
+		case AAC_DEC_TOO_MANY_ANC_ELEMENTS:
+			return "AAC_DEC_TOO_MANY_ANC_ELEMENTS";
+		case aac_dec_anc_data_error_end:
+			return "aac_dec_anc_data_error_end";
+		default:
+			return "AAC_DEC unknown value";
+	}
+}
+
+static void log_dec_info(const CStreamInfo* info, fdk_log_fkt_t log)
+{
+	/* Dump the decoder stream info at debug level through the supplied log callback. */
+	assert(info);
+	assert(log);
+	log(WLOG_DEBUG,
+	    "info: "
+	    "sampleRate: %d, "
+	    "frameSize: %d, "
+	    "numChannels: %d, "
+	    "pChannelType: %p, "
+	    "pChannelIndices: %p, "
+	    "aacSampleRate: %d, "
+	    "profile: %d, "
+	    "aot: %d, " /* TODO: Enum 2 string */
+	    "channelConfig: %d, "
+	    "bitRate: %d, "
+	    "aacSamplesPerFrame: %d, "
+	    "aacNumChannels: %d, "
+	    "extAot: %d, " /* TODO: Enum 2 string */
+	    "extSamplingRate: %d, "
+	    "outputDelay: %u, "
+	    "flags: %u, "
+	    "epConfig: %d, "
+	    "numLostAccessUnits: %d, "
+	    "numTotalBytes: %" PRIu64 ", "
+	    "numBadBytes: %" PRIu64 ", "
+	    "numTotalAccessUnits: %" PRIu64 ", "
+	    "numBadAccessUnits: %" PRIu64 ", "
+	    "drcProgRefLev: %d, "
+	    "drcPresMode: %d",
+	    info->sampleRate, info->frameSize, info->numChannels, (const void*)info->pChannelType,
+	    (const void*)info->pChannelIndices, info->aacSampleRate, info->profile, (int)info->aot,
+	    info->channelConfig, info->bitRate, info->aacSamplesPerFrame, info->aacNumChannels,
+	    (int)info->extAot, info->extSamplingRate, info->outputDelay, info->flags,
+	    (int)info->epConfig, info->numLostAccessUnits,
+	    (uint64_t)info->numTotalBytes, (uint64_t)info->numBadBytes, /* casts match PRIu64 */
+	    (uint64_t)info->numTotalAccessUnits, (uint64_t)info->numBadAccessUnits,
+	    (int)info->drcProgRefLev, (int)info->drcPresMode);
+}
+
+static void log_enc_info(const AACENC_InfoStruct* info, fdk_log_fkt_t log)
+{
+	/* Log the encoder info at debug level; confBuf is rendered as "{0x.., 0x.., ...}". */
+	char rendered[1024] = { 0 };
+	size_t idx = 0;
+
+	assert(info);
+	assert(log);
+
+	/* 64 items of at most 6 characters each stay well below sizeof(rendered). */
+	strcat(rendered, "{");
+	for (idx = 0; idx < 64; idx++)
+	{
+		char item[12] = { 0 };
+		sprintf(item, "%s0x%02x", (idx > 0) ? ", " : "", (int)info->confBuf[idx]);
+		strcat(rendered, item);
+	}
+	strcat(rendered, "}");
+
+	log(WLOG_DEBUG,
+	    "[encoder info] "
+	    "maxOutBufBytes : %u, "
+	    "maxAncBytes    : %u, "
+	    "inBufFillLevel : %u, "
+	    "inputChannels  : %u, "
+	    "frameLength    : %u, "
+#ifdef MODE_7_1_BACK
+	    "nDelay         : %u, "
+	    "nDelayCore     : %u, "
+#endif
+	    "confBuf[64]    : %s, "
+	    "confSize       : %u",
+	    info->maxOutBufBytes, info->maxAncBytes, info->inBufFillLevel, info->inputChannels,
+	    info->frameLength,
+#ifdef MODE_7_1_BACK
+	    info->nDelay, info->nDelayCore,
+#endif
+	    rendered, info->confSize);
+}
+
+static const char* aac_enc_param_str(AACENC_PARAM param)
+{ /* Returns the enumerator name of param for log output; "AACENC_UNKNOWN" if unlisted */
+	switch (param)
+	{
+		case AACENC_AOT:
+			return "AACENC_AOT";
+		case AACENC_BITRATE:
+			return "AACENC_BITRATE";
+		case AACENC_BITRATEMODE:
+			return "AACENC_BITRATEMODE";
+		case AACENC_SAMPLERATE:
+			return "AACENC_SAMPLERATE";
+		case AACENC_SBR_MODE:
+			return "AACENC_SBR_MODE";
+		case AACENC_GRANULE_LENGTH:
+			return "AACENC_GRANULE_LENGTH";
+		case AACENC_CHANNELMODE:
+			return "AACENC_CHANNELMODE";
+		case AACENC_CHANNELORDER:
+			return "AACENC_CHANNELORDER";
+		case AACENC_SBR_RATIO:
+			return "AACENC_SBR_RATIO";
+		case AACENC_AFTERBURNER:
+			return "AACENC_AFTERBURNER";
+		case AACENC_BANDWIDTH:
+			return "AACENC_BANDWIDTH";
+		case AACENC_PEAK_BITRATE:
+			return "AACENC_PEAK_BITRATE";
+		case AACENC_TRANSMUX:
+			return "AACENC_TRANSMUX";
+		case AACENC_HEADER_PERIOD:
+			return "AACENC_HEADER_PERIOD";
+		case AACENC_SIGNALING_MODE:
+			return "AACENC_SIGNALING_MODE";
+		case AACENC_TPSUBFRAMES:
+			return "AACENC_TPSUBFRAMES";
+		case AACENC_AUDIOMUXVER:
+			return "AACENC_AUDIOMUXVER";
+		case AACENC_PROTECTION:
+			return "AACENC_PROTECTION";
+		case AACENC_ANCILLARY_BITRATE:
+			return "AACENC_ANCILLARY_BITRATE";
+		case AACENC_METADATA_MODE:
+			return "AACENC_METADATA_MODE";
+		case AACENC_CONTROL_STATE:
+			return "AACENC_CONTROL_STATE";
+		default:
+			return "AACENC_UNKNOWN";
+	}
+}
+
+int fdk_aac_dsp_impl_init(void** handle, int encoder, fdk_log_fkt_t log)
+{
+	/* Open an FDK encoder (encoder != 0) or decoder and store its handle in *handle.
+	 * Returns 1 on success, 0 when the library could not open the instance. */
+	assert(handle);
+	assert(log);
+
+	if (!encoder)
+	{
+		HANDLE_AACDECODER* dec = (HANDLE_AACDECODER*)handle;
+		assert(NULL == *dec);
+
+		/* decoder: transport type TT_MP4_RAW, layer count 1 */
+		*dec = aacDecoder_Open(TT_MP4_RAW, 1);
+		if (*dec)
+			return 1;
+
+		log(WLOG_ERROR, "aacDecoder_Open failed");
+		return 0;
+	}
+
+	/* encoder: both the module mask and the channel limit are passed as 0 */
+	const AACENC_ERROR rc = aacEncOpen((HANDLE_AACENCODER*)handle, 0, 0);
+	if (rc == AACENC_OK)
+		return 1;
+
+	log(WLOG_ERROR, "aacEncOpen failed with %s", enc_err_str(rc));
+	return 0;
+}
+
+void fdk_aac_dsp_impl_uninit(void** handle, int encoder, fdk_log_fkt_t log)
+{
+	/* Close the encoder or decoder stored in *handle and reset *handle to NULL. */
+	assert(handle);
+	assert(log);
+
+	if (encoder)
+	{
+		HANDLE_AACENCODER* h = (HANDLE_AACENCODER*)handle;
+		AACENC_ERROR err = aacEncClose(h);
+		if (err != AACENC_OK)
+			log(WLOG_ERROR, "aacEncClose failed with %s", enc_err_str(err));
+	}
+	else
+	{
+		HANDLE_AACDECODER* h = (HANDLE_AACDECODER*)handle;
+		if (*h) /* test the stored handle; h itself was already asserted non-NULL above */
+			aacDecoder_Close(*h);
+	}
+	*handle = NULL;
+}
+
+ssize_t fdk_aac_dsp_impl_decode_read(void* handle, void* dst, size_t dstSize, fdk_log_fkt_t log)
+{
+	/* Decode one frame into dst (dstSize bytes). Returns the frame size in bytes taken from the
+	 * stream info, 0 when the decoder needs more input, or -1 on any other failure. */
+	assert(handle);
+	assert((dstSize / sizeof(INT_PCM)) <= INT_MAX);
+
+	const UINT flags = 0;
+	const INT capacity = (INT)(dstSize / sizeof(INT_PCM)); /* dst size in INT_PCM samples */
+	HANDLE_AACDECODER self = (HANDLE_AACDECODER)handle;
+	const AAC_DECODER_ERROR rc = aacDecoder_DecodeFrame(self, dst, capacity, flags);
+
+	if (rc == AAC_DEC_OK)
+		return fdk_aac_dsp_impl_stream_info(handle, 0, log);
+	if (rc == AAC_DEC_NOT_ENOUGH_BITS)
+		return 0;
+
+	log(WLOG_ERROR, "aacDecoder_DecodeFrame failed with %s", dec_err_str(rc));
+	return -1;
+}
+
+static unsigned get_channelmode(unsigned channels)
+{ /* Picks the AACENC_CHANNELMODE value for a channel count; unlisted counts yield MODE_2 */
+	switch (channels)
+	{
+		case 1:
+			return MODE_1;
+		case 2:
+			return MODE_2;
+		case 3:
+			return MODE_1_2;
+		case 4:
+			return MODE_1_2_1;
+		case 5:
+			return MODE_1_2_2;
+		case 6:
+			return MODE_1_2_2_1;
+		case 7: /* NOTE(review): the name 1_2_2_2_1 adds up to 8 channels - confirm this mapping */
+			return MODE_1_2_2_2_1;
+#ifdef MODE_7_1_BACK /* NOTE(review): only effective if this is a macro - confirm */
+		case 8:
+			return MODE_7_1_BACK;
+#endif
+
+		default:
+			return MODE_2;
+	}
+}
+
+/* Apply the stream parameters to an open encoder, initialise it and read back its info.
+ * Returns 0 on success or -1 after logging the call that failed. */
+static int fdk_aac_dsp_impl_encoder_setup(HANDLE_AACENCODER self, unsigned samplerate,
+                                          unsigned channels, unsigned bytes_per_second,
+                                          AACENC_InfoStruct* info, fdk_log_fkt_t log)
+{
+	struct t_param_pair
+	{
+		AACENC_PARAM param;
+		UINT value;
+	};
+	const struct t_param_pair params[] = { { AACENC_AOT, 2 },
+		                                   { AACENC_SAMPLERATE, samplerate },
+		                                   { AACENC_CHANNELMODE, get_channelmode(channels) },
+		                                   { AACENC_CHANNELORDER, 0 },
+		                                   { AACENC_BITRATE, bytes_per_second * 8 },
+		                                   { AACENC_TRANSMUX, 0 },
+		                                   { AACENC_AFTERBURNER, 1 } };
+	for (size_t x = 0; x < sizeof(params) / sizeof(params[0]); x++)
+	{
+		const struct t_param_pair* param = &params[x];
+		const AACENC_ERROR rc = aacEncoder_SetParam(self, param->param, param->value);
+		if (rc != AACENC_OK)
+		{
+			log(WLOG_ERROR, "aacEncoder_SetParam(%s, %u) failed with %s",
+			    aac_enc_param_str(param->param), param->value, enc_err_str(rc));
+			return -1;
+		}
+	}
+
+	/* A call without buffers makes the encoder adopt the parameters set above. */
+	AACENC_ERROR err = aacEncEncode(self, NULL, NULL, NULL, NULL);
+	if (err != AACENC_OK)
+	{
+		log(WLOG_ERROR, "aacEncEncode failed with %s", enc_err_str(err));
+		return -1;
+	}
+	err = aacEncInfo(self, info);
+	if (err != AACENC_OK)
+	{
+		log(WLOG_ERROR, "aacEncInfo failed with %s", enc_err_str(err));
+		return -1;
+	}
+	return 0;
+}
+
+int fdk_aac_dsp_impl_config(void* handle, size_t* pbuffersize, int encoder, unsigned samplerate,
+                            unsigned channels, unsigned bytes_per_second,
+                            unsigned frames_per_packet, fdk_log_fkt_t log)
+{
+	/* Configure the encoder or decoder in handle; returns 0 on success, -1 on failure.
+	 * A decoder is configured from the config blob of a temporary encoder that is set up
+	 * with the same parameters. frames_per_packet is accepted but not used. */
+	AACENC_InfoStruct info = { 0 };
+
+	assert(handle);
+	assert(log);
+	assert(pbuffersize);
+	(void)frames_per_packet;
+
+	log(WLOG_DEBUG, "fdk_aac_dsp_impl_config: samplerate: %u, channels: %u, bytes_pers_second: %u",
+	    samplerate, channels, bytes_per_second);
+
+	if (encoder)
+	{
+		if (fdk_aac_dsp_impl_encoder_setup((HANDLE_AACENCODER)handle, samplerate, channels,
+		                                   bytes_per_second, &info, log) < 0)
+			return -1;
+		*pbuffersize = info.maxOutBufBytes;
+		log_enc_info(&info, log);
+		return 0;
+	}
+
+	HANDLE_AACENCODER tmp = NULL;
+	AACENC_ERROR err = aacEncOpen(&tmp, 0, channels);
+	if (err != AACENC_OK)
+	{
+		log(WLOG_ERROR, "aacEncOpen failed with %s", enc_err_str(err));
+		return -1;
+	}
+	const int rc = fdk_aac_dsp_impl_encoder_setup(tmp, samplerate, channels, bytes_per_second,
+	                                              &info, log);
+	/* Close the temporary encoder on success and on failure alike. */
+	err = aacEncClose(&tmp);
+	if (err != AACENC_OK)
+		log(WLOG_WARN, "aacEncClose failed with %s", enc_err_str(err));
+	if (rc < 0)
+		return -1;
+	*pbuffersize = info.frameLength * info.inputChannels * sizeof(INT_PCM);
+	UCHAR* asc[] = { info.confBuf };
+	UINT ascSize[] = { info.confSize };
+	const AAC_DECODER_ERROR decerr = aacDecoder_ConfigRaw((HANDLE_AACDECODER)handle, asc, ascSize);
+	if (decerr != AAC_DEC_OK)
+	{
+		log(WLOG_ERROR, "aacDecoder_ConfigRaw failed with %s", dec_err_str(decerr));
+		return -1;
+	}
+	return 0;
+}
+
+ssize_t fdk_aac_dsp_impl_decode_fill(void* handle, const void* data, size_t size, fdk_log_fkt_t log)
+{
+	/* Copy input into the decoder's bitstream buffer. Returns the number of bytes of data the
+	 * decoder has not taken yet (0 = everything consumed), or -1 on failure. */
+	assert(handle);
+	assert(log);
+	assert(data || (size == 0));
+	if (size > UINT_MAX) /* the FDK API counts bytes in UINT; refuse instead of truncating */
+		return -1;
+
+	UINT leftBytes = (UINT)size;
+	HANDLE_AACDECODER self = (HANDLE_AACDECODER)handle;
+	UCHAR* pBuffer[] = { (UCHAR*)data }; /* aacDecoder_Fill takes a non-const buffer array */
+	const UINT bufferSize[] = { (UINT)size };
+	const AAC_DECODER_ERROR err = aacDecoder_Fill(self, pBuffer, bufferSize, &leftBytes);
+	if (err != AAC_DEC_OK)
+	{
+		log(WLOG_ERROR, "aacDecoder_Fill failed with %s", dec_err_str(err));
+		return -1;
+	}
+	return leftBytes;
+}
+
+ssize_t fdk_aac_dsp_impl_stream_info(void* handle, int encoder, fdk_log_fkt_t log)
+{
+	/* Encoder: maximum output bytes reported by aacEncInfo. Decoder: PCM bytes of one frame
+	 * (sizeof(INT_PCM) * channels * frame size). Returns -1 if the info cannot be queried. */
+	assert(handle);
+	assert(log);
+
+	if (encoder)
+	{
+		AACENC_InfoStruct info = { 0 };
+		HANDLE_AACENCODER self = (HANDLE_AACENCODER)handle;
+		AACENC_ERROR err = aacEncInfo(self, &info);
+		if (err != AACENC_OK) /* encoder calls report AACENC_ERROR codes */
+		{
+			log(WLOG_ERROR, "aacEncInfo failed with %s", enc_err_str(err));
+			return -1;
+		}
+		return info.maxOutBufBytes;
+	}
+
+	HANDLE_AACDECODER self = (HANDLE_AACDECODER)handle;
+	CStreamInfo* info = aacDecoder_GetStreamInfo(self);
+	if (!info)
+	{
+		log(WLOG_ERROR, "aacDecoder_GetStreamInfo failed");
+		return -1;
+	}
+
+	return sizeof(INT_PCM) * info->numChannels * info->frameSize;
+}
+
+ssize_t fdk_aac_dsp_impl_encode(void* handle, const void* data, size_t size, void* dst,
+                                size_t dstSize, fdk_log_fkt_t log)
+{
+	/* Encode size bytes of INT_PCM input into dst; returns bytes written to dst or -1.
+	 * The FDK buffer descriptors carry sizes as INT, so larger buffers are refused. */
+	assert(handle);
+	assert(log);
+	if ((size > INT_MAX) || (dstSize > INT_MAX))
+	{
+		log(WLOG_ERROR, "buffer sizes exceed INT_MAX, refusing to encode");
+		return -1;
+	}
+	INT inSizes[] = { (INT)size };
+	INT inElSizes[] = { sizeof(INT_PCM) };
+	INT inIdentifiers[] = { IN_AUDIO_DATA };
+	void* inBuffers[] = { (void*)data }; /* AACENC_BufDesc.bufs is not const-qualified */
+	const AACENC_BufDesc inBufDesc = {
+		.numBufs = 1,
+		.bufs = inBuffers,
+		.bufferIdentifiers = inIdentifiers,
+		.bufSizes = inSizes,
+		.bufElSizes = inElSizes /* TODO: 8/16 bit input? */
+	};
+
+	INT outSizes[] = { (INT)dstSize };
+	INT outElSizes[] = { 1 };
+	INT outIdentifiers[] = { OUT_BITSTREAM_DATA };
+	void* outBuffers[] = { dst };
+	const AACENC_BufDesc outBufDesc = { .numBufs = 1,
+		                                .bufs = outBuffers,
+		                                .bufferIdentifiers = outIdentifiers,
+		                                .bufSizes = outSizes,
+		                                .bufElSizes = outElSizes };
+	const AACENC_InArgs inArgs = { .numInSamples =
+		                               (INT)(size / sizeof(INT_PCM)), /* TODO: 8/16 bit input? */
+		                           .numAncBytes = 0 };
+	AACENC_OutArgs outArgs = { 0 };
+	HANDLE_AACENCODER self = (HANDLE_AACENCODER)handle;
+	const AACENC_ERROR err = aacEncEncode(self, &inBufDesc, &outBufDesc, &inArgs, &outArgs);
+	if (err != AACENC_OK)
+	{
+		log(WLOG_ERROR, "aacEncEncode failed with %s", enc_err_str(err));
+		return -1;
+	}
+	return outArgs.numOutBytes;
+}
diff --git a/libfreerdp/codec/dsp_fdk_impl.h b/libfreerdp/codec/dsp_fdk_impl.h
new file mode 100644
index 000000000..e9914c3e1
--- /dev/null
+++ b/libfreerdp/codec/dsp_fdk_impl.h
@@ -0,0 +1,45 @@
+/**
+ * FreeRDP: A Remote Desktop Protocol Implementation
+ * Digital Sound Processing
+ *
+ * Copyright 2022 Armin Novak <anovak@thincast.com>
+ * Copyright 2022 Thincast Technologies GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FREERDP_DSP_FDK_IMPL_H_
+#define FREERDP_DSP_FDK_IMPL_H_
+
+#include <stdlib.h>
+
+typedef void (*fdk_log_fkt_t)(unsigned log_level, const char* fmt, ...);
+
+int fdk_aac_dsp_impl_init(void** handle, int encoder, fdk_log_fkt_t log);
+void fdk_aac_dsp_impl_uninit(void** handle, int encoder, fdk_log_fkt_t log);
+
+ssize_t fdk_aac_dsp_impl_stream_info(void* handle, int encoder, fdk_log_fkt_t log);
+
+int fdk_aac_dsp_impl_config(void* handle, size_t* pbuffersize, int encoder, unsigned samplerate,
+                            unsigned channels, unsigned bytes_per_second,
+                            unsigned frames_per_packet, fdk_log_fkt_t log);
+
+ssize_t fdk_aac_dsp_impl_decode_fill(void* handle, const void* data, size_t size,
+                                     fdk_log_fkt_t log);
+
+ssize_t fdk_aac_dsp_impl_encode(void* handle, const void* data, size_t size, void* dst,
+                                size_t dstSize, fdk_log_fkt_t log);
+
+ssize_t fdk_aac_dsp_impl_decode_read(void* handle, void* dst, size_t dstSize, fdk_log_fkt_t log);
+
+#endif
diff --git a/libfreerdp/codec/dsp_ffmpeg.c b/libfreerdp/codec/dsp_ffmpeg.c
index 703340dcf..41d4caa4c 100644
--- a/libfreerdp/codec/dsp_ffmpeg.c
+++ b/libfreerdp/codec/dsp_ffmpeg.c
@@ -40,10 +40,9 @@
 
 struct S_FREERDP_DSP_CONTEXT
 {
-	AUDIO_FORMAT format;
+	FREERDP_DSP_COMMON_CONTEXT common;
 
 	BOOL isOpen;
-	BOOL encoder;
 
 	UINT32 bufferedSamples;
 
@@ -59,7 +58,6 @@ struct S_FREERDP_DSP_CONTEXT
 #else
 	AVAudioResampleContext* rcontext;
 #endif
-	wStream* channelmix;
 };
 
 static BOOL ffmpeg_codec_is_filtered(enum AVCodecID id, BOOL encoder)
@@ -92,11 +90,15 @@ static BOOL ffmpeg_codec_is_filtered(enum AVCodecID id, BOOL encoder)
 	}
 }
 
-static enum AVCodecID ffmpeg_get_avcodec(const AUDIO_FORMAT* WINPR_RESTRICT format)
+static enum AVCodecID ffmpeg_get_avcodec(const AUDIO_FORMAT* format)
 {
+	const char* id;
+
 	if (!format)
 		return AV_CODEC_ID_NONE;
 
+	id = audio_format_get_tag_string(format->wFormatTag);
+
 	switch (format->wFormatTag)
 	{
 		case WAVE_FORMAT_UNKNOWN:
@@ -136,15 +138,12 @@ static enum AVCodecID ffmpeg_get_avcodec(const AUDIO_FORMAT* WINPR_RESTRICT form
 		case WAVE_FORMAT_AAC_MS:
 			return AV_CODEC_ID_AAC;
 
-		case WAVE_FORMAT_OPUS:
-			return AV_CODEC_ID_OPUS;
-
 		default:
 			return AV_CODEC_ID_NONE;
 	}
 }
 
-static int ffmpeg_sample_format(const AUDIO_FORMAT* WINPR_RESTRICT format)
+static int ffmpeg_sample_format(const AUDIO_FORMAT* format)
 {
 	switch (format->wFormatTag)
 	{
@@ -169,9 +168,6 @@ static int ffmpeg_sample_format(const AUDIO_FORMAT* WINPR_RESTRICT format)
 		case WAVE_FORMAT_AAC_MS:
 			return AV_SAMPLE_FMT_FLTP;
 
-		case WAVE_FORMAT_OPUS:
-			return AV_SAMPLE_FMT_S16;
-
 		case WAVE_FORMAT_MSG723:
 		case WAVE_FORMAT_GSM610:
 			return AV_SAMPLE_FMT_S16P;
@@ -184,7 +180,7 @@ static int ffmpeg_sample_format(const AUDIO_FORMAT* WINPR_RESTRICT format)
 	}
 }
 
-static void ffmpeg_close_context(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context)
+static void ffmpeg_close_context(FREERDP_DSP_CONTEXT* context)
 {
 	if (context)
 	{
@@ -223,26 +219,25 @@ static void ffmpeg_close_context(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context)
 	}
 }
 
-static BOOL ffmpeg_open_context(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context)
+static BOOL ffmpeg_open_context(FREERDP_DSP_CONTEXT* context)
 {
-	int ret = 0;
+	int ret;
+	const AUDIO_FORMAT* format;
 
 	if (!context || context->isOpen)
 		return FALSE;
 
-	const AUDIO_FORMAT* format = &context->format;
+	format = &context->common.format;
 
 	if (!format)
 		return FALSE;
-#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
-	const int layout = av_get_default_channel_layout(format->nChannels);
-#endif
+
 	context->id = ffmpeg_get_avcodec(format);
 
-	if (ffmpeg_codec_is_filtered(context->id, context->encoder))
+	if (ffmpeg_codec_is_filtered(context->id, context->common.encoder))
 		goto fail;
 
-	if (context->encoder)
+	if (context->common.encoder)
 		context->codec = avcodec_find_encoder(context->id);
 	else
 		context->codec = avcodec_find_decoder(context->id);
@@ -272,11 +267,13 @@ static BOOL ffmpeg_open_context(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context)
 
 	context->context->max_b_frames = 1;
 	context->context->delay = 0;
-#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
-	context->context->channels = format->nChannels;
-	context->context->channel_layout = layout;
-#else
+
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
 	av_channel_layout_default(&context->context->ch_layout, format->nChannels);
+#else
+	context->context->channels = format->nChannels;
+	const int64_t layout = av_get_default_channel_layout(format->nChannels);
+	context->context->channel_layout = layout;
 #endif
 	context->context->sample_rate = format->nSamplesPerSec;
 	context->context->block_align = format->nBlockAlign;
@@ -320,16 +317,16 @@ static BOOL ffmpeg_open_context(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context)
 	if (!context->rcontext)
 		goto fail;
 
-#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
+	av_channel_layout_default(&context->frame->ch_layout, format->nChannels);
+#else
 	context->frame->channel_layout = layout;
 	context->frame->channels = format->nChannels;
-#else
-	av_channel_layout_default(&context->frame->ch_layout, format->nChannels);
 #endif
 	context->frame->sample_rate = format->nSamplesPerSec;
 	context->frame->format = AV_SAMPLE_FMT_S16;
 
-	if (context->encoder)
+	if (context->common.encoder)
 	{
 		context->resampled->format = context->context->sample_fmt;
 		context->resampled->sample_rate = context->context->sample_rate;
@@ -340,20 +337,22 @@ static BOOL ffmpeg_open_context(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context)
 		context->resampled->sample_rate = format->nSamplesPerSec;
 	}
 
-#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
+	av_channel_layout_default(&context->resampled->ch_layout, format->nChannels);
+#else
 	context->resampled->channel_layout = layout;
 	context->resampled->channels = format->nChannels;
-#else
-	av_channel_layout_default(&context->resampled->ch_layout, format->nChannels);
 #endif
 
 	if (context->context->frame_size > 0)
 	{
-#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
+		ret = av_channel_layout_copy(&context->buffered->ch_layout, &context->resampled->ch_layout);
+		if (ret != 0)
+			goto fail;
+#else
 		context->buffered->channel_layout = context->resampled->channel_layout;
 		context->buffered->channels = context->resampled->channels;
-#else
-		av_channel_layout_copy(&context->buffered->ch_layout, &context->resampled->ch_layout);
 #endif
 		context->buffered->format = context->resampled->format;
 		context->buffered->nb_samples = context->context->frame_size;
@@ -370,10 +369,9 @@ fail:
 }
 
 #if defined(SWRESAMPLE_FOUND)
-static BOOL ffmpeg_resample_frame(SwrContext* WINPR_RESTRICT context, AVFrame* WINPR_RESTRICT in,
-                                  AVFrame* WINPR_RESTRICT out)
+static BOOL ffmpeg_resample_frame(SwrContext* context, AVFrame* in, AVFrame* out)
 {
-	int ret = 0;
+	int ret;
 
 	if (!swr_is_initialized(context))
 	{
@@ -402,8 +400,7 @@ static BOOL ffmpeg_resample_frame(SwrContext* WINPR_RESTRICT context, AVFrame* W
 	return TRUE;
 }
 #else
-static BOOL ffmpeg_resample_frame(AVAudioResampleContext* WINPR_RESTRICT context,
-                                  AVFrame* WINPR_RESTRICT in, AVFrame* WINPR_RESTRICT out)
+static BOOL ffmpeg_resample_frame(AVAudioResampleContext* context, AVFrame* in, AVFrame* out)
 {
 	int ret;
 
@@ -435,38 +432,12 @@ static BOOL ffmpeg_resample_frame(AVAudioResampleContext* WINPR_RESTRICT context
 }
 #endif
 
-static BOOL ffmpeg_encode_frame(AVCodecContext* WINPR_RESTRICT context, AVFrame* WINPR_RESTRICT in,
-                                AVPacket* WINPR_RESTRICT packet, wStream* WINPR_RESTRICT out)
+static BOOL ffmpeg_encode_frame(AVCodecContext* context, AVFrame* in, AVPacket* packet,
+                                wStream* out)
 {
-	if (in->format == AV_SAMPLE_FMT_FLTP)
-	{
-		uint8_t** pp = in->extended_data;
-#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
-		const int nr_channels = in->channels;
-#else
-		const int nr_channels = in->ch_layout.nb_channels;
-#endif
-
-		for (int y = 0; y < nr_channels; y++)
-		{
-			float* data = (float*)pp[y];
-			for (int x = 0; x < in->nb_samples; x++)
-			{
-				const float val1 = data[x];
-				if (isnan(val1))
-					data[x] = 0.0f;
-				else if (isinf(val1))
-				{
-					if (val1 < 0.0f)
-						data[x] = -1.0f;
-					else
-						data[x] = 1.0f;
-				}
-			}
-		}
-	}
+	int ret;
 	/* send the packet with the compressed data to the encoder */
-	int ret = avcodec_send_frame(context, in);
+	ret = avcodec_send_frame(context, in);
 
 	if (ret < 0)
 	{
@@ -499,17 +470,16 @@ static BOOL ffmpeg_encode_frame(AVCodecContext* WINPR_RESTRICT context, AVFrame*
 	return TRUE;
 }
 
-static BOOL ffmpeg_fill_frame(AVFrame* WINPR_RESTRICT frame,
-                              const AUDIO_FORMAT* WINPR_RESTRICT inputFormat,
-                              const BYTE* WINPR_RESTRICT data, size_t size)
+static BOOL ffmpeg_fill_frame(AVFrame* frame, const AUDIO_FORMAT* inputFormat, const BYTE* data,
+                              size_t size)
 {
-	int ret = 0;
-	int bpp = 0;
-#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
+	int ret, bpp;
+
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
+	av_channel_layout_default(&frame->ch_layout, inputFormat->nChannels);
+#else
 	frame->channels = inputFormat->nChannels;
 	frame->channel_layout = av_get_default_channel_layout(frame->channels);
-#else
-	av_channel_layout_default(&frame->ch_layout, inputFormat->nChannels);
 #endif
 	frame->sample_rate = inputFormat->nSamplesPerSec;
 	frame->format = ffmpeg_sample_format(inputFormat);
@@ -528,15 +498,14 @@ static BOOL ffmpeg_fill_frame(AVFrame* WINPR_RESTRICT frame,
 	return TRUE;
 }
 #if defined(SWRESAMPLE_FOUND)
-static BOOL ffmpeg_decode(AVCodecContext* WINPR_RESTRICT dec_ctx, AVPacket* WINPR_RESTRICT pkt,
-                          AVFrame* WINPR_RESTRICT frame, SwrContext* WINPR_RESTRICT resampleContext,
-                          AVFrame* WINPR_RESTRICT resampled, wStream* WINPR_RESTRICT out)
+static BOOL ffmpeg_decode(AVCodecContext* dec_ctx, AVPacket* pkt, AVFrame* frame,
+                          SwrContext* resampleContext, AVFrame* resampled, wStream* out)
 #else
 static BOOL ffmpeg_decode(AVCodecContext* dec_ctx, AVPacket* pkt, AVFrame* frame,
                           AVAudioResampleContext* resampleContext, AVFrame* resampled, wStream* out)
 #endif
 {
-	int ret = 0;
+	int ret;
 	/* send the packet with the compressed data to the decoder */
 	ret = avcodec_send_packet(dec_ctx, pkt);
 
@@ -601,12 +570,13 @@ static BOOL ffmpeg_decode(AVCodecContext* dec_ctx, AVPacket* pkt, AVFrame* frame
 		}
 
 		{
-#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
-			const size_t channels = resampled->channels;
+
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
+			const size_t nrchannels = resampled->ch_layout.nb_channels;
 #else
-			const size_t channels = resampled->ch_layout.nb_channels;
+			const size_t nrchannels = resampled->channels;
 #endif
-			const size_t data_size = channels * resampled->nb_samples * 2;
+			const size_t data_size = nrchannels * resampled->nb_samples * 2;
 			if (!Stream_EnsureRemainingCapacity(out, data_size))
 				return FALSE;
 			Stream_Write(out, resampled->data[0], data_size);
@@ -616,7 +586,7 @@ static BOOL ffmpeg_decode(AVCodecContext* dec_ctx, AVPacket* pkt, AVFrame* frame
 	return TRUE;
 }
 
-BOOL freerdp_dsp_ffmpeg_supports_format(const AUDIO_FORMAT* WINPR_RESTRICT format, BOOL encode)
+BOOL freerdp_dsp_ffmpeg_supports_format(const AUDIO_FORMAT* format, BOOL encode)
 {
 	enum AVCodecID id = ffmpeg_get_avcodec(format);
 
@@ -631,26 +601,23 @@ BOOL freerdp_dsp_ffmpeg_supports_format(const AUDIO_FORMAT* WINPR_RESTRICT forma
 
 FREERDP_DSP_CONTEXT* freerdp_dsp_ffmpeg_context_new(BOOL encode)
 {
-	FREERDP_DSP_CONTEXT* context = NULL;
+	FREERDP_DSP_CONTEXT* context;
 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100)
 	avcodec_register_all();
 #endif
 	context = calloc(1, sizeof(FREERDP_DSP_CONTEXT));
 
 	if (!context)
-		return NULL;
+		goto fail;
+
+	if (!freerdp_dsp_common_context_init(&context->common, encode))
+		goto fail;
 
-	context->channelmix = Stream_New(NULL, 1024);
-	if (!context->channelmix)
-	{
-		WINPR_PRAGMA_DIAG_PUSH
-		WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC
-		freerdp_dsp_ffmpeg_context_free(context);
-		WINPR_PRAGMA_DIAG_POP
-		return NULL;
-	}
-	context->encoder = encode;
 	return context;
+
+fail:
+	freerdp_dsp_ffmpeg_context_free(context);
+	return NULL;
 }
 
 void freerdp_dsp_ffmpeg_context_free(FREERDP_DSP_CONTEXT* context)
@@ -658,30 +625,29 @@ void freerdp_dsp_ffmpeg_context_free(FREERDP_DSP_CONTEXT* context)
 	if (context)
 	{
 		ffmpeg_close_context(context);
-		Stream_Free(context->channelmix, TRUE);
+		freerdp_dsp_common_context_uninit(&context->common);
 		free(context);
 	}
 }
 
-BOOL freerdp_dsp_ffmpeg_context_reset(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
-                                      const AUDIO_FORMAT* WINPR_RESTRICT targetFormat)
+BOOL freerdp_dsp_ffmpeg_context_reset(FREERDP_DSP_CONTEXT* context,
+                                      const AUDIO_FORMAT* targetFormat)
 {
 	if (!context || !targetFormat)
 		return FALSE;
 
 	ffmpeg_close_context(context);
-	context->format = *targetFormat;
+	context->common.format = *targetFormat;
 	return ffmpeg_open_context(context);
 }
 
-static BOOL freerdp_dsp_channel_mix(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
-                                    const BYTE* WINPR_RESTRICT src, size_t size,
-                                    const AUDIO_FORMAT* WINPR_RESTRICT srcFormat,
-                                    const BYTE** WINPR_RESTRICT data, size_t* WINPR_RESTRICT length,
-                                    AUDIO_FORMAT* WINPR_RESTRICT dstFormat)
+static BOOL freerdp_dsp_channel_mix(FREERDP_DSP_CONTEXT* context, const BYTE* src, size_t size,
+                                    const AUDIO_FORMAT* srcFormat, const BYTE** data,
+                                    size_t* length, AUDIO_FORMAT* dstFormat)
 {
-	UINT32 bpp = 0;
-	size_t samples = 0;
+	UINT32 bpp;
+	size_t samples;
+	size_t x, y;
 
 	if (!context || !data || !length || !dstFormat)
 		return FALSE;
@@ -693,42 +659,43 @@ static BOOL freerdp_dsp_channel_mix(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	samples = size / bpp / srcFormat->nChannels;
 
 	*dstFormat = *srcFormat;
-	if (context->format.nChannels == srcFormat->nChannels)
+	if (context->common.format.nChannels == srcFormat->nChannels)
 	{
 		*data = src;
 		*length = size;
 		return TRUE;
 	}
 
-	Stream_SetPosition(context->channelmix, 0);
+	Stream_SetPosition(context->common.channelmix, 0);
 
 	/* Destination has more channels than source */
-	if (context->format.nChannels > srcFormat->nChannels)
+	if (context->common.format.nChannels > srcFormat->nChannels)
 	{
 		switch (srcFormat->nChannels)
 		{
 			case 1:
-				if (!Stream_EnsureCapacity(context->channelmix, size * 2))
+				if (!Stream_EnsureCapacity(context->common.channelmix, size * 2))
 					return FALSE;
 
-				for (UINT32 x = 0; x < samples; x++)
+				for (x = 0; x < samples; x++)
 				{
-					for (UINT32 y = 0; y < bpp; y++)
-						Stream_Write_UINT8(context->channelmix, src[x * bpp + y]);
+					for (y = 0; y < bpp; y++)
+						Stream_Write_UINT8(context->common.channelmix, src[x * bpp + y]);
 
-					for (UINT32 y = 0; y < bpp; y++)
-						Stream_Write_UINT8(context->channelmix, src[x * bpp + y]);
+					for (y = 0; y < bpp; y++)
+						Stream_Write_UINT8(context->common.channelmix, src[x * bpp + y]);
 				}
 
-				Stream_SealLength(context->channelmix);
-				*data = Stream_Buffer(context->channelmix);
-				*length = Stream_Length(context->channelmix);
+				Stream_SealLength(context->common.channelmix);
+				*data = Stream_Buffer(context->common.channelmix);
+				*length = Stream_Length(context->common.channelmix);
 				dstFormat->nChannels = 2;
 				return TRUE;
 
 			case 2:  /* We only support stereo, so we can not handle this case. */
 			default: /* Unsupported number of channels */
-				WLog_WARN(TAG, "unsupported source channel count %" PRIu16, srcFormat->nChannels);
+				WLog_WARN(TAG, "[%s] unsupported source channel count %" PRIu16, __FUNCTION__,
+				          srcFormat->nChannels);
 				return FALSE;
 		}
 	}
@@ -737,40 +704,40 @@ static BOOL freerdp_dsp_channel_mix(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	switch (srcFormat->nChannels)
 	{
 		case 2:
-			if (!Stream_EnsureCapacity(context->channelmix, size / 2))
+			if (!Stream_EnsureCapacity(context->common.channelmix, size / 2))
 				return FALSE;
 
 			/* Simply drop second channel.
 			 * TODO: Calculate average */
-			for (UINT32 x = 0; x < samples; x++)
+			for (x = 0; x < samples; x++)
 			{
-				for (UINT32 y = 0; y < bpp; y++)
-					Stream_Write_UINT8(context->channelmix, src[2 * x * bpp + y]);
+				for (y = 0; y < bpp; y++)
+					Stream_Write_UINT8(context->common.channelmix, src[2 * x * bpp + y]);
 			}
 
-			Stream_SealLength(context->channelmix);
-			*data = Stream_Buffer(context->channelmix);
-			*length = Stream_Length(context->channelmix);
+			Stream_SealLength(context->common.channelmix);
+			*data = Stream_Buffer(context->common.channelmix);
+			*length = Stream_Length(context->common.channelmix);
 			dstFormat->nChannels = 1;
 			return TRUE;
 
 		case 1:  /* Invalid, do we want to use a 0 channel sound? */
 		default: /* Unsupported number of channels */
-			WLog_WARN(TAG, "unsupported channel count %" PRIu16, srcFormat->nChannels);
+			WLog_WARN(TAG, "[%s] unsupported channel count %" PRIu16, __FUNCTION__,
+			          srcFormat->nChannels);
 			return FALSE;
 	}
 
 	return FALSE;
 }
 
-BOOL freerdp_dsp_ffmpeg_encode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
-                               const AUDIO_FORMAT* WINPR_RESTRICT format,
-                               const BYTE* WINPR_RESTRICT data, size_t length,
-                               wStream* WINPR_RESTRICT out)
+BOOL freerdp_dsp_ffmpeg_encode(FREERDP_DSP_CONTEXT* context, const AUDIO_FORMAT* format,
+                               const BYTE* data, size_t length, wStream* out)
 {
+	int rc;
 	AUDIO_FORMAT fmt = { 0 };
 
-	if (!context || !format || !data || !out || !context->encoder)
+	if (!context || !format || !data || !out || !context->common.encoder)
 		return FALSE;
 
 	if (!context || !data || !out)
@@ -810,17 +777,17 @@ BOOL freerdp_dsp_ffmpeg_encode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 			if (inSamples + (int)context->bufferedSamples > context->context->frame_size)
 				inSamples = context->context->frame_size - (int)context->bufferedSamples;
 
-#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100)
-			const int channels = context->context->channels;
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100)
+			const size_t nrchannels = context->context->ch_layout.nb_channels;
 #else
-			const int channels = context->context->ch_layout.nb_channels;
+			const size_t nrchannels = context->context->channels;
 #endif
-			const int rc =
-			    av_samples_copy(context->buffered->extended_data, context->resampled->extended_data,
-			                    (int)context->bufferedSamples, copied, inSamples, channels,
-			                    context->context->sample_fmt);
+
+			rc = av_samples_copy(context->buffered->extended_data,
+			                     context->resampled->extended_data, (int)context->bufferedSamples,
+			                     copied, inSamples, nrchannels, context->context->sample_fmt);
 			if (rc < 0)
 				return FALSE;
 			rest -= inSamples;
 			copied += inSamples;
 			context->bufferedSamples += (UINT32)inSamples;
@@ -839,12 +804,10 @@ BOOL freerdp_dsp_ffmpeg_encode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
 	}
 }
 
-BOOL freerdp_dsp_ffmpeg_decode(FREERDP_DSP_CONTEXT* WINPR_RESTRICT context,
-                               const AUDIO_FORMAT* WINPR_RESTRICT srcFormat,
-                               const BYTE* WINPR_RESTRICT data, size_t length,
-                               wStream* WINPR_RESTRICT out)
+BOOL freerdp_dsp_ffmpeg_decode(FREERDP_DSP_CONTEXT* context, const AUDIO_FORMAT* srcFormat,
+                               const BYTE* data, size_t length, wStream* out)
 {
-	if (!context || !srcFormat || !data || !out || context->encoder)
+	if (!context || !srcFormat || !data || !out || context->common.encoder)
 		return FALSE;
 
 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 133, 100)