plugin_audio_dsp_denoiser.cxx 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. /* Copyright (C) 2013 Mamadou DIOP
  2. * Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
  3. *
  4. * This file is part of Open Source Doubango Framework.
  5. *
  6. * DOUBANGO is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * DOUBANGO is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with DOUBANGO.
  18. */
  19. // MS Voice Capture DSP: http://msdn.microsoft.com/en-us/library/windows/desktop/ff819492(v=vs.85).aspx
  20. // Features:
  21. // - Acoustic echo cancellation (AEC)
  22. // - Microphone array processing
  23. // - Noise suppression
  24. // - Automatic gain control
  25. // - Voice activity detection
  26. #include "plugin_audio_dsp_utils.h"
  27. #include "plugin_audio_dsp_mediabuffer.h"
  28. #include "tinymedia/tmedia_denoise.h"
  29. #include "tinymedia/tmedia_defaults.h"
  30. #include "tsk_string.h"
  31. #include "tsk_memory.h"
  32. #include "tsk_debug.h"
  33. #include <Wmcodecdsp.h>
  34. #include <Dmo.h>
  35. static const UINT32 g_nMicrophoneStreamIndex = 0;
  36. static const UINT32 g_nSpeakerStreamIndex = 0;
  37. static const UINT32 g_nBitsPerSample = 16;
  38. static const UINT32 g_nChannles = 1; // FIXME
  39. static const UINT32 g_nFrameDuration = 20; // FIXME
  40. /** Speex denoiser*/
  41. typedef struct plugin_audio_dsp_denoise_s {
  42. TMEDIA_DECLARE_DENOISE;
  43. bool bOpened;
  44. LONGLONG rtStart;
  45. UINT64 rtDuration;
  46. uint32_t echo_tail;
  47. tsk_size_t playback_size_samples;
  48. tsk_size_t playback_size_bytes;
  49. tsk_size_t playback_channels;
  50. tsk_size_t record_size_samples;
  51. tsk_size_t record_size_bytes;
  52. tsk_size_t record_channels;
  53. IMediaObject* pInst;
  54. IMediaBuffer *pBufferIn;
  55. IMediaBuffer *pBufferOut;
  56. }
  57. plugin_audio_dsp_denoise_t;
  58. static int plugin_audio_dsp_denoise_set(tmedia_denoise_t* _self, const tmedia_param_t* param)
  59. {
  60. plugin_audio_dsp_denoise_t *self = (plugin_audio_dsp_denoise_t *)_self;
  61. if(!self || !param) {
  62. TSK_DEBUG_ERROR("Invalid parameter");
  63. return -1;
  64. }
  65. if(param->value_type == tmedia_pvt_int32) {
  66. if(tsk_striequals(param->key, "echo-tail")) {
  67. _self->echo_tail = *((int32_t*)param->value);
  68. TSK_DEBUG_INFO("ms_voice_dsp_set_echo_tail(%d)", _self->echo_tail);
  69. if(self->pInst) {
  70. IPropertyStore* pPropStore = NULL;
  71. HRESULT hr = self->pInst->QueryInterface(IID_PPV_ARGS(&pPropStore));
  72. if(SUCCEEDED(hr)) {
  73. DMO_MEDIA_TYPE mt = {0};
  74. PROPVARIANT var = {0};
  75. var.vt = VT_UI4;
  76. var.ulVal = _self->echo_tail;
  77. hr = pPropStore->SetValue(MFPKEY_WMAAECMA_FEATR_ECHO_LENGTH , var);
  78. }
  79. SafeRelease(&pPropStore);
  80. }
  81. return 0;
  82. }
  83. }
  84. return -1;
  85. }
  86. static int plugin_audio_dsp_denoise_open(tmedia_denoise_t* self, uint32_t record_frame_size_samples, uint32_t record_sampling_rate, uint32_t playback_frame_size_samples, uint32_t playback_sampling_rate)
  87. {
  88. plugin_audio_dsp_denoise_t *denoiser = (plugin_audio_dsp_denoise_t *)self;
  89. HRESULT hr = S_OK;
  90. DMO_MEDIA_TYPE mt = {0};
  91. PROPVARIANT var = {0};
  92. IPropertyStore* pPropStore = NULL;
  93. TSK_DEBUG_INFO("[MS Voice Capture DSP] AEC_ENABLED=%d ECHO_TAIL=%d,\nAGC_ENABLED=%d,\nNOISE_SUPP_ENABLED=%d,\nVAD_ENABLED=%d",
  94. self->echo_supp_enabled, self->echo_tail,
  95. self->agc_enabled,
  96. self->noise_supp_enabled,
  97. self->vad_enabled
  98. );
  99. if(denoiser->bOpened) {
  100. TSK_DEBUG_ERROR("Denoiser already opened");
  101. CHECK_HR(hr = E_FAIL);
  102. }
  103. CHECK_HR(hr = AudioDSPUtils::MoInitMediaType(
  104. record_sampling_rate,
  105. g_nBitsPerSample,
  106. g_nChannles,
  107. &mt));
  108. CHECK_HR(hr = CoCreateInstance(CLSID_CWMAudioAEC, NULL,
  109. CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&denoiser->pInst)));
  110. CHECK_HR(hr = denoiser->pInst->QueryInterface(IID_PPV_ARGS(&pPropStore)));
  111. // If the input format does not match the output format, the DMO automatically performs sample-rate conversion.
  112. CHECK_HR(hr = denoiser->pInst->SetInputType(0, &mt, 0));
  113. CHECK_HR(hr = denoiser->pInst->SetOutputType(0, &mt, 0));
  114. // Enables the application to override the default settings on various properties of the Voice Capture DSP
  115. // http://msdn.microsoft.com/en-us/library/windows/desktop/ff819422(v=vs.85).aspx
  116. var.vt = VT_BOOL;
  117. var.boolVal = VARIANT_TRUE;
  118. CHECK_HR(hr = pPropStore->SetValue(MFPKEY_WMAAECMA_FEATURE_MODE, var));
  119. // Switch to filter mode: http://msdn.microsoft.com/en-us/library/windows/desktop/ff819410(v=vs.85).aspx
  120. var.vt = VT_BOOL;
  121. var.boolVal = VARIANT_FALSE; /* VARIANT_FALSE: Filter, VARIANT_TRUE: Source */
  122. CHECK_HR(hr = pPropStore->SetValue(MFPKEY_WMAAECMA_DMO_SOURCE_MODE, var));
  123. // Enable AEC
  124. if(self->echo_supp_enabled) {
  125. // Enable AEC: http://msdn.microsoft.com/en-us/library/windows/desktop/ff819427(v=vs.85).aspx
  126. var.vt = VT_I4;
  127. var.lVal = SINGLE_CHANNEL_AEC;
  128. CHECK_HR(hr = pPropStore->SetValue(MFPKEY_WMAAECMA_SYSTEM_MODE, var));
  129. // Echo Tail (milliseconds): http://msdn.microsoft.com/en-us/library/windows/desktop/ff819414(v=vs.85).aspx
  130. if(!self->echo_tail) {
  131. self->echo_tail = tmedia_defaults_get_echo_tail();
  132. }
  133. var.vt = VT_I4;
  134. var.lVal = self->echo_tail ? self->echo_tail : 256;
  135. CHECK_HR(hr = pPropStore->SetValue(MFPKEY_WMAAECMA_FEATR_ECHO_LENGTH, var));
  136. }
  137. // Automatic Gain Control (AGC): http://msdn.microsoft.com/en-us/library/windows/desktop/ff819412(v=vs.85).aspx
  138. var.vt = VT_BOOL;
  139. var.boolVal = self->agc_enabled ? VARIANT_TRUE : VARIANT_FALSE;
  140. CHECK_HR(hr = pPropStore->SetValue(MFPKEY_WMAAECMA_FEATR_AGC, var));
  141. // Noise suppression (NS): http://msdn.microsoft.com/en-us/library/windows/desktop/ff819420(v=vs.85).aspx
  142. var.vt = VT_I4;
  143. var.lVal = self->noise_supp_enabled ? 1 : 0;
  144. CHECK_HR(hr = pPropStore->SetValue(MFPKEY_WMAAECMA_FEATR_NS, var));
  145. // Automatic Gain Control (AGC): http://msdn.microsoft.com/en-us/library/windows/desktop/ff819412(v=vs.85).aspx
  146. var.vt = VT_BOOL;
  147. var.boolVal = self->agc_enabled ? VARIANT_TRUE : VARIANT_FALSE;
  148. CHECK_HR(hr = pPropStore->SetValue(MFPKEY_WMAAECMA_FEATR_AGC, var));
  149. // Voice Activity Detection (VAD): http://msdn.microsoft.com/en-us/library/windows/desktop/ff819421(v=vs.85).aspx
  150. var.vt = VT_I4;
  151. var.lVal = self->vad_enabled ? AEC_VAD_FOR_SILENCE_SUPPRESSION : AEC_VAD_DISABLED;
  152. CHECK_HR(hr = pPropStore->SetValue(MFPKEY_WMAAECMA_FEATR_VAD, var));
  153. // Recommended to allocate resources
  154. CHECK_HR(hr = denoiser->pInst->AllocateStreamingResources()); // FIXME: returns E_FAIL
  155. denoiser->record_channels = g_nChannles;
  156. denoiser->record_size_samples = ((record_sampling_rate * g_nFrameDuration) / 1000) << (denoiser->record_channels == 2 ? 1 : 0);
  157. denoiser->record_size_bytes = (denoiser->record_size_samples * (g_nBitsPerSample >> 3));
  158. denoiser->playback_channels = g_nChannles;
  159. denoiser->playback_size_samples = ((playback_sampling_rate * g_nFrameDuration) / 1000) << (denoiser->playback_channels == 2 ? 1 : 0);
  160. denoiser->playback_size_bytes = (denoiser->playback_size_samples * (g_nBitsPerSample >> 3));
  161. denoiser->rtStart = 0;
  162. denoiser->rtDuration = PLUGIN_AUDIO_DSP_MILLIS_TO_100NS(g_nFrameDuration); // milliseconds -> 100ns
  163. bail:
  164. denoiser->bOpened = SUCCEEDED(hr);
  165. MoFreeMediaType(&mt);
  166. SafeRelease(&pPropStore);
  167. return denoiser->bOpened ? 0 : -1;
  168. }
  169. // playback = "stream 1"
  170. // /!\Thread safety: could be called at the same time as plugin_audio_dsp_denoise_process_record()
  171. static int plugin_audio_dsp_denoise_echo_playback(tmedia_denoise_t* self, const void* echo_frame, uint32_t echo_frame_size_bytes)
  172. {
  173. plugin_audio_dsp_denoise_t *denoiser = (plugin_audio_dsp_denoise_t *)self;
  174. HRESULT hr = S_OK;
  175. if(!self || !echo_frame || !echo_frame_size_bytes) {
  176. CHECK_HR(hr = E_POINTER);
  177. }
  178. if(!denoiser->bOpened) {
  179. TSK_DEBUG_ERROR("Denoiser not opened");
  180. CHECK_HR(hr = E_FAIL);
  181. }
  182. if(denoiser->record_size_bytes != echo_frame_size_bytes) {
  183. TSK_DEBUG_ERROR("Size mismatch: %u<>%u", denoiser->record_size_bytes, echo_frame_size_bytes);
  184. CHECK_HR(hr = E_INVALIDARG);
  185. }
  186. bail:
  187. return SUCCEEDED(hr) ? 0 : -1;
  188. }
  189. // record = "stream 0"
  190. // /!\Thread safety: could be called at the same time as plugin_audio_dsp_denoise_echo_playback()
  191. static int plugin_audio_dsp_denoise_process_record(tmedia_denoise_t* self, void* audio_frame, uint32_t audio_frame_size_bytes, tsk_bool_t* silence_or_noise)
  192. {
  193. plugin_audio_dsp_denoise_t *denoiser = (plugin_audio_dsp_denoise_t *)self;
  194. HRESULT hr = S_OK;
  195. BYTE* pBufferInPtr = NULL;
  196. DWORD dwBufferInSize = 0;
  197. if(!self || !audio_frame || !audio_frame_size_bytes || !silence_or_noise) {
  198. CHECK_HR(hr = E_POINTER);
  199. }
  200. if(!denoiser->bOpened) {
  201. TSK_DEBUG_ERROR("Denoiser not opened");
  202. CHECK_HR(hr = E_FAIL);
  203. }
  204. if(denoiser->record_size_bytes != audio_frame_size_bytes) {
  205. TSK_DEBUG_ERROR("Size mismatch: %u<>%u", denoiser->record_size_bytes, audio_frame_size_bytes);
  206. CHECK_HR(hr = E_INVALIDARG);
  207. }
  208. if(!denoiser->pBufferIn) {
  209. CHECK_HR(hr = AudioDSPMediaBuffer::Create(denoiser->record_size_bytes, &denoiser->pBufferIn));
  210. }
  211. else {
  212. DWORD dwMaxLength = 0;
  213. CHECK_HR(hr = denoiser->pBufferIn->GetMaxLength(&dwMaxLength));
  214. if(dwMaxLength < denoiser->record_size_bytes) {
  215. SafeRelease(&denoiser->pBufferIn);
  216. CHECK_HR(hr = AudioDSPMediaBuffer::Create(denoiser->record_size_bytes, &denoiser->pBufferIn));
  217. }
  218. }
  219. // Get memory pointer to the input buffer
  220. CHECK_HR(hr = denoiser->pBufferIn->GetBufferAndLength(&pBufferInPtr, NULL));
  221. // Copy data
  222. dwBufferInSize = TSK_MIN(audio_frame_size_bytes, denoiser->record_size_bytes);
  223. memcpy(pBufferInPtr, audio_frame, dwBufferInSize);
  224. CHECK_HR(hr = denoiser->pBufferIn->SetLength(dwBufferInSize));
  225. // Process input
  226. hr = denoiser->pInst->ProcessInput(
  227. g_nMicrophoneStreamIndex,
  228. denoiser->pBufferIn,
  229. (/*DMO_INPUT_DATA_BUFFERF_TIME | DMO_INPUT_DATA_BUFFERF_TIMELENGTH*/0),
  230. denoiser->rtStart,
  231. denoiser->rtDuration);
  232. if(hr == DMO_E_NOTACCEPTING) {
  233. hr = S_OK;
  234. }
  235. CHECK_HR(hr);
  236. denoiser->rtStart += denoiser->rtDuration;
  237. bail:
  238. return SUCCEEDED(hr) ? 0 : -1;
  239. }
  240. static int plugin_audio_dsp_denoise_process_playback(tmedia_denoise_t* self, void* audio_frame, uint32_t audio_frame_size_bytes)
  241. {
  242. plugin_audio_dsp_denoise_t *denoiser = (plugin_audio_dsp_denoise_t *)self;
  243. (void)(denoiser);
  244. // Not mandatory to denoise audio before playback.
  245. // All Doubango clients support noise suppression.
  246. return 0;
  247. }
  248. static int plugin_audio_dsp_denoise_close(tmedia_denoise_t* self)
  249. {
  250. plugin_audio_dsp_denoise_t *denoiser = (plugin_audio_dsp_denoise_t *)self;
  251. if(!self) {
  252. TSK_DEBUG_ERROR("Invalid parameter");
  253. return -1;
  254. }
  255. denoiser->bOpened = false;
  256. SafeRelease(&denoiser->pBufferIn);
  257. SafeRelease(&denoiser->pBufferOut);
  258. SafeRelease(&denoiser->pInst);
  259. return 0;
  260. }
  261. //
  262. // MS Voice Capture DSP Plugin definition
  263. //
  264. /* constructor */
  265. static tsk_object_t* plugin_audio_dsp_denoise_ctor(tsk_object_t * self, va_list * app)
  266. {
  267. plugin_audio_dsp_denoise_t *denoise = (plugin_audio_dsp_denoise_t*)self;
  268. if(denoise) {
  269. AudioDSPUtils::Startup();
  270. /* init base */
  271. tmedia_denoise_init(TMEDIA_DENOISE(denoise));
  272. /* init self */
  273. }
  274. return self;
  275. }
  276. /* destructor */
  277. static tsk_object_t* plugin_audio_dsp_denoise_dtor(tsk_object_t * self)
  278. {
  279. plugin_audio_dsp_denoise_t *denoiser = (plugin_audio_dsp_denoise_t*)self;
  280. if(denoiser) {
  281. /* deinit base */
  282. tmedia_denoise_deinit(TMEDIA_DENOISE(denoiser));
  283. /* deinit self */
  284. SafeRelease(&denoiser->pBufferIn);
  285. SafeRelease(&denoiser->pBufferOut);
  286. SafeRelease(&denoiser->pInst);
  287. TSK_DEBUG_INFO("*** MS Voice Capture DSP destroyed ***");
  288. }
  289. return self;
  290. }
  291. /* object definition */
  292. static const tsk_object_def_t plugin_audio_dsp_denoise_def_s = {
  293. sizeof(plugin_audio_dsp_denoise_t),
  294. plugin_audio_dsp_denoise_ctor,
  295. plugin_audio_dsp_denoise_dtor,
  296. tsk_null,
  297. };
  298. /* plugin definition*/
  299. static const tmedia_denoise_plugin_def_t plugin_audio_dsp_denoise_plugin_def_s = {
  300. &plugin_audio_dsp_denoise_def_s,
  301. "MS Voice Capture DSP", /* http://msdn.microsoft.com/en-us/library/windows/desktop/ff819492(v=vs.85).aspx */
  302. plugin_audio_dsp_denoise_set,
  303. plugin_audio_dsp_denoise_open,
  304. plugin_audio_dsp_denoise_echo_playback,
  305. plugin_audio_dsp_denoise_process_record,
  306. plugin_audio_dsp_denoise_process_playback,
  307. plugin_audio_dsp_denoise_close,
  308. };
  309. const tmedia_denoise_plugin_def_t *plugin_audio_dsp_denoise_plugin_def_t = &plugin_audio_dsp_denoise_plugin_def_s;