plugin_cuda_codec_h264.cxx 49 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323
  1. /* Copyright (C) 2013 Mamadou DIOP
  2. * Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
  3. *
  4. * This file is part of Open Source Doubango Framework.
  5. *
  6. * DOUBANGO is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * DOUBANGO is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with DOUBANGO.
  18. */
  19. #include "plugin_cuda_config.h"
  20. #include "plugin_cuda_utils.h"
  21. #include "tinydav/codecs/h264/tdav_codec_h264_common.h"
  22. #include "tinyrtp/rtp/trtp_rtp_packet.h"
  23. #include "tinymedia/tmedia_codec.h"
  24. #include "tinymedia/tmedia_params.h"
  25. #include "tinymedia/tmedia_defaults.h"
  26. #include "tsk_mutex.h"
  27. #include "tsk_params.h"
  28. #include "tsk_memory.h"
  29. #include "tsk_debug.h"
  30. #include <unknwn.h>
  31. #include <nvcuvid.h>
  32. #include <cuviddec.h>
  33. #include <NVEncoderAPI.h>
  34. #include <NVEncodeDataTypes.h>
  35. #include <d3d9.h>
  36. #include <cudad3d9.h>
  37. #include <cuda/types.h>
  38. #include <cuda.h>
  39. #include <Windows.h>
  40. typedef struct cuda_codec_h264_s {
  41. TDAV_DECLARE_CODEC_H264_COMMON;
  42. // Encoder
  43. struct {
  44. NVEncoder pInst;
  45. NVEncoderParams ctxParams;
  46. NVVE_CallbackParams clbParams;
  47. void* pBufferPtr;
  48. tsk_size_t nBufferSize;
  49. int64_t frame_count;
  50. tsk_bool_t force_idr;
  51. int32_t quality; // [1-31]
  52. int rotation;
  53. int neg_width;
  54. int neg_height;
  55. int neg_fps;
  56. int max_bitrate_bps;
  57. int32_t max_bw_kpbs;
  58. tsk_bool_t passthrough; // whether to bypass encoding
  59. } encoder;
  60. // decoder
  61. struct {
  62. CUvideodecoder pInst;
  63. CUVIDDECODECREATEINFO cuInfo;
  64. CUvideoparser cuParser;
  65. CUVIDPARSERPARAMS cuPaserParams;
  66. CUdevice cuDevice;
  67. IDirect3D9 *pD3D9;
  68. IDirect3DDevice9 *pD3D9Device;
  69. CUcontext cuContext;
  70. struct {
  71. void *pcuPtr; // MUST bee freed using cuMemFreeHost()
  72. tsk_size_t nSize;
  73. tsk_size_t nPitch;
  74. tsk_bool_t bAvail;
  75. } cuBuffer;
  76. void* accumulator;
  77. tsk_size_t accumulator_pos;
  78. tsk_size_t accumulator_size;
  79. uint16_t last_seq;
  80. tsk_bool_t passthrough; // whether to bypass decoding
  81. tsk_mutex_handle_t *phMutex;
  82. } decoder;
  83. }
  84. cuda_codec_h264_t;
  85. #if !defined(PLUGIN_CUDA_H264_GOP_SIZE_IN_SECONDS)
  86. # define PLUGIN_CUDA_H264_GOP_SIZE_IN_SECONDS 25
  87. #endif
  88. #if !defined(PLUGIN_CUDA_H264_MAX_FRM_CNT)
  89. # define PLUGIN_CUDA_H264_MAX_FRM_CNT 2
  90. #endif
  91. static int cuda_codec_h264_init(cuda_codec_h264_t* self, profile_idc_t profile);
  92. static int cuda_codec_h264_deinit(cuda_codec_h264_t* self);
  93. static int cuda_codec_h264_open_encoder(cuda_codec_h264_t* self);
  94. static int cuda_codec_h264_close_encoder(cuda_codec_h264_t* self);
  95. static int cuda_codec_h264_open_decoder(cuda_codec_h264_t* self);
  96. static int cuda_codec_h264_close_decoder(cuda_codec_h264_t* self);
  97. static inline tsk_size_t _cuda_codec_h264_pict_layout(cuda_codec_h264_t* self, void**output, tsk_size_t *output_size);
  98. static int CUDAAPI _NVCallback_HandleVideoSequence(void *pvUserData, CUVIDEOFORMAT *pFormat);
  99. static int CUDAAPI _NVCallback_HandlePictureDecode(void *pvUserData, CUVIDPICPARAMS *pPicParams);
  100. static int CUDAAPI _NVCallback_HandlePictureDisplay(void *pvUserData, CUVIDPARSERDISPINFO *pPicParams);
  101. static unsigned char* CUDAAPI _NVCallback_HandleAcquireBitStream(int *pBufferSize, void *pUserdata);
  102. static void CUDAAPI _NVCallback_HandleReleaseBitStream(int nBytesInBuffer, unsigned char *cb,void *pUserdata);
  103. static void CUDAAPI _NVCallback_HandleOnBeginFrame(const NVVE_BeginFrameInfo *pbfi, void *pUserdata);
  104. static void CUDAAPI _NVCallback_HandleOnEndFrame(const NVVE_EndFrameInfo *pefi, void *pUserdata);
  105. /* ============ H.264 Base/Main Profile X.X Plugin interface functions ================= */
  106. static int cuda_codec_h264_set(tmedia_codec_t* self, const tmedia_param_t* param)
  107. {
  108. cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
  109. if(!self->opened) {
  110. TSK_DEBUG_ERROR("Codec not opened");
  111. return -1;
  112. }
  113. if(param->value_type == tmedia_pvt_int32) {
  114. if(tsk_striequals(param->key, "action")) {
  115. tmedia_codec_action_t action = (tmedia_codec_action_t)TSK_TO_INT32((uint8_t*)param->value);
  116. switch(action) {
  117. case tmedia_codec_action_encode_idr: {
  118. h264->encoder.force_idr = tsk_true;
  119. break;
  120. }
  121. case tmedia_codec_action_bw_down: {
  122. h264->encoder.quality = TSK_CLAMP(1, (h264->encoder.quality + 1), 31);
  123. break;
  124. }
  125. case tmedia_codec_action_bw_up: {
  126. h264->encoder.quality = TSK_CLAMP(1, (h264->encoder.quality - 1), 31);
  127. break;
  128. }
  129. }
  130. return 0;
  131. }
  132. else if(tsk_striequals(param->key, "bypass-encoding")) {
  133. h264->encoder.passthrough = *((int32_t*)param->value) ? tsk_true : tsk_false;
  134. TSK_DEBUG_INFO("[H.264] bypass-encoding = %d", h264->encoder.passthrough);
  135. return 0;
  136. }
  137. else if(tsk_striequals(param->key, "bypass-decoding")) {
  138. h264->decoder.passthrough = *((int32_t*)param->value) ? tsk_true : tsk_false;
  139. TSK_DEBUG_INFO("[H.264] bypass-decoding = %d", h264->decoder.passthrough);
  140. return 0;
  141. }
  142. else if(tsk_striequals(param->key, "rotation")) {
  143. int rotation = *((int32_t*)param->value);
  144. if(h264->encoder.rotation != rotation) {
  145. if(self->opened) {
  146. int ret;
  147. h264->encoder.rotation = rotation;
  148. if((ret = cuda_codec_h264_close_encoder(h264))) {
  149. return ret;
  150. }
  151. if((ret = cuda_codec_h264_open_encoder(h264))) {
  152. return ret;
  153. }
  154. }
  155. }
  156. return 0;
  157. }
  158. }
  159. return -1;
  160. }
  161. static int cuda_codec_h264_open(tmedia_codec_t* self)
  162. {
  163. int ret;
  164. cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
  165. if(!h264) {
  166. TSK_DEBUG_ERROR("Invalid parameter");
  167. return -1;
  168. }
  169. /* the caller (base class) already checked that the codec is not opened */
  170. // Encoder
  171. if((ret = cuda_codec_h264_open_encoder(h264))) {
  172. return ret;
  173. }
  174. // Decoder
  175. if((ret = cuda_codec_h264_open_decoder(h264))) {
  176. return ret;
  177. }
  178. return 0;
  179. }
  180. static int cuda_codec_h264_close(tmedia_codec_t* self)
  181. {
  182. cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
  183. if(!h264) {
  184. TSK_DEBUG_ERROR("Invalid parameter");
  185. return -1;
  186. }
  187. /* the caller (base class) alreasy checked that the codec is opened */
  188. // Encoder
  189. cuda_codec_h264_close_encoder(h264);
  190. // Decoder
  191. cuda_codec_h264_close_decoder(h264);
  192. return 0;
  193. }
  194. static tsk_size_t cuda_codec_h264_encode(tmedia_codec_t* self, const void* in_data, tsk_size_t in_size, void** out_data, tsk_size_t* out_max_size)
  195. {
  196. int ret = 0;
  197. NVVE_EncodeFrameParams efparams;
  198. tsk_bool_t send_idr, send_hdr;
  199. unsigned long flags = 0;
  200. cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
  201. tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)self;
  202. if(!self || !in_data || !in_size) {
  203. TSK_DEBUG_ERROR("Invalid parameter");
  204. return 0;
  205. }
  206. if(h264->encoder.passthrough) {
  207. tdav_codec_h264_rtp_encap(common, (const uint8_t*)in_data, in_size);
  208. return 0;
  209. }
  210. if((h264->encoder.ctxParams.iOutputSize[1] * h264->encoder.ctxParams.iOutputSize[0] * 3) >> 1 != in_size) {
  211. /* guard */
  212. TSK_DEBUG_ERROR("Invalid size");
  213. return 0;
  214. }
  215. if(!self->opened || !h264->encoder.pInst /*|| !h264->encoder.pInst->IsReady()*/) {
  216. TSK_DEBUG_ERROR("Encoder not opened or not ready");
  217. return 0;
  218. }
  219. if(h264->encoder.passthrough) {
  220. tdav_codec_h264_rtp_encap(TDAV_CODEC_H264_COMMON(h264), (const uint8_t*)in_data, in_size);
  221. return 0;
  222. }
  223. HRESULT hr = S_OK;
  224. efparams.Width = h264->encoder.ctxParams.iOutputSize[0];
  225. efparams.Height = h264->encoder.ctxParams.iOutputSize[1];
  226. efparams.Pitch = (h264->encoder.ctxParams.nDeviceMemPitch ? h264->encoder.ctxParams.nDeviceMemPitch : h264->encoder.ctxParams.iOutputSize[0]);
  227. efparams.PictureStruc = (NVVE_PicStruct)h264->encoder.ctxParams.iPictureType;
  228. efparams.SurfFmt = (NVVE_SurfaceFormat)h264->encoder.ctxParams.iSurfaceFormat;
  229. efparams.progressiveFrame = (h264->encoder.ctxParams.iSurfaceFormat == 3) ? 1 : 0;
  230. efparams.repeatFirstField = 0;
  231. efparams.topfieldfirst = (h264->encoder.ctxParams.iSurfaceFormat == 1) ? 1 : 0;
  232. efparams.picBuf = (unsigned char *)in_data;
  233. efparams.bLast = 0;
  234. // send IDR for:
  235. // - the first frame
  236. // - remote peer requested an IDR
  237. // - every second within the first 4seconds
  238. send_idr = (
  239. h264->encoder.frame_count++ == 0
  240. || h264 ->encoder.force_idr
  241. || ( (h264->encoder.frame_count < h264->encoder.neg_fps * 4) && ((h264->encoder.frame_count % h264->encoder.neg_fps)==0) )
  242. );
  243. if(send_idr) {
  244. flags |= 0x04; // FORCE IDR
  245. }
  246. // send SPS and PPS headers for:
  247. // - IDR frames (not required but it's the easiest way to deal with pkt loss)
  248. // - every 5 seconds after the first 4seconds
  249. send_hdr = (
  250. send_idr
  251. || ( (h264->encoder.frame_count % (h264->encoder.neg_fps * 5))==0 )
  252. );
  253. if(send_hdr) {
  254. if(h264->encoder.ctxParams.iDisableSPSPPS) {
  255. unsigned char SPSPPSBuff[1024];
  256. int SPSPPSBuffSize = sizeof(SPSPPSBuff);
  257. hr = NVGetSPSPPS(h264->encoder.pInst, SPSPPSBuff, SPSPPSBuffSize, &SPSPPSBuffSize);
  258. if(SUCCEEDED(hr)) {
  259. int size = 0;
  260. while(size < SPSPPSBuffSize - 2) {
  261. int16_t next_size = ((int16_t)SPSPPSBuff[size])<<1 | ((int16_t)SPSPPSBuff[size + 1]);
  262. tdav_codec_h264_rtp_encap(common, &SPSPPSBuff[size + 2], next_size);
  263. size += next_size + 2;
  264. }
  265. }
  266. else {
  267. TSK_DEBUG_ERROR("NVGetSPSPPS failed with error code = %08x", hr)
  268. }
  269. }
  270. }
  271. // Encode data
  272. CHECK_HR(hr = NVEncodeFrame(h264->encoder.pInst, &efparams, flags, NULL));
  273. // reset
  274. h264->encoder.force_idr = tsk_false;
  275. bail:
  276. return 0;
  277. }
  278. static tsk_size_t cuda_codec_h264_decode(tmedia_codec_t* self, const void* in_data, tsk_size_t in_size, void** out_data, tsk_size_t* out_max_size, const tsk_object_t* proto_hdr)
  279. {
  280. cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)self;
  281. const trtp_rtp_header_t* rtp_hdr = (const trtp_rtp_header_t*)proto_hdr;
  282. const uint8_t* pay_ptr = tsk_null;
  283. tsk_size_t pay_size = 0;
  284. int ret;
  285. tsk_bool_t append_scp;
  286. tsk_bool_t sps_or_pps;
  287. tsk_size_t retsize = 0, size_to_copy = 0;
  288. static const tsk_size_t xmax_size = (3840 * 2160 * 3) >> 3; // >>3 instead of >>1 (not an error)
  289. static tsk_size_t start_code_prefix_size = sizeof(H264_START_CODE_PREFIX);
  290. if(!h264 || !in_data || !in_size || !out_data) {
  291. TSK_DEBUG_ERROR("Invalid parameter");
  292. return 0;
  293. }
  294. if(!self->opened || !h264->encoder.pInst) {
  295. TSK_DEBUG_ERROR("Decoder not opened or not ready");
  296. return 0;
  297. }
  298. HRESULT hr = S_OK;
  299. /* Packet lost? */
  300. if((h264->decoder.last_seq + 1) != rtp_hdr->seq_num && h264->decoder.last_seq) {
  301. TSK_DEBUG_INFO("[H.264] Packet loss, seq_num=%d", (h264->decoder.last_seq + 1));
  302. }
  303. h264->decoder.last_seq = rtp_hdr->seq_num;
  304. /* 5.3. NAL Unit Octet Usage
  305. +---------------+
  306. |0|1|2|3|4|5|6|7|
  307. +-+-+-+-+-+-+-+-+
  308. |F|NRI| Type |
  309. +---------------+
  310. */
  311. if(*((uint8_t*)in_data) & 0x80) {
  312. TSK_DEBUG_WARN("F=1");
  313. /* reset accumulator */
  314. h264->decoder.accumulator = 0;
  315. return 0;
  316. }
  317. /* get payload */
  318. if((ret = tdav_codec_h264_get_pay(in_data, in_size, (const void**)&pay_ptr, &pay_size, &append_scp)) || !pay_ptr || !pay_size) {
  319. TSK_DEBUG_ERROR("Depayloader failed to get H.264 content");
  320. return 0;
  321. }
  322. //append_scp = tsk_true;
  323. size_to_copy = pay_size + (append_scp ? start_code_prefix_size : 0);
  324. // whether it's SPS or PPS (append_scp is false for subsequent FUA chuncks)
  325. sps_or_pps = append_scp && pay_ptr && ((pay_ptr[0] & 0x1F) == 7 || (pay_ptr[0] & 0x1F) == 8);
  326. // start-accumulator
  327. if(!h264->decoder.accumulator) {
  328. if(size_to_copy > xmax_size) {
  329. TSK_DEBUG_ERROR("%u too big to contain valid encoded data. xmax_size=%u", size_to_copy, xmax_size);
  330. return 0;
  331. }
  332. if(!(h264->decoder.accumulator = tsk_calloc(size_to_copy, sizeof(uint8_t)))) {
  333. TSK_DEBUG_ERROR("Failed to allocated new buffer");
  334. return 0;
  335. }
  336. h264->decoder.accumulator_size = size_to_copy;
  337. }
  338. if((h264->decoder.accumulator_pos + size_to_copy) >= xmax_size) {
  339. TSK_DEBUG_ERROR("BufferOverflow");
  340. h264->decoder.accumulator_pos = 0;
  341. return 0;
  342. }
  343. if((h264->decoder.accumulator_pos + size_to_copy) > h264->decoder.accumulator_size) {
  344. if(!(h264->decoder.accumulator = tsk_realloc(h264->decoder.accumulator, (h264->decoder.accumulator_pos + size_to_copy)))) {
  345. TSK_DEBUG_ERROR("Failed to reallocated new buffer");
  346. h264->decoder.accumulator_pos = 0;
  347. h264->decoder.accumulator_size = 0;
  348. return 0;
  349. }
  350. h264->decoder.accumulator_size = (h264->decoder.accumulator_pos + size_to_copy);
  351. }
  352. if(append_scp) {
  353. memcpy(&((uint8_t*)h264->decoder.accumulator)[h264->decoder.accumulator_pos], H264_START_CODE_PREFIX, start_code_prefix_size);
  354. h264->decoder.accumulator_pos += start_code_prefix_size;
  355. }
  356. memcpy(&((uint8_t*)h264->decoder.accumulator)[h264->decoder.accumulator_pos], pay_ptr, pay_size);
  357. h264->decoder.accumulator_pos += pay_size;
  358. // end-accumulator
  359. if(sps_or_pps) {
  360. // http://libav-users.943685.n4.nabble.com/Decode-H264-streams-how-to-fill-AVCodecContext-from-SPS-PPS-td2484472.html
  361. // SPS and PPS should be bundled with IDR
  362. TSK_DEBUG_INFO("Receiving SPS or PPS ...to be tied to an IDR");
  363. }
  364. else if(rtp_hdr->marker) {
  365. if(h264->decoder.passthrough) {
  366. if(*out_max_size < h264->decoder.accumulator_pos) {
  367. if((*out_data = tsk_realloc(*out_data, h264->decoder.accumulator_pos))) {
  368. *out_max_size = h264->decoder.accumulator_pos;
  369. }
  370. else {
  371. *out_max_size = 0;
  372. return 0;
  373. }
  374. }
  375. memcpy(*out_data, h264->decoder.accumulator, h264->decoder.accumulator_pos);
  376. retsize = h264->decoder.accumulator_pos;
  377. }
  378. else {
  379. // !h264->decoder.passthrough
  380. CUVIDSOURCEDATAPACKET pkt;
  381. CUresult cuResult;
  382. pkt.flags = 0;
  383. pkt.payload_size = (unsigned long) h264->decoder.accumulator_pos;
  384. pkt.payload = (unsigned char *)h264->decoder.accumulator;
  385. pkt.timestamp = 0;
  386. // reset accumulator
  387. h264->decoder.accumulator_pos = 0;
  388. cuResult = cuvidParseVideoData(h264->decoder.cuParser, &pkt);
  389. if(cuResult != CUDA_SUCCESS) {
  390. TSK_DEBUG_ERROR("cuvidParseVideoData() failed with error code = %d", (int)cuResult);
  391. CHECK_HR(hr = E_FAIL);
  392. }
  393. if(h264->decoder.cuBuffer.bAvail) {
  394. h264->decoder.cuBuffer.bAvail = tsk_false;
  395. if((retsize = _cuda_codec_h264_pict_layout(h264, out_data, out_max_size)) == 0) {
  396. TSK_DEBUG_ERROR("_cuda_codec_h264_pict_layout failed");
  397. CHECK_HR(hr = E_FAIL);
  398. }
  399. }
  400. }// else(!h264->decoder.passthrough)
  401. } // else if(rtp_hdr->marker)
  402. bail:
  403. if(FAILED(hr)) {
  404. TSK_DEBUG_INFO("Failed to decode the buffer with error code =%d, size=%u, append=%s", ret, h264->decoder.accumulator_pos, append_scp ? "yes" : "no");
  405. if(TMEDIA_CODEC_VIDEO(self)->in.callback) {
  406. TMEDIA_CODEC_VIDEO(self)->in.result.type = tmedia_video_decode_result_type_error;
  407. TMEDIA_CODEC_VIDEO(self)->in.result.proto_hdr = proto_hdr;
  408. TMEDIA_CODEC_VIDEO(self)->in.callback(&TMEDIA_CODEC_VIDEO(self)->in.result);
  409. }
  410. }
  411. return retsize;
  412. }
  413. static tsk_bool_t cuda_codec_h264_sdp_att_match(const tmedia_codec_t* self, const char* att_name, const char* att_value)
  414. {
  415. return tdav_codec_h264_common_sdp_att_match((tdav_codec_h264_common_t*)self, att_name, att_value);
  416. }
  417. static char* cuda_codec_h264_sdp_att_get(const tmedia_codec_t* self, const char* att_name)
  418. {
  419. char* att = tdav_codec_h264_common_sdp_att_get((const tdav_codec_h264_common_t*)self, att_name);
  420. if(att && tsk_striequals(att_name, "fmtp")) {
  421. tsk_strcat(&att, "; impl=CUDA");
  422. }
  423. return att;
  424. }
  425. /* ============ H.264 Base Profile Plugin interface ================= */
  426. /* constructor */
  427. static tsk_object_t* cuda_codec_h264_base_ctor(tsk_object_t * self, va_list * app)
  428. {
  429. cuda_codec_h264_t *h264 = (cuda_codec_h264_t*)self;
  430. if(h264) {
  431. /* init base: called by tmedia_codec_create() */
  432. /* init self */
  433. if(cuda_codec_h264_init(h264, profile_idc_baseline) != 0) {
  434. return tsk_null;
  435. }
  436. }
  437. return self;
  438. }
  439. /* destructor */
  440. static tsk_object_t* cuda_codec_h264_base_dtor(tsk_object_t * self)
  441. {
  442. cuda_codec_h264_t *h264 = (cuda_codec_h264_t*)self;
  443. if(h264) {
  444. /* deinit base */
  445. tdav_codec_h264_common_deinit(TDAV_CODEC_H264_COMMON(self));
  446. /* deinit self */
  447. cuda_codec_h264_deinit(h264);
  448. }
  449. return self;
  450. }
  451. /* object definition */
  452. static const tsk_object_def_t cuda_codec_h264_base_def_s = {
  453. sizeof(cuda_codec_h264_t),
  454. cuda_codec_h264_base_ctor,
  455. cuda_codec_h264_base_dtor,
  456. tmedia_codec_cmp,
  457. };
  458. /* plugin definition*/
  459. static const tmedia_codec_plugin_def_t cuda_codec_h264_base_plugin_def_s = {
  460. &cuda_codec_h264_base_def_s,
  461. tmedia_video,
  462. tmedia_codec_id_h264_bp,
  463. "H264",
  464. "H264 Base Profile (NVIDIA CUDA)",
  465. TMEDIA_CODEC_FORMAT_H264_BP,
  466. tsk_true,
  467. 90000, // rate
  468. /* audio */
  469. { 0 },
  470. /* video (width, height, fps) */
  471. {176, 144, 0}, // fps is @deprecated
  472. cuda_codec_h264_set,
  473. cuda_codec_h264_open,
  474. cuda_codec_h264_close,
  475. cuda_codec_h264_encode,
  476. cuda_codec_h264_decode,
  477. cuda_codec_h264_sdp_att_match,
  478. cuda_codec_h264_sdp_att_get
  479. };
  480. const tmedia_codec_plugin_def_t *cuda_codec_h264_base_plugin_def_t = &cuda_codec_h264_base_plugin_def_s;
  481. /* ============ H.264 Main Profile Plugin interface ================= */
  482. /* constructor */
  483. static tsk_object_t* cuda_codec_h264_main_ctor(tsk_object_t * self, va_list * app)
  484. {
  485. cuda_codec_h264_t *h264 = (cuda_codec_h264_t*)self;
  486. if(h264) {
  487. /* init base: called by tmedia_codec_create() */
  488. /* init self */
  489. if(cuda_codec_h264_init(h264, profile_idc_main) != 0) {
  490. return tsk_null;
  491. }
  492. }
  493. return self;
  494. }
  495. /* destructor */
  496. static tsk_object_t* cuda_codec_h264_main_dtor(tsk_object_t * self)
  497. {
  498. cuda_codec_h264_t *h264 = (cuda_codec_h264_t*)self;
  499. if(h264) {
  500. /* deinit base */
  501. tdav_codec_h264_common_deinit(TDAV_CODEC_H264_COMMON(self));
  502. /* deinit self */
  503. cuda_codec_h264_deinit(h264);
  504. }
  505. return self;
  506. }
  507. /* object definition */
  508. static const tsk_object_def_t cuda_codec_h264_main_def_s = {
  509. sizeof(cuda_codec_h264_t),
  510. cuda_codec_h264_main_ctor,
  511. cuda_codec_h264_main_dtor,
  512. tmedia_codec_cmp,
  513. };
  514. /* plugin definition*/
  515. static const tmedia_codec_plugin_def_t cuda_codec_h264_main_plugin_def_s = {
  516. &cuda_codec_h264_main_def_s,
  517. tmedia_video,
  518. tmedia_codec_id_h264_mp,
  519. "H264",
  520. "H264 Main Profile (NVIDIA CUDA)",
  521. TMEDIA_CODEC_FORMAT_H264_MP,
  522. tsk_true,
  523. 90000, // rate
  524. /* audio */
  525. { 0 },
  526. /* video (width, height, fps)*/
  527. {176, 144, 0},// fps is @deprecated
  528. cuda_codec_h264_set,
  529. cuda_codec_h264_open,
  530. cuda_codec_h264_close,
  531. cuda_codec_h264_encode,
  532. cuda_codec_h264_decode,
  533. cuda_codec_h264_sdp_att_match,
  534. cuda_codec_h264_sdp_att_get
  535. };
  536. const tmedia_codec_plugin_def_t *cuda_codec_h264_main_plugin_def_t = &cuda_codec_h264_main_plugin_def_s;
  537. /* ============ Common To all H264 codecs ================= */
  538. int cuda_codec_h264_open_encoder(cuda_codec_h264_t* self)
  539. {
  540. HRESULT hr = S_OK;
  541. int32_t max_bw_kpbs;
  542. int bestGPU = 0, gpuPerf = 0;
  543. static int low_latency = 1;
  544. tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)self;
  545. if(self->encoder.pInst) {
  546. TSK_DEBUG_ERROR("Encoder already initialized");
  547. #if defined(E_ILLEGAL_METHOD_CALL)
  548. CHECK_HR(hr = E_ILLEGAL_METHOD_CALL);
  549. #else
  550. CHECK_HR(hr = 0x8000000EL);
  551. #endif
  552. }
  553. memset(&self->encoder.clbParams, 0, sizeof(self->encoder.clbParams));
  554. memset(&self->encoder.ctxParams, 0, sizeof(self->encoder.ctxParams));
  555. // create encoder
  556. CHECK_HR(hr = NVCreateEncoder(&self->encoder.pInst));
  557. CHECK_HR(hr = NVSetCodec(self->encoder.pInst, NV_CODEC_TYPE_H264));
  558. CHECK_HR(hr = NVSetDefaultParam(self->encoder.pInst));
  559. CHECK_HR(hr = NVGetParamValue(self->encoder.pInst, NVVE_GET_GPU_COUNT, &self->encoder.ctxParams.GPU_count));
  560. {
  561. int temp = 0, deviceCount;
  562. for (deviceCount=0; deviceCount < self->encoder.ctxParams.GPU_count; deviceCount++) {
  563. NVVE_GPUAttributes GPUAttributes = {0};
  564. GPUAttributes.iGpuOrdinal = deviceCount;
  565. hr = NVGetParamValue(self->encoder.pInst, NVVE_GET_GPU_ATTRIBUTES, &GPUAttributes);
  566. if(FAILED(hr)) {
  567. TSK_DEBUG_ERROR("NVGetParamValue(NVVE_GET_GPU_ATTRIBUTES) failed with error code = %08x", hr);
  568. continue;
  569. }
  570. temp = GPUAttributes.iClockRate * GPUAttributes.iMultiProcessorCount;
  571. temp = temp * CudaUtils::ConvertSMVer2Cores(GPUAttributes.iMajor, GPUAttributes.iMinor);
  572. if(temp > gpuPerf) {
  573. gpuPerf = temp;
  574. bestGPU = deviceCount;
  575. }
  576. }
  577. }
  578. self->encoder.neg_width = (self->encoder.rotation == 90 || self->encoder.rotation == 270) ? TMEDIA_CODEC_VIDEO(self)->out.height : TMEDIA_CODEC_VIDEO(self)->out.width;
  579. self->encoder.neg_height = (self->encoder.rotation == 90 || self->encoder.rotation == 270) ? TMEDIA_CODEC_VIDEO(self)->out.width : TMEDIA_CODEC_VIDEO(self)->out.height;
  580. self->encoder.neg_fps = TMEDIA_CODEC_VIDEO(self)->out.fps;
  581. max_bw_kpbs = TSK_CLAMP(
  582. 0,
  583. tmedia_get_video_bandwidth_kbps_2(self->encoder.neg_width, self->encoder.neg_height, self->encoder.neg_fps),
  584. self->encoder.max_bw_kpbs
  585. );
  586. self->encoder.max_bitrate_bps = (max_bw_kpbs * 1024);
  587. TSK_DEBUG_INFO("[H.264 CUDA Encoder] neg_width=%d, neg_height=%d, neg_fps=%d, max_bitrate_bps=%d",
  588. self->encoder.neg_width,
  589. self->encoder.neg_height,
  590. self->encoder.neg_fps,
  591. self->encoder.max_bitrate_bps
  592. );
  593. self->encoder.ctxParams.iForcedGPU = bestGPU;
  594. self->encoder.ctxParams.iInputSize[0] = self->encoder.neg_width;
  595. self->encoder.ctxParams.iInputSize[1] = self->encoder.neg_height;
  596. self->encoder.ctxParams.iOutputSize[0] = self->encoder.neg_width;
  597. self->encoder.ctxParams.iOutputSize[1] = self->encoder.neg_height;
  598. self->encoder.ctxParams.GPUOffloadLevel= NVVE_GPU_OFFLOAD_ALL;
  599. self->encoder.ctxParams.iSurfaceFormat = (int)IYUV;
  600. self->encoder.ctxParams.iPictureType = (int)FRAME_PICTURE;
  601. self->encoder.ctxParams.Fieldmode = MODE_FRAME;
  602. self->encoder.ctxParams.Presets = (NVVE_PRESETS_TARGET)-1;//Should be iPod, Zune ...
  603. // self->encoder.ctxParams.iP_Interval = 1;
  604. self->encoder.ctxParams.iAspectRatio[0] = 1;
  605. self->encoder.ctxParams.iAspectRatio[1] = 1;
  606. self->encoder.ctxParams.iAspectRatio[2] = 0;
  607. self->encoder.ctxParams.iIDR_Period = (self->encoder.neg_fps * PLUGIN_CUDA_H264_GOP_SIZE_IN_SECONDS);
  608. self->encoder.ctxParams.iUseDeviceMem = 0;
  609. self->encoder.ctxParams.iDynamicGOP = 0;
  610. self->encoder.ctxParams.RCType = RC_CBR;
  611. self->encoder.ctxParams.iAvgBitrate = self->encoder.max_bitrate_bps;
  612. self->encoder.ctxParams.iPeakBitrate = self->encoder.max_bitrate_bps;
  613. self->encoder.ctxParams.iQP_Level_Intra = 25;
  614. self->encoder.ctxParams.iQP_Level_InterP = 28;
  615. self->encoder.ctxParams.iQP_Level_InterB = 31;
  616. self->encoder.ctxParams.iFrameRate[0] = self->encoder.neg_fps;
  617. self->encoder.ctxParams.iFrameRate[1] = 1;
  618. self->encoder.ctxParams.iDeblockMode = 1;
  619. self->encoder.ctxParams.iForceIntra = 0;
  620. self->encoder.ctxParams.iForceIDR = 0;
  621. self->encoder.ctxParams.iClearStat = 0;
  622. self->encoder.ctxParams.DIMode = DI_MEDIAN;
  623. self->encoder.ctxParams.iDisableSPSPPS = 1; // Do not include SPS/PPS frames
  624. self->encoder.ctxParams.iNaluFramingType = 0; // StartCodes
  625. self->encoder.ctxParams.iMultiGPU = 1;
  626. switch(common->profile) {
  627. case profile_idc_baseline: {
  628. self->encoder.ctxParams.iDisableCabac = 1;
  629. self->encoder.ctxParams.iProfileLevel = 0xff42; // 0xff -> autoselect level
  630. break;
  631. }
  632. case profile_idc_main: {
  633. self->encoder.ctxParams.iDisableCabac = 0;
  634. self->encoder.ctxParams.iProfileLevel = 0xff4d; // 0xff -> autoselect level
  635. break;
  636. }
  637. default: {
  638. CHECK_HR(hr = E_NOTIMPL);
  639. break;
  640. }
  641. }
  642. //
  643. // Allocate memory
  644. //
  645. self->encoder.nBufferSize = (self->encoder.ctxParams.iOutputSize[1] * self->encoder.ctxParams.iOutputSize[0] * 3) >> 4;
  646. if(!self->encoder.pBufferPtr && !(self->encoder.pBufferPtr = tsk_realloc(self->encoder.pBufferPtr, self->encoder.nBufferSize))) {
  647. self->encoder.nBufferSize = 0;
  648. CHECK_HR(hr = E_OUTOFMEMORY);
  649. }
  650. //
  651. // Set parameters
  652. //
  653. hr = NVSetParamValue(self->encoder.pInst, NVVE_FORCE_GPU_SELECTION, &self->encoder.ctxParams.iForcedGPU);
  654. if(FAILED(hr)) {
  655. TSK_DEBUG_WARN("NVSetParamValue(NVVE_FORCE_GPU_SELECTION) failed with error code = %08x", hr);
  656. }
  657. CHECK_HR(hr = NVSetParamValue(self->encoder.pInst, NVVE_DEVICE_MEMORY_INPUT, &(self->encoder.ctxParams.iUseDeviceMem)));
  658. hr = NVSetParamValue(self->encoder.pInst,NVVE_OUT_SIZE, &(self->encoder.ctxParams.iOutputSize));
  659. if (hr!=S_OK) {
  660. TSK_DEBUG_WARN("NVSetParamValue(NVVE_OUT_SIZE) failed with error code = %08x", hr);
  661. }
  662. hr = NVSetParamValue(self->encoder.pInst,NVVE_IN_SIZE, &(self->encoder.ctxParams.iInputSize));
  663. if (hr!=S_OK) {
  664. TSK_DEBUG_WARN("NVSetParamValue(NVVE_IN_SIZE) failed with error code = %08x", hr);
  665. }
  666. hr = NVSetParamValue(self->encoder.pInst,NVVE_MULTI_GPU, &(self->encoder.ctxParams.iMultiGPU));
  667. if (hr!=S_OK) {
  668. TSK_DEBUG_WARN("NVSetParamValue(NVVE_MULTI_GPU) failed with error code = %08x", hr);
  669. }
  670. hr = NVSetParamValue(self->encoder.pInst,NVVE_ASPECT_RATIO, &(self->encoder.ctxParams.iAspectRatio));
  671. if (hr!=S_OK) {
  672. TSK_DEBUG_WARN("NVSetParamValue(NVVE_ASPECT_RATIO) failed with error code = %08x", hr);
  673. }
  674. hr = NVSetParamValue(self->encoder.pInst,NVVE_FIELD_ENC_MODE, &(self->encoder.ctxParams.Fieldmode));
  675. if (hr!=S_OK) {
  676. TSK_DEBUG_WARN("NVSetParamValue(NVVE_FIELD_ENC_MODE) failed with error code = %08x", hr);
  677. }
  678. hr = NVSetParamValue(self->encoder.pInst,NVVE_P_INTERVAL, &(self->encoder.ctxParams.iP_Interval));
  679. if (hr!=S_OK) {
  680. TSK_DEBUG_WARN("NVSetParamValue(NVVE_P_INTERVAL) failed with error code = %08x", hr);
  681. }
  682. hr = NVSetParamValue(self->encoder.pInst,NVVE_IDR_PERIOD, &(self->encoder.ctxParams.iIDR_Period));
  683. if (hr!=S_OK) {
  684. TSK_DEBUG_WARN("NVSetParamValue(NVVE_IDR_PERIOD) failed with error code = %08x", hr);
  685. }
  686. hr = NVSetParamValue(self->encoder.pInst,NVVE_DYNAMIC_GOP, &(self->encoder.ctxParams.iDynamicGOP));
  687. if (hr!=S_OK) {
  688. TSK_DEBUG_WARN("NVSetParamValue(NVVE_DYNAMIC_GOP) failed with error code = %08x", hr);
  689. }
  690. hr = NVSetParamValue(self->encoder.pInst,NVVE_RC_TYPE, &(self->encoder.ctxParams.RCType));
  691. if (hr!=S_OK) {
  692. TSK_DEBUG_WARN("NVSetParamValue(NVVE_RC_TYPE) failed with error code = %08x", hr);
  693. }
  694. hr = NVSetParamValue(self->encoder.pInst,NVVE_AVG_BITRATE, &(self->encoder.ctxParams.iAvgBitrate));
  695. if (hr!=S_OK) {
  696. TSK_DEBUG_WARN("NVSetParamValue(NVVE_AVG_BITRATE) failed with error code = %08x", hr);
  697. }
  698. hr = NVSetParamValue(self->encoder.pInst,NVVE_PEAK_BITRATE, &(self->encoder.ctxParams.iPeakBitrate));
  699. if (hr!=S_OK) {
  700. TSK_DEBUG_WARN("NVSetParamValue(NVVE_PEAK_BITRATE) failed with error code = %08x", hr);
  701. }
  702. hr = NVSetParamValue(self->encoder.pInst,NVVE_QP_LEVEL_INTRA, &(self->encoder.ctxParams.iQP_Level_Intra));
  703. if (hr!=S_OK) {
  704. TSK_DEBUG_WARN("NVSetParamValue(NVVE_OUT_SIZE) failed with error code = %08x", hr);
  705. }
  706. hr = NVSetParamValue(self->encoder.pInst,NVVE_QP_LEVEL_INTER_P,&(self->encoder.ctxParams.iQP_Level_InterP));
  707. if (hr!=S_OK) {
  708. TSK_DEBUG_WARN("NVSetParamValue(NVVE_QP_LEVEL_INTER_P) failed with error code = %08x", hr);
  709. }
  710. hr = NVSetParamValue(self->encoder.pInst,NVVE_QP_LEVEL_INTER_B,&(self->encoder.ctxParams.iQP_Level_InterB));
  711. if (hr!=S_OK) {
  712. TSK_DEBUG_WARN("NVSetParamValue(NVVE_QP_LEVEL_INTER_B) failed with error code = %08x", hr);
  713. }
  714. hr = NVSetParamValue(self->encoder.pInst,NVVE_FRAME_RATE, &(self->encoder.ctxParams.iFrameRate));
  715. if (hr!=S_OK) {
  716. TSK_DEBUG_WARN("NVSetParamValue(NVVE_FRAME_RATE) failed with error code = %08x", hr);
  717. }
  718. hr = NVSetParamValue(self->encoder.pInst,NVVE_DEBLOCK_MODE, &(self->encoder.ctxParams.iDeblockMode));
  719. if (hr!=S_OK) {
  720. TSK_DEBUG_WARN("NVSetParamValue(NVVE_DEBLOCK_MODE) failed with error code = %08x", hr);
  721. }
  722. hr = NVSetParamValue(self->encoder.pInst,NVVE_PROFILE_LEVEL, &(self->encoder.ctxParams.iProfileLevel));
  723. if (hr!=S_OK) {
  724. TSK_DEBUG_WARN("NVSetParamValue(NVVE_PROFILE_LEVEL) failed with error code = %08x", hr);
  725. }
  726. hr = NVSetParamValue(self->encoder.pInst,NVVE_FORCE_INTRA, &(self->encoder.ctxParams.iForceIntra));
  727. if (hr!=S_OK) {
  728. TSK_DEBUG_WARN("NVSetParamValue(NVVE_FORCE_INTRA) failed with error code = %08x", hr);
  729. }
  730. hr = NVSetParamValue(self->encoder.pInst,NVVE_FORCE_IDR, &(self->encoder.ctxParams.iForceIDR));
  731. if (hr!=S_OK) {
  732. TSK_DEBUG_WARN("NVSetParamValue(NVVE_FORCE_IDR) failed with error code = %08x", hr);
  733. }
  734. hr = NVSetParamValue(self->encoder.pInst,NVVE_CLEAR_STAT, &(self->encoder.ctxParams.iClearStat));
  735. if (hr!=S_OK) {
  736. TSK_DEBUG_WARN("NVSetParamValue(NVVE_CLEAR_STAT) failed with error code = %08x", hr);
  737. }
  738. hr = NVSetParamValue(self->encoder.pInst,NVVE_SET_DEINTERLACE,&(self->encoder.ctxParams.DIMode));
  739. if (hr!=S_OK) {
  740. TSK_DEBUG_WARN("NVSetParamValue(NVVE_SET_DEINTERLACE) failed with error code = %08x", hr);
  741. }
  742. if (self->encoder.ctxParams.Presets != -1) {
  743. hr = NVSetParamValue(self->encoder.pInst,NVVE_PRESETS, &(self->encoder.ctxParams.Presets));
  744. if (hr!=S_OK) {
  745. TSK_DEBUG_WARN("NVSetParamValue(NVVE_PRESETS) failed with error code = %08x", hr);
  746. }
  747. }
  748. hr = NVSetParamValue(self->encoder.pInst,NVVE_DISABLE_CABAC, &(self->encoder.ctxParams.iDisableCabac));
  749. if (hr!=S_OK) {
  750. TSK_DEBUG_WARN("NVSetParamValue(NVVE_DISABLE_CABAC) failed with error code = %08x", hr);
  751. }
  752. hr = NVSetParamValue(self->encoder.pInst,NVVE_CONFIGURE_NALU_FRAMING_TYPE, &(self->encoder.ctxParams.iNaluFramingType));
  753. if (hr!=S_OK) {
  754. TSK_DEBUG_WARN("NVSetParamValue(NVVE_CONFIGURE_NALU_FRAMING_TYPE) failed with error code = %08x", hr);
  755. }
  756. hr = NVSetParamValue(self->encoder.pInst,NVVE_DISABLE_SPS_PPS,&(self->encoder.ctxParams.iDisableSPSPPS));
  757. if (hr!=S_OK) {
  758. TSK_DEBUG_WARN("NVSetParamValue(NVVE_DISABLE_SPS_PPS) failed with error code = %08x", hr);
  759. }
  760. hr = NVSetParamValue(self->encoder.pInst,NVVE_LOW_LATENCY,&low_latency);
  761. if (hr!=S_OK) {
  762. TSK_DEBUG_WARN("NVSetParamValue(NVVE_LOW_LATENCY) failed with error code = %08x", hr);
  763. }
  764. self->encoder.clbParams.pfnacquirebitstream = _NVCallback_HandleAcquireBitStream;
  765. self->encoder.clbParams.pfnonbeginframe = _NVCallback_HandleOnBeginFrame;
  766. self->encoder.clbParams.pfnonendframe = _NVCallback_HandleOnEndFrame;
  767. self->encoder.clbParams.pfnreleasebitstream = _NVCallback_HandleReleaseBitStream;
  768. NVRegisterCB(self->encoder.pInst, self->encoder.clbParams, self);
  769. CHECK_HR(hr = NVCreateHWEncoder(self->encoder.pInst));
  770. bail:
  771. return SUCCEEDED(hr) ? 0 : -1;
  772. }
  773. int cuda_codec_h264_close_encoder(cuda_codec_h264_t* self)
  774. {
  775. if(self) {
  776. if(self->encoder.pInst) {
  777. NVDestroyEncoder(self->encoder.pInst);
  778. self->encoder.pInst = NULL;
  779. }
  780. if(self->encoder.pBufferPtr) {
  781. TSK_FREE(self->encoder.pBufferPtr);
  782. self->encoder.nBufferSize = 0;
  783. }
  784. self->encoder.frame_count = 0;
  785. }
  786. return 0;
  787. }
  788. int cuda_codec_h264_open_decoder(cuda_codec_h264_t* self)
  789. {
  790. HRESULT hr = S_OK;
  791. tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)self;
  792. int i, adapterCount;
  793. CUresult cuResult;
  794. D3DPRESENT_PARAMETERS d3dpp;
  795. if(self->decoder.pInst || self->decoder.cuDevice || self->decoder.cuContext || self->decoder.pD3D9 || self->decoder.pD3D9Device) {
  796. TSK_DEBUG_ERROR("Decoder already initialized");
  797. #if defined(E_ILLEGAL_METHOD_CALL)
  798. CHECK_HR(hr = E_ILLEGAL_METHOD_CALL);
  799. #else
  800. CHECK_HR(hr = 0x8000000EL);
  801. #endif
  802. }
  803. TSK_DEBUG_INFO("[H.264 MF Decoder] neg_width=%d, neg_height=%d, neg_fps=%d",
  804. TMEDIA_CODEC_VIDEO(self)->in.width,
  805. TMEDIA_CODEC_VIDEO(self)->in.height,
  806. TMEDIA_CODEC_VIDEO(self)->in.fps
  807. );
  808. memset(&self->decoder.cuInfo, 0, sizeof(self->decoder.cuInfo));
  809. self->decoder.cuInfo.ulCreationFlags = cudaVideoCreate_PreferCUDA;
  810. self->decoder.cuInfo.CodecType = cudaVideoCodec_H264;
  811. self->decoder.cuInfo.ulWidth = TMEDIA_CODEC_VIDEO(self)->in.width;
  812. self->decoder.cuInfo.ulTargetWidth = TMEDIA_CODEC_VIDEO(self)->in.width;
  813. self->decoder.cuInfo.ulHeight = TMEDIA_CODEC_VIDEO(self)->in.height;
  814. self->decoder.cuInfo.ulTargetHeight = TMEDIA_CODEC_VIDEO(self)->in.height;
  815. self->decoder.cuInfo.ulNumDecodeSurfaces = PLUGIN_CUDA_H264_MAX_FRM_CNT;
  816. self->decoder.cuInfo.ulNumOutputSurfaces = 1;
  817. self->decoder.cuInfo.ChromaFormat = cudaVideoChromaFormat_420;
  818. self->decoder.cuInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  819. self->decoder.cuInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
  820. self->decoder.cuDevice = CudaUtils::GetMaxGflopsDeviceId();
  821. #if _DEBUG || DEBUG
  822. {
  823. int major, minor;
  824. size_t totalGlobalMem;
  825. char deviceName[256];
  826. cuDeviceComputeCapability(&major, &minor, self->decoder.cuDevice);
  827. cuDeviceGetName(deviceName, sizeof(deviceName), self->decoder.cuDevice);
  828. TSK_DEBUG_INFO("[CUDA H.264 decoder] Using GPU Device %d: %s has SM %d.%d compute capability", self->decoder.cuDevice, deviceName, major, minor);
  829. /*cutilDrvSafeCallNoSync(*/cuDeviceTotalMem(&totalGlobalMem, self->decoder.cuDevice)/*)*/;
  830. TSK_DEBUG_INFO("[CUDA H.264 decoder] Total amount of global memory in GPU device: %4.4f MB", (float)totalGlobalMem/(1024*1024));
  831. }
  832. #endif
  833. // create Direct3D instance
  834. self->decoder.pD3D9 = Direct3DCreate9(D3D_SDK_VERSION);
  835. if(!self->decoder.pD3D9) {
  836. CHECK_HR(hr = E_OUTOFMEMORY);
  837. }
  838. adapterCount = self->decoder.pD3D9->GetAdapterCount();
  839. for(i = 0; i < adapterCount; ++i) {
  840. ZeroMemory(&d3dpp, sizeof(d3dpp));
  841. d3dpp.Windowed = TRUE;
  842. d3dpp.BackBufferFormat = D3DFMT_X8R8G8B8;
  843. d3dpp.BackBufferWidth = self->decoder.cuInfo.ulTargetWidth;
  844. d3dpp.BackBufferHeight = self->decoder.cuInfo.ulTargetHeight;
  845. d3dpp.BackBufferCount = 1;
  846. d3dpp.SwapEffect = D3DSWAPEFFECT_COPY;
  847. d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
  848. d3dpp.Flags = D3DPRESENTFLAG_VIDEO;
  849. hr = self->decoder.pD3D9->CreateDevice(i,
  850. D3DDEVTYPE_HAL,
  851. GetDesktopWindow(),
  852. D3DCREATE_FPU_PRESERVE | D3DCREATE_MULTITHREADED | D3DCREATE_HARDWARE_VERTEXPROCESSING,
  853. &d3dpp,
  854. &self->decoder.pD3D9Device);
  855. if(hr == S_OK) {
  856. cuResult = cuD3D9CtxCreate(&self->decoder.cuContext, &self->decoder.cuDevice, 0, self->decoder.pD3D9Device);
  857. if(cuResult == CUDA_SUCCESS) {
  858. break;
  859. }
  860. SafeRelease(&self->decoder.pD3D9Device);
  861. if(self->decoder.cuContext) {
  862. cuCtxDestroy(self->decoder.cuContext);
  863. self->decoder.cuContext = NULL;
  864. }
  865. }
  866. }
  867. if(!self->decoder.pD3D9Device) {
  868. TSK_DEBUG_ERROR("Failed to create D3D9 device");
  869. CHECK_HR(hr = E_FAIL);
  870. }
  871. memset(&self->decoder.cuPaserParams, 0, sizeof(self->decoder.cuPaserParams));
  872. self->decoder.cuPaserParams.CodecType = cudaVideoCodec_H264;
  873. self->decoder.cuPaserParams.ulMaxNumDecodeSurfaces = PLUGIN_CUDA_H264_MAX_FRM_CNT;
  874. self->decoder.cuPaserParams.pUserData = self;
  875. self->decoder.cuPaserParams.pfnSequenceCallback = _NVCallback_HandleVideoSequence;
  876. self->decoder.cuPaserParams.pfnDecodePicture = _NVCallback_HandlePictureDecode;
  877. self->decoder.cuPaserParams.pfnDisplayPicture = _NVCallback_HandlePictureDisplay;
  878. cuResult = cuvidCreateVideoParser(&self->decoder.cuParser, &self->decoder.cuPaserParams);
  879. if(cuResult != CUDA_SUCCESS) {
  880. TSK_DEBUG_ERROR("cuvidCreateVideoParser(0) failed with error code = %d", (int)cuResult);
  881. CHECK_HR(hr = E_FAIL);
  882. }
  883. cuResult = cuvidCreateDecoder(&self->decoder.pInst, &self->decoder.cuInfo);
  884. if(CUDA_SUCCESS != cuResult) {
  885. TSK_DEBUG_ERROR("cuvidCreateDecoder failed with error code=%d", (int)cuResult);
  886. CHECK_HR(hr = E_FAIL);
  887. }
  888. if(!self->decoder.phMutex && !(self->decoder.phMutex = tsk_mutex_create())) {
  889. TSK_DEBUG_ERROR("Failed to create mutex");
  890. CHECK_HR(hr = E_FAIL);
  891. }
  892. bail:
  893. return SUCCEEDED(hr) ? 0 : -1;
  894. }
  895. int cuda_codec_h264_close_decoder(cuda_codec_h264_t* self)
  896. {
  897. if(self) {
  898. if(self->decoder.pInst) {
  899. cuvidDestroyDecoder(self->decoder.pInst);
  900. self->decoder.pInst = NULL;
  901. }
  902. if(self->decoder.cuContext) {
  903. cuCtxDestroy(self->decoder.cuContext);
  904. self->decoder.cuContext = NULL;
  905. }
  906. SafeRelease(&self->decoder.pD3D9Device);
  907. SafeRelease(&self->decoder.pD3D9);
  908. if(self->decoder.cuParser) {
  909. cuvidDestroyVideoParser(self->decoder.cuParser);
  910. self->decoder.cuParser = NULL;
  911. }
  912. {/* cuBuffer.XXX */
  913. if(self->decoder.cuBuffer.pcuPtr) {
  914. cuMemFreeHost(self->decoder.cuBuffer.pcuPtr);
  915. self->decoder.cuBuffer.pcuPtr = NULL;
  916. }
  917. self->decoder.cuBuffer.nSize = self->decoder.cuBuffer.nPitch = 0;
  918. self->decoder.cuBuffer.bAvail = tsk_false;
  919. }
  920. if(self->decoder.phMutex) {
  921. tsk_mutex_destroy(&self->decoder.phMutex);
  922. }
  923. TSK_FREE(self->decoder.accumulator);
  924. self->decoder.accumulator_pos = 0;
  925. }
  926. return 0;
  927. }
  928. int cuda_codec_h264_init(cuda_codec_h264_t* self, profile_idc_t profile)
  929. {
  930. int ret = 0;
  931. level_idc_t level;
  932. tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)self;
  933. if(!self) {
  934. TSK_DEBUG_ERROR("Invalid parameter");
  935. return -1;
  936. }
  937. CudaUtils::Startup();
  938. if((ret = tdav_codec_h264_common_init(common))) {
  939. TSK_DEBUG_ERROR("cuda_codec_h264_common_init() faile with error code=%d", ret);
  940. return ret;
  941. }
  942. if((ret = tdav_codec_h264_common_level_from_size(TMEDIA_CODEC_VIDEO(self)->out.width, TMEDIA_CODEC_VIDEO(self)->out.height, &level))) {
  943. TSK_DEBUG_ERROR("Failed to find level for size=[%u, %u]", TMEDIA_CODEC_VIDEO(self)->out.width, TMEDIA_CODEC_VIDEO(self)->out.height);
  944. return ret;
  945. }
  946. (self)->encoder.max_bw_kpbs = tmedia_defaults_get_bandwidth_video_upload_max();
  947. common->pack_mode = H264_PACKETIZATION_MODE;
  948. common->profile = profile;
  949. common->level = level;
  950. TMEDIA_CODEC_VIDEO(self)->in.max_mbps = TMEDIA_CODEC_VIDEO(self)->out.max_mbps = H264_MAX_MBPS*1000;
  951. TMEDIA_CODEC_VIDEO(self)->in.max_br = TMEDIA_CODEC_VIDEO(self)->out.max_br = H264_MAX_BR*1000;
  952. TMEDIA_CODEC_VIDEO(self)->in.chroma = tmedia_chroma_nv12; // decoder
  953. TMEDIA_CODEC_VIDEO(self)->out.chroma = tmedia_chroma_yuv420p; // encoder
  954. self->encoder.quality = 1;
  955. return ret;
  956. }
  957. int cuda_codec_h264_deinit(cuda_codec_h264_t* self)
  958. {
  959. if(!self) {
  960. TSK_DEBUG_ERROR("Invalid parameter");
  961. return -1;
  962. }
  963. cuda_codec_h264_close((tmedia_codec_t*)self);
  964. return 0;
  965. }
  966. static inline tsk_size_t _cuda_codec_h264_pict_layout(cuda_codec_h264_t* self, void**output, tsk_size_t *output_size)
  967. {
  968. if(self && self->decoder.cuBuffer.pcuPtr && self->decoder.cuBuffer.nSize) {
  969. const unsigned int w = TMEDIA_CODEC_VIDEO(self)->in.width;
  970. const unsigned int w_div_2 = (w >> 1);
  971. const unsigned int h = TMEDIA_CODEC_VIDEO(self)->in.height;
  972. const unsigned int h_div_2 = (h >> 1);
  973. const unsigned int pitch = self->decoder.cuBuffer.nPitch;
  974. const unsigned int pitch_div_2 = (pitch >> 1);
  975. const tsk_size_t xsize = (w * h * 3) >> 1;
  976. // resize if too short
  977. if(*output_size < xsize) {
  978. if((*output = tsk_realloc(*output, xsize))) {
  979. *output_size = xsize;
  980. }
  981. else {
  982. *output_size = 0;
  983. return 0;
  984. }
  985. }
  986. register unsigned int y;
  987. const unsigned char *p = (const unsigned char *)self->decoder.cuBuffer.pcuPtr, *q = p + (h * pitch);
  988. register unsigned char *i = (unsigned char *)*output, *j = i + (h * w);
  989. for (y = 0; y < h; y++) {
  990. // luma
  991. memcpy(i, p, w);
  992. i += w;
  993. p += pitch;
  994. // chroma
  995. memcpy(j, &q[(y&1) ? w_div_2 : 0], w_div_2);
  996. j += w_div_2;
  997. if(y&1) {
  998. q += pitch;
  999. }
  1000. }
  1001. return xsize;
  1002. }
  1003. return 0;
  1004. }
  1005. static int CUDAAPI _NVCallback_HandleVideoSequence(void *pvUserData, CUVIDEOFORMAT *pFormat)
  1006. {
  1007. cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)pvUserData;
  1008. CUresult cuResult;
  1009. if(!h264 || !pFormat) {
  1010. TSK_DEBUG_ERROR("Invalid parameter");
  1011. return 0;//error
  1012. }
  1013. tsk_mutex_lock(h264->decoder.phMutex);
  1014. int ret = 1;
  1015. // http://corecodec.com/products/coreavc/guide
  1016. // CROP 1088 to 1080
  1017. // H.264 encoded video size is always a multiple of 16, and sequences that are 1080 pixels high are encoded as 1088 padded at the bottom.
  1018. // Also H.264 specifications provides a set of cropping parameters to signal that parts of the encoded picture are not important and should not be displayed.
  1019. // Some H.264 encoders fail to specify cropping parameters when encoding 1080 video.
  1020. int newWidth = pFormat->coded_width;//pFormat->display_area.right - pFormat->display_area.left;
  1021. int newHeight = pFormat->coded_height;//pFormat->display_area.bottom - pFormat->display_area.top;
  1022. if(newWidth != TMEDIA_CODEC_VIDEO(h264)->in.width || pFormat->coded_height != newHeight) {
  1023. TSK_DEBUG_INFO("[H.264 CUDA decoder] display area = left:%d, right:%d, bottom:%d, top:%d",
  1024. pFormat->display_area.left,
  1025. pFormat->display_area.right,
  1026. pFormat->display_area.bottom,
  1027. pFormat->display_area.top
  1028. );
  1029. h264->decoder.cuInfo.ulWidth = newWidth;
  1030. h264->decoder.cuInfo.ulTargetWidth = newWidth;
  1031. h264->decoder.cuInfo.ulHeight = newHeight;
  1032. h264->decoder.cuInfo.ulTargetHeight = newHeight;
  1033. CUresult cuResult = cuCtxPushCurrent(h264->decoder.cuContext);
  1034. if(cuResult != CUDA_SUCCESS) {
  1035. TSK_DEBUG_ERROR("cuCtxPushCurrent failed with error code=%d", (int)cuResult);
  1036. ret = 0; //error
  1037. goto bail;
  1038. }
  1039. if(h264->decoder.pInst) {
  1040. cuvidDestroyDecoder(h264->decoder.pInst);
  1041. h264->decoder.pInst = NULL;
  1042. }
  1043. cuResult = cuvidCreateDecoder(&h264->decoder.pInst, &h264->decoder.cuInfo);
  1044. if(CUDA_SUCCESS != cuResult) {
  1045. TSK_DEBUG_ERROR("cuvidCreateDecoder failed with error code=%d", (int)cuResult);
  1046. ret = 0; //error
  1047. goto bail;
  1048. }
  1049. else {
  1050. TMEDIA_CODEC_VIDEO(h264)->in.width = /*pFormat->coded_width*/newWidth;
  1051. TMEDIA_CODEC_VIDEO(h264)->in.height = /*pFormat->coded_height*/newHeight;
  1052. ret = 1; //success
  1053. }
  1054. }
  1055. bail:
  1056. cuResult = cuCtxPopCurrent(NULL);
  1057. tsk_mutex_unlock(h264->decoder.phMutex);
  1058. return ret;//success
  1059. }
  1060. static int CUDAAPI _NVCallback_HandlePictureDecode(void *pvUserData, CUVIDPICPARAMS *pPicParams)
  1061. {
  1062. cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)pvUserData;
  1063. if(!h264 || !pPicParams) {
  1064. TSK_DEBUG_ERROR("Invalid parameter");
  1065. return 0;//error
  1066. }
  1067. tsk_mutex_lock(h264->decoder.phMutex);
  1068. CUresult cuResult = cuvidDecodePicture(h264->decoder.pInst, pPicParams);
  1069. tsk_mutex_unlock(h264->decoder.phMutex);
  1070. if(cuResult != CUDA_SUCCESS) {
  1071. TSK_DEBUG_ERROR("cuvidDecodePicture failed with error code= %d", cuResult);
  1072. return 0;//error
  1073. }
  1074. return 1;//success
  1075. }
  1076. static int CUDAAPI _NVCallback_HandlePictureDisplay(void *pvUserData, CUVIDPARSERDISPINFO *pPicParams)
  1077. {
  1078. cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)pvUserData;
  1079. CUVIDPROCPARAMS vpp = {0};
  1080. CUdeviceptr devPtr;
  1081. CUresult cuResult;
  1082. tsk_size_t nv12_size;
  1083. tsk_bool_t mapped = tsk_false;
  1084. int ret = 1;//success
  1085. if(!h264 || !pPicParams) {
  1086. TSK_DEBUG_ERROR("Invalid parameter");
  1087. return 0;//error
  1088. }
  1089. cuResult = cuCtxPushCurrent(h264->decoder.cuContext);
  1090. if(cuResult != CUDA_SUCCESS) {
  1091. TSK_DEBUG_ERROR("cuCtxPushCurrent failed with error code = %d", (int)cuResult);
  1092. ret = 0;//error
  1093. goto bail;
  1094. }
  1095. vpp.progressive_frame = pPicParams->progressive_frame;
  1096. vpp.top_field_first = pPicParams->top_field_first;
  1097. cuResult = cuvidMapVideoFrame(h264->decoder.pInst, pPicParams->picture_index, &devPtr, &h264->decoder.cuBuffer.nPitch, &vpp);
  1098. if(cuResult != CUDA_SUCCESS) {
  1099. TSK_DEBUG_ERROR("cuvidMapVideoFrame failed with error code = %d", (int)cuResult);
  1100. ret = 0;//error
  1101. goto bail;
  1102. }
  1103. mapped = tsk_true;
  1104. nv12_size = ((h264->decoder.cuBuffer.nPitch * TMEDIA_CODEC_VIDEO(h264)->in.height) * 3) >> 1;
  1105. if ((!h264->decoder.cuBuffer.pcuPtr) || (nv12_size > h264->decoder.cuBuffer.nSize)) {
  1106. h264->decoder.cuBuffer.nSize = 0;
  1107. if (h264->decoder.cuBuffer.pcuPtr) {
  1108. cuResult = cuMemFreeHost(h264->decoder.cuBuffer.pcuPtr);
  1109. h264->decoder.cuBuffer.pcuPtr = NULL;
  1110. }
  1111. cuResult = cuMemAllocHost((void**)&h264->decoder.cuBuffer.pcuPtr, nv12_size);
  1112. if (cuResult != CUDA_SUCCESS) {
  1113. TSK_DEBUG_ERROR("cuMemAllocHost failed to allocate %d bytes (error code=%d)", nv12_size, (int)cuResult);
  1114. h264->decoder.cuBuffer.pcuPtr = tsk_null;
  1115. h264->decoder.cuBuffer.nSize = 0;
  1116. ret = 0;//error
  1117. }
  1118. else {
  1119. h264->decoder.cuBuffer.nSize = nv12_size;
  1120. }
  1121. }
  1122. if(h264->decoder.cuBuffer.pcuPtr) {
  1123. cuResult = cuMemcpyDtoH(h264->decoder.cuBuffer.pcuPtr, devPtr, nv12_size);
  1124. }
  1125. bail:
  1126. if(mapped) {
  1127. cuResult = cuvidUnmapVideoFrame(h264->decoder.pInst, devPtr);
  1128. }
  1129. cuResult = cuCtxPopCurrent(NULL);
  1130. h264->decoder.cuBuffer.bAvail = (ret == 1);
  1131. return ret;
  1132. }
  1133. static unsigned char* CUDAAPI _NVCallback_HandleAcquireBitStream(int *pBufferSize, void *pUserdata)
  1134. {
  1135. cuda_codec_h264_t* h264 = (cuda_codec_h264_t*)pUserdata;
  1136. if(!h264 || !pBufferSize) {
  1137. TSK_DEBUG_ERROR("Invalid parameter");
  1138. return tsk_null;
  1139. }
  1140. *pBufferSize = (int)h264->encoder.nBufferSize;
  1141. return (unsigned char*)h264->encoder.pBufferPtr;
  1142. }
  1143. static void CUDAAPI _NVCallback_HandleReleaseBitStream(int nBytesInBuffer, unsigned char *cb, void *pUserdata)
  1144. {
  1145. tdav_codec_h264_common_t* common = (tdav_codec_h264_common_t*)pUserdata;
  1146. if(!common || !cb || !nBytesInBuffer) {
  1147. TSK_DEBUG_ERROR("Invalid parameter");
  1148. return;
  1149. }
  1150. tdav_codec_h264_rtp_encap(common, (const uint8_t*)cb, (tsk_size_t)nBytesInBuffer);
  1151. }
  1152. static void CUDAAPI _NVCallback_HandleOnBeginFrame(const NVVE_BeginFrameInfo *pbfi, void *pUserdata)
  1153. {
  1154. return;
  1155. }
  1156. static void CUDAAPI _NVCallback_HandleOnEndFrame(const NVVE_EndFrameInfo *pefi, void *pUserdata)
  1157. {
  1158. return;
  1159. }