plugin_cuda_utils.cxx 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. /* Copyright (C) 2013 Mamadou DIOP
  2. * Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
  3. *
  4. * This file is part of Open Source Doubango Framework.
  5. *
  6. * DOUBANGO is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * DOUBANGO is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with DOUBANGO.
  18. */
  19. #include "plugin_cuda_utils.h"
  20. #include "tsk_debug.h"
  21. #include <NVEncoderAPI.h>
  22. #include <cuda.h>
  23. #include <cuda_runtime_api.h>
  24. bool CudaUtils::g_bStarted = false;
  25. bool CudaUtils::g_bH264Checked = false;
  26. bool CudaUtils::g_bH264Supported = false;
  27. int CudaUtils::g_nCores = 0;
  28. HRESULT CudaUtils::Startup()
  29. {
  30. if(!g_bStarted) {
  31. CUresult cuResult = CUDA_SUCCESS;
  32. HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
  33. if(SUCCEEDED(hr) || hr == 0x80010106) { // 0x80010106 when called from managed code (e.g. Boghe) - More info: http://support.microsoft.com/kb/824480
  34. if((cuResult = cuInit(0)) != CUDA_SUCCESS) {
  35. TSK_DEBUG_ERROR("cuInit() failed with error code = %08x", cuResult);
  36. hr = E_FAIL;
  37. }
  38. else {
  39. hr = S_OK;
  40. }
  41. }
  42. g_bStarted = true;
  43. return hr;
  44. }
  45. return S_OK;
  46. }
  47. HRESULT CudaUtils::Shutdown()
  48. {
  49. // cuDeinit();
  50. return S_OK;
  51. }
  52. bool CudaUtils::IsH264Supported()
  53. {
  54. if(g_bH264Checked) {
  55. return g_bH264Supported;
  56. }
  57. HRESULT hr = S_OK;
  58. CHECK_HR(hr = Startup());
  59. g_bH264Checked = true;
  60. NVEncoder pEncoder = NULL;
  61. CHECK_HR(hr = NVGetHWEncodeCaps());
  62. CHECK_HR(hr = NVCreateEncoder(&pEncoder));
  63. // Both Base and Main profiles *must* be supported
  64. CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_BASELINE));
  65. CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_MAIN));
  66. g_bH264Supported = true;
  67. bail:
  68. if(pEncoder) {
  69. NVDestroyEncoder(pEncoder);
  70. pEncoder = NULL;
  71. }
  72. return g_bH264Supported;
  73. }
  74. int CudaUtils::ConvertSMVer2Cores(int nMajor, int nMinor)
  75. {
  76. if(g_nCores != 0) {
  77. return g_nCores;
  78. }
  79. // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
  80. typedef struct {
  81. int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
  82. int Cores;
  83. } sSMtoCores;
  84. sSMtoCores nGpuArchCoresPerSM[] = {
  85. { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class
  86. { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class
  87. { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class
  88. { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class
  89. { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
  90. { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
  91. { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
  92. { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
  93. };
  94. int index = 0;
  95. while (nGpuArchCoresPerSM[index].SM != -1) {
  96. if (nGpuArchCoresPerSM[index].SM == ((nMajor << 4) + nMinor)) {
  97. g_nCores = nGpuArchCoresPerSM[index].Cores;
  98. break;
  99. }
  100. index++;
  101. }
  102. // If we don't find the values, we default use the previous one to run properly
  103. TSK_DEBUG_INFO("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM", nMajor, nMinor, nGpuArchCoresPerSM[7].Cores);
  104. g_nCores = nGpuArchCoresPerSM[7].Cores;
  105. return g_nCores;
  106. }
  107. int CudaUtils::GetMaxGflopsDeviceId()
  108. {
  109. int device_count = 0;
  110. cudaGetDeviceCount( &device_count );
  111. cudaDeviceProp device_properties;
  112. int max_gflops_device = 0;
  113. int max_gflops = 0;
  114. int current_device = 0;
  115. cudaGetDeviceProperties( &device_properties, current_device );
  116. max_gflops = device_properties.multiProcessorCount * device_properties.clockRate;
  117. ++current_device;
  118. while( current_device < device_count ) {
  119. cudaGetDeviceProperties( &device_properties, current_device );
  120. int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
  121. if( gflops > max_gflops ) {
  122. max_gflops = gflops;
  123. max_gflops_device = current_device;
  124. }
  125. ++current_device;
  126. }
  127. return max_gflops_device;
  128. }