/* Copyright (C) 2013 Mamadou DIOP
* Copyright (C) 2013 Doubango Telecom
*
* This file is part of Open Source Doubango Framework.
*
* DOUBANGO is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* DOUBANGO is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with DOUBANGO.
*/
#include "plugin_cuda_utils.h"

#include "tsk_debug.h"

// NOTE(review): the names of the three angle-bracket includes were lost when this
// file was extracted. They are restored here from the APIs used below (CUDA driver
// API, CUDA runtime API, NVEncoder API) - confirm against the original sources.
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <NVEncoderAPI.h>

bool CudaUtils::g_bStarted = false;
bool CudaUtils::g_bH264Checked = false;
bool CudaUtils::g_bH264Supported = false;
int CudaUtils::g_nCores = 0;

/**
* Initializes COM on the calling thread and the CUDA driver API.
* Does nothing (and returns S_OK) once a previous call has succeeded.
* @return S_OK on success, the CoInitializeEx() error code if COM initialization
* failed, or E_FAIL if cuInit() failed.
*/
HRESULT CudaUtils::Startup()
{
    if(g_bStarted) {
        return S_OK;
    }

    HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
    // 0x80010106 when called from managed code (e.g. Boghe) - More info: http://support.microsoft.com/kb/824480
    // COM is already initialized on this thread in that case, so it is not treated as an error.
    const bool bChangedMode = (hr == 0x80010106);
    if(FAILED(hr) && !bChangedMode) {
        TSK_DEBUG_ERROR("CoInitializeEx() failed with error code = %08x", (unsigned)hr);
        return hr;
    }
    // Only a successful CoInitializeEx() (S_OK or S_FALSE) has to be balanced by CoUninitialize().
    const bool bComInitializedHere = SUCCEEDED(hr);

    CUresult cuResult = cuInit(0);
    if(cuResult != CUDA_SUCCESS) {
        TSK_DEBUG_ERROR("cuInit() failed with error code = %08x", cuResult);
        if(bComInitializedHere) {
            CoUninitialize();
        }
        // g_bStarted stays false so that a later call retries instead of reporting S_OK.
        return E_FAIL;
    }

    g_bStarted = true;
    return S_OK;
}

/**
* Counterpart of Startup(). Currently a no-op that always returns S_OK.
*/
HRESULT CudaUtils::Shutdown()
{
    // cuDeinit();
    return S_OK;
}

/**
* Checks whether the NVIDIA encoder supports both the H.264 Baseline and Main profiles.
* The answer is cached in g_bH264Supported once Startup() has succeeded; if Startup()
* fails the check is not marked as done and will be attempted again on the next call.
* @return true if both profiles are supported, false otherwise.
*/
bool CudaUtils::IsH264Supported()
{
    if(g_bH264Checked) {
        return g_bH264Supported;
    }

    HRESULT hr = S_OK;
    // Must be initialized before the first CHECK_HR: the cleanup code at "bail" reads it
    // on every exit path.
    NVEncoder pEncoder = NULL;

    CHECK_HR(hr = Startup());

    g_bH264Checked = true;

    CHECK_HR(hr = NVGetHWEncodeCaps());
    CHECK_HR(hr = NVCreateEncoder(&pEncoder));

    // Both Base and Main profiles *must* be supported
    CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_BASELINE));
    CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_MAIN));

    g_bH264Supported = true;

bail:
    if(pEncoder) {
        NVDestroyEncoder(pEncoder);
        pEncoder = NULL;
    }

    return g_bH264Supported;
}

/**
* Maps an SM (compute capability) version to the number of cores per SM.
* The first computed value is cached in g_nCores and returned by every later call,
* whatever arguments are passed.
* @param nMajor SM major version.
* @param nMinor SM minor version.
* @return Cores per SM for the given version, or the value of the last table entry
* when the version is not listed.
*/
int CudaUtils::ConvertSMVer2Cores(int nMajor, int nMinor)
{
    if(g_nCores != 0) {
        return g_nCores;
    }

    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
    typedef struct {
        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;

    static const sSMtoCores nGpuArchCoresPerSM[] = {
        { 0x10, 8 },   // Tesla Generation (SM 1.0) G80 class
        { 0x11, 8 },   // Tesla Generation (SM 1.1) G8x class
        { 0x12, 8 },   // Tesla Generation (SM 1.2) G9x class
        { 0x13, 8 },   // Tesla Generation (SM 1.3) GT200 class
        { 0x20, 32 },  // Fermi Generation (SM 2.0) GF100 class
        { 0x21, 48 },  // Fermi Generation (SM 2.1) GF10x class
        { 0x30, 192 }, // Kepler Generation (SM 3.0) GK10x class
        { 0x35, 192 }, // Kepler Generation (SM 3.5) GK11x class
    };
    // The table has no terminating sentinel, so iterate over its real size.
    static const int nEntries = (int)(sizeof(nGpuArchCoresPerSM) / sizeof(nGpuArchCoresPerSM[0]));

    const int nSM = (nMajor << 4) + nMinor;
    for(int i = 0; i < nEntries; ++i) {
        if(nGpuArchCoresPerSM[i].SM == nSM) {
            g_nCores = nGpuArchCoresPerSM[i].Cores;
            return g_nCores;
        }
    }

    // If we don't find the values, we default to the last (most recent) entry to run properly
    const int nDefaultCores = nGpuArchCoresPerSM[nEntries - 1].Cores;
    TSK_DEBUG_INFO("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM", nMajor, nMinor, nDefaultCores);
    g_nCores = nDefaultCores;
    return g_nCores;
}

/**
* Selects the CUDA device with the highest (multiProcessorCount * clockRate) product.
* Ties are resolved in favour of the lowest device index.
* @return Index of the selected device, or 0 when no device could be enumerated or queried.
*/
int CudaUtils::GetMaxGflopsDeviceId()
{
    int device_count = 0;
    cudaError_t cuErr = cudaGetDeviceCount(&device_count);
    if(cuErr != cudaSuccess || device_count <= 0) {
        TSK_DEBUG_ERROR("cudaGetDeviceCount() failed or found no device (error = %d, count = %d)", (int)cuErr, device_count);
        return 0;
    }

    int max_gflops_device = 0;
    int max_gflops = -1; // lower than any real score so that the first valid device is always taken

    for(int current_device = 0; current_device < device_count; ++current_device) {
        cudaDeviceProp device_properties;
        cuErr = cudaGetDeviceProperties(&device_properties, current_device);
        if(cuErr != cudaSuccess) {
            // Skip this device: device_properties is not valid when the query fails.
            TSK_DEBUG_ERROR("cudaGetDeviceProperties(%d) failed with error code = %d", current_device, (int)cuErr);
            continue;
        }
        const int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
        if(gflops > max_gflops) {
            max_gflops = gflops;
            max_gflops_device = current_device;
        }
    }

    return max_gflops_device;
}