mirror of
https://github.com/LizardByte/Sunshine.git
synced 2025-08-10 00:52:16 +00:00
Some checks failed
CodeQL / CodeQL (push) Has been cancelled
CI / GitHub Env Debug (push) Has been cancelled
CI / Release Setup (push) Has been cancelled
localize / Update Localization (push) Has been cancelled
Build GH-Pages / prep (push) Has been cancelled
CI / Docker (push) Has been cancelled
CI / Homebrew (push) Has been cancelled
CI / Linux (push) Has been cancelled
CI / Linux Copr (push) Has been cancelled
CI / Linux Flatpak (push) Has been cancelled
CI / Windows (push) Has been cancelled
CI / Coverage-Homebrew-macos-14 (push) Has been cancelled
CI / Coverage-Homebrew-macos-15 (push) Has been cancelled
CI / Coverage-Homebrew-ubuntu-latest (push) Has been cancelled
CI / Coverage-Linux-AppImage (push) Has been cancelled
CI / Coverage-Windows-AMD64 (push) Has been cancelled
CI / Release (push) Has been cancelled
CI / Release Homebrew Beta (push) Has been cancelled
Build GH-Pages / call-jekyll-build (push) Has been cancelled
270 lines
9.6 KiB
C++
270 lines
9.6 KiB
C++
/**
|
|
* @file src/nvenc/nvenc_d3d11_on_cuda.cpp
|
|
* @brief Definitions for CUDA NVENC encoder with Direct3D11 input surfaces.
|
|
*/
|
|
#ifdef _WIN32
|
|
// this include
|
|
#include "nvenc_d3d11_on_cuda.h"
|
|
|
|
// local includes
|
|
#include "nvenc_utils.h"
|
|
|
|
namespace nvenc {
|
|
|
|
nvenc_d3d11_on_cuda::nvenc_d3d11_on_cuda(ID3D11Device *d3d_device):
|
|
nvenc_d3d11(NV_ENC_DEVICE_TYPE_CUDA),
|
|
d3d_device(d3d_device) {
|
|
}
|
|
|
|
nvenc_d3d11_on_cuda::~nvenc_d3d11_on_cuda() {
|
|
if (encoder) {
|
|
destroy_encoder();
|
|
}
|
|
|
|
if (cuda_context) {
|
|
{
|
|
auto autopop_context = push_context();
|
|
|
|
if (cuda_d3d_input_texture) {
|
|
if (cuda_failed(cuda_functions.cuGraphicsUnregisterResource(cuda_d3d_input_texture))) {
|
|
BOOST_LOG(error) << "NvEnc: cuGraphicsUnregisterResource() failed: error " << last_cuda_error;
|
|
}
|
|
cuda_d3d_input_texture = nullptr;
|
|
}
|
|
|
|
if (cuda_surface) {
|
|
if (cuda_failed(cuda_functions.cuMemFree(cuda_surface))) {
|
|
BOOST_LOG(error) << "NvEnc: cuMemFree() failed: error " << last_cuda_error;
|
|
}
|
|
cuda_surface = 0;
|
|
}
|
|
}
|
|
|
|
if (cuda_failed(cuda_functions.cuCtxDestroy(cuda_context))) {
|
|
BOOST_LOG(error) << "NvEnc: cuCtxDestroy() failed: error " << last_cuda_error;
|
|
}
|
|
cuda_context = nullptr;
|
|
}
|
|
|
|
if (cuda_functions.dll) {
|
|
FreeLibrary(cuda_functions.dll);
|
|
cuda_functions = {};
|
|
}
|
|
}
|
|
|
|
ID3D11Texture2D *nvenc_d3d11_on_cuda::get_input_texture() {
|
|
return d3d_input_texture.GetInterfacePtr();
|
|
}
|
|
|
|
bool nvenc_d3d11_on_cuda::init_library() {
|
|
if (!nvenc_d3d11::init_library()) {
|
|
return false;
|
|
}
|
|
|
|
constexpr auto dll_name = "nvcuda.dll";
|
|
|
|
if ((cuda_functions.dll = LoadLibraryEx(dll_name, nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
|
|
auto load_function = [&]<typename T>(T &location, auto symbol) -> bool {
|
|
location = (T) GetProcAddress(cuda_functions.dll, symbol);
|
|
return location != nullptr;
|
|
};
|
|
if (!load_function(cuda_functions.cuInit, "cuInit") ||
|
|
!load_function(cuda_functions.cuD3D11GetDevice, "cuD3D11GetDevice") ||
|
|
!load_function(cuda_functions.cuCtxCreate, "cuCtxCreate_v2") ||
|
|
!load_function(cuda_functions.cuCtxDestroy, "cuCtxDestroy_v2") ||
|
|
!load_function(cuda_functions.cuCtxPushCurrent, "cuCtxPushCurrent_v2") ||
|
|
!load_function(cuda_functions.cuCtxPopCurrent, "cuCtxPopCurrent_v2") ||
|
|
!load_function(cuda_functions.cuMemAllocPitch, "cuMemAllocPitch_v2") ||
|
|
!load_function(cuda_functions.cuMemFree, "cuMemFree_v2") ||
|
|
!load_function(cuda_functions.cuGraphicsD3D11RegisterResource, "cuGraphicsD3D11RegisterResource") ||
|
|
!load_function(cuda_functions.cuGraphicsUnregisterResource, "cuGraphicsUnregisterResource") ||
|
|
!load_function(cuda_functions.cuGraphicsMapResources, "cuGraphicsMapResources") ||
|
|
!load_function(cuda_functions.cuGraphicsUnmapResources, "cuGraphicsUnmapResources") ||
|
|
!load_function(cuda_functions.cuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray") ||
|
|
!load_function(cuda_functions.cuMemcpy2D, "cuMemcpy2D_v2")) {
|
|
BOOST_LOG(error) << "NvEnc: missing CUDA functions in " << dll_name;
|
|
FreeLibrary(cuda_functions.dll);
|
|
cuda_functions = {};
|
|
}
|
|
} else {
|
|
BOOST_LOG(debug) << "NvEnc: couldn't load CUDA dynamic library " << dll_name;
|
|
}
|
|
|
|
if (cuda_functions.dll) {
|
|
IDXGIDevicePtr dxgi_device;
|
|
IDXGIAdapterPtr dxgi_adapter;
|
|
if (d3d_device &&
|
|
SUCCEEDED(d3d_device->QueryInterface(IID_PPV_ARGS(&dxgi_device))) &&
|
|
SUCCEEDED(dxgi_device->GetAdapter(&dxgi_adapter))) {
|
|
CUdevice cuda_device;
|
|
if (cuda_succeeded(cuda_functions.cuInit(0)) &&
|
|
cuda_succeeded(cuda_functions.cuD3D11GetDevice(&cuda_device, dxgi_adapter)) &&
|
|
cuda_succeeded(cuda_functions.cuCtxCreate(&cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, cuda_device)) &&
|
|
cuda_succeeded(cuda_functions.cuCtxPopCurrent(&cuda_context))) {
|
|
device = cuda_context;
|
|
} else {
|
|
BOOST_LOG(error) << "NvEnc: couldn't create CUDA interop context: error " << last_cuda_error;
|
|
}
|
|
} else {
|
|
BOOST_LOG(error) << "NvEnc: couldn't get DXGI adapter for CUDA interop";
|
|
}
|
|
}
|
|
|
|
return device != nullptr;
|
|
}
|
|
|
|
bool nvenc_d3d11_on_cuda::create_and_register_input_buffer() {
|
|
if (encoder_params.buffer_format != NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
|
|
BOOST_LOG(error) << "NvEnc: CUDA interop is expected to be used only for 10-bit 4:4:4 encoding";
|
|
return false;
|
|
}
|
|
|
|
if (!d3d_input_texture) {
|
|
D3D11_TEXTURE2D_DESC desc = {};
|
|
desc.Width = encoder_params.width;
|
|
desc.Height = encoder_params.height * 3; // Planar YUV
|
|
desc.MipLevels = 1;
|
|
desc.ArraySize = 1;
|
|
desc.Format = dxgi_format_from_nvenc_format(encoder_params.buffer_format);
|
|
desc.SampleDesc.Count = 1;
|
|
desc.Usage = D3D11_USAGE_DEFAULT;
|
|
desc.BindFlags = D3D11_BIND_RENDER_TARGET;
|
|
|
|
if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
|
|
BOOST_LOG(error) << "NvEnc: couldn't create input texture";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
{
|
|
auto autopop_context = push_context();
|
|
if (!autopop_context) {
|
|
return false;
|
|
}
|
|
|
|
if (!cuda_d3d_input_texture) {
|
|
if (cuda_failed(cuda_functions.cuGraphicsD3D11RegisterResource(
|
|
&cuda_d3d_input_texture,
|
|
d3d_input_texture,
|
|
CU_GRAPHICS_REGISTER_FLAGS_NONE
|
|
))) {
|
|
BOOST_LOG(error) << "NvEnc: cuGraphicsD3D11RegisterResource() failed: error " << last_cuda_error;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (!cuda_surface) {
|
|
if (cuda_failed(cuda_functions.cuMemAllocPitch(
|
|
&cuda_surface,
|
|
&cuda_surface_pitch,
|
|
// Planar 16-bit YUV
|
|
encoder_params.width * 2,
|
|
encoder_params.height * 3,
|
|
16
|
|
))) {
|
|
BOOST_LOG(error) << "NvEnc: cuMemAllocPitch() failed: error " << last_cuda_error;
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!registered_input_buffer) {
|
|
NV_ENC_REGISTER_RESOURCE register_resource = {min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4)};
|
|
register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
|
|
register_resource.width = encoder_params.width;
|
|
register_resource.height = encoder_params.height;
|
|
register_resource.pitch = cuda_surface_pitch;
|
|
register_resource.resourceToRegister = (void *) cuda_surface;
|
|
register_resource.bufferFormat = encoder_params.buffer_format;
|
|
register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
|
|
|
|
if (nvenc_failed(nvenc->nvEncRegisterResource(encoder, ®ister_resource))) {
|
|
BOOST_LOG(error) << "NvEnc: NvEncRegisterResource() failed: " << last_nvenc_error_string;
|
|
return false;
|
|
}
|
|
|
|
registered_input_buffer = register_resource.registeredResource;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool nvenc_d3d11_on_cuda::synchronize_input_buffer() {
|
|
auto autopop_context = push_context();
|
|
if (!autopop_context) {
|
|
return false;
|
|
}
|
|
|
|
if (cuda_failed(cuda_functions.cuGraphicsMapResources(1, &cuda_d3d_input_texture, 0))) {
|
|
BOOST_LOG(error) << "NvEnc: cuGraphicsMapResources() failed: error " << last_cuda_error;
|
|
return false;
|
|
}
|
|
|
|
auto unmap = [&]() -> bool {
|
|
if (cuda_failed(cuda_functions.cuGraphicsUnmapResources(1, &cuda_d3d_input_texture, 0))) {
|
|
BOOST_LOG(error) << "NvEnc: cuGraphicsUnmapResources() failed: error " << last_cuda_error;
|
|
return false;
|
|
}
|
|
return true;
|
|
};
|
|
auto unmap_guard = util::fail_guard(unmap);
|
|
|
|
CUarray input_texture_array;
|
|
if (cuda_failed(cuda_functions.cuGraphicsSubResourceGetMappedArray(&input_texture_array, cuda_d3d_input_texture, 0, 0))) {
|
|
BOOST_LOG(error) << "NvEnc: cuGraphicsSubResourceGetMappedArray() failed: error " << last_cuda_error;
|
|
return false;
|
|
}
|
|
|
|
{
|
|
CUDA_MEMCPY2D copy_params = {};
|
|
copy_params.srcMemoryType = CU_MEMORYTYPE_ARRAY;
|
|
copy_params.srcArray = input_texture_array;
|
|
copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
|
copy_params.dstDevice = cuda_surface;
|
|
copy_params.dstPitch = cuda_surface_pitch;
|
|
// Planar 16-bit YUV
|
|
copy_params.WidthInBytes = encoder_params.width * 2;
|
|
copy_params.Height = encoder_params.height * 3;
|
|
|
|
if (cuda_failed(cuda_functions.cuMemcpy2D(©_params))) {
|
|
BOOST_LOG(error) << "NvEnc: cuMemcpy2D() failed: error " << last_cuda_error;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
unmap_guard.disable();
|
|
return unmap();
|
|
}
|
|
|
|
bool nvenc_d3d11_on_cuda::cuda_succeeded(CUresult result) {
|
|
last_cuda_error = result;
|
|
return result == CUDA_SUCCESS;
|
|
}
|
|
|
|
bool nvenc_d3d11_on_cuda::cuda_failed(CUresult result) {
|
|
last_cuda_error = result;
|
|
return result != CUDA_SUCCESS;
|
|
}
|
|
|
|
nvenc_d3d11_on_cuda::autopop_context::~autopop_context() {
|
|
if (pushed_context) {
|
|
CUcontext popped_context;
|
|
if (parent.cuda_failed(parent.cuda_functions.cuCtxPopCurrent(&popped_context))) {
|
|
BOOST_LOG(error) << "NvEnc: cuCtxPopCurrent() failed: error " << parent.last_cuda_error;
|
|
}
|
|
}
|
|
}
|
|
|
|
nvenc_d3d11_on_cuda::autopop_context nvenc_d3d11_on_cuda::push_context() {
|
|
if (cuda_context &&
|
|
cuda_succeeded(cuda_functions.cuCtxPushCurrent(cuda_context))) {
|
|
return {*this, cuda_context};
|
|
} else {
|
|
BOOST_LOG(error) << "NvEnc: cuCtxPushCurrent() failed: error " << last_cuda_error;
|
|
return {*this, nullptr};
|
|
}
|
|
}
|
|
|
|
} // namespace nvenc
|
|
#endif
|