From 44ad28ebf4a17779e483971e80c260c74e972244 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Tue, 10 Jan 2023 13:52:15 -0600 Subject: [PATCH] Fix a reference leak of hw_frames_ctx and prepare for QSV (#736) --- src/platform/common.h | 9 ++- src/platform/linux/cuda.cpp | 17 ++--- src/platform/linux/vaapi.cpp | 11 ++-- src/platform/macos/nv12_zero_device.cpp | 2 +- src/platform/macos/nv12_zero_device.h | 2 +- src/platform/windows/display_vram.cpp | 87 +++++++++++++++---------- src/video.cpp | 33 +++++----- 7 files changed, 92 insertions(+), 69 deletions(-) diff --git a/src/platform/common.h b/src/platform/common.h index fe074c5b..468df863 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -17,6 +17,8 @@ struct sockaddr; struct AVFrame; +struct AVBufferRef; +struct AVHWFramesContext; // Forward declarations of boost classes to avoid having to include boost headers // here, which results in issues with Windows.h and WinSock2.h include order. @@ -196,13 +198,18 @@ struct hwdevice_t { /** * implementations must take ownership of 'frame' */ - virtual int set_frame(AVFrame *frame) { + virtual int set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) { BOOST_LOG(error) << "Illegal call to hwdevice_t::set_frame(). Did you forget to override it?"; return -1; }; virtual void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {}; + /** + * Implementations may set parameters during initialization of the hwframes context + */ + virtual void init_hwframes(AVHWFramesContext *frames) {}; + virtual ~hwdevice_t() = default; }; diff --git a/src/platform/linux/cuda.cpp b/src/platform/linux/cuda.cpp index 963d7c35..9e3c95b4 100644 --- a/src/platform/linux/cuda.cpp +++ b/src/platform/linux/cuda.cpp @@ -94,20 +94,21 @@ public: return 0; } - int set_frame(AVFrame *frame) override { + int set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override { this->hwframe.reset(frame); this->frame = frame; - auto hwframe_ctx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + auto hwframe_ctx = (AVHWFramesContext *)hw_frames_ctx->data; if(hwframe_ctx->sw_format != AV_PIX_FMT_NV12) { BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12"sv; return -1; } - if(av_hwframe_get_buffer(frame->hw_frames_ctx, frame, 0)) { - BOOST_LOG(error) << "Couldn't get hwframe for NVENC"sv; - - return -1; + if(!frame->buf[0]) { + if(av_hwframe_get_buffer(hw_frames_ctx, frame, 0)) { + BOOST_LOG(error) << "Couldn't get hwframe for NVENC"sv; + return -1; + } } auto cuda_ctx = (AVCUDADeviceContext *)hwframe_ctx->device_ctx->hwctx; @@ -180,8 +181,8 @@ public: return sws.load_ram(img, tex.array) || sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex), stream.get()); } - int set_frame(AVFrame *frame) { - if(cuda_t::set_frame(frame)) { + int set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) { + if(cuda_t::set_frame(frame, hw_frames_ctx)) { return -1; } diff --git a/src/platform/linux/vaapi.cpp b/src/platform/linux/vaapi.cpp index 9dcb2dfe..07f0d323 100644 --- a/src/platform/linux/vaapi.cpp +++ b/src/platform/linux/vaapi.cpp @@ -313,14 +313,15 @@ public: return 0; } - int set_frame(AVFrame *frame) override { + int set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override { this->hwframe.reset(frame); this->frame = frame; - if(av_hwframe_get_buffer(frame->hw_frames_ctx, frame, 0)) { - BOOST_LOG(error) << "Couldn't get hwframe for VAAPI"sv; - - return -1; + if(!frame->buf[0]) { + if(av_hwframe_get_buffer(hw_frames_ctx, frame, 0)) { + BOOST_LOG(error) << "Couldn't get hwframe for VAAPI"sv; + return -1; + } } va::DRMPRIMESurfaceDescriptor prime; diff --git a/src/platform/macos/nv12_zero_device.cpp b/src/platform/macos/nv12_zero_device.cpp index 1af0e058..71e58307 100644 --- a/src/platform/macos/nv12_zero_device.cpp +++ b/src/platform/macos/nv12_zero_device.cpp @@ -53,7 +53,7 @@ int nv12_zero_device::convert(platf::img_t &img) { return result > 0 ? 0 : -1; } -int nv12_zero_device::set_frame(AVFrame *frame) { +int nv12_zero_device::set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) { this->frame = frame; av_frame.reset(frame); diff --git a/src/platform/macos/nv12_zero_device.h b/src/platform/macos/nv12_zero_device.h index 3b74ebcc..1863fb0f 100644 --- a/src/platform/macos/nv12_zero_device.h +++ b/src/platform/macos/nv12_zero_device.h @@ -20,7 +20,7 @@ public: int init(void *display, resolution_fn_t resolution_fn, pixel_format_fn_t pixel_format_fn); int convert(img_t &img); - int set_frame(AVFrame *frame); + int set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx); void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range); }; diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 4c0e8fa4..72ad6b7b 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -392,17 +392,63 @@ public: this->color_matrix = std::move(color_matrix); } - int set_frame(AVFrame *frame) { + void init_hwframes(AVHWFramesContext *frames) override { + // We may be called with a QSV or D3D11VA context + if(frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { + auto d3d11_frames = (AVD3D11VAFramesContext *)frames->hwctx; + + // The encoder requires textures with D3D11_BIND_RENDER_TARGET set + d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET; + d3d11_frames->MiscFlags = 0; + } + + // We require a single texture + frames->initial_pool_size = 1; + } + + int set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override { this->hwframe.reset(frame); this->frame = frame; + // Populate this frame with a hardware buffer if one isn't there already + if(!frame->buf[0]) { + auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0); + if(err) { + char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 }; + BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); + return -1; + } + } + + // If this is a frame from a derived context, we'll need to map it to D3D11 + ID3D11Texture2D *frame_texture; + if(frame->format != AV_PIX_FMT_D3D11) { + frame_t d3d11_frame { av_frame_alloc() }; + + d3d11_frame->format = AV_PIX_FMT_D3D11; + + auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE); + if(err) { + char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 }; + BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); + return -1; + } + + // Get the texture from the mapped frame + frame_texture = (ID3D11Texture2D *)d3d11_frame->data[0]; + } + else { + // Otherwise, we can just use the texture inside the original frame + frame_texture = (ID3D11Texture2D *)frame->data[0]; + } + auto out_width = frame->width; auto out_height = frame->height; float in_width = img.display->width; float in_height = img.display->height; - // // Ensure aspect ratio is maintained + // Ensure aspect ratio is maintained auto scalar = std::fminf(out_width / in_width, out_height / in_height); auto out_width_f = in_width * scalar; auto out_height_f = in_height * scalar; @@ -414,21 +460,9 @@ public: outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f }; outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f }; - D3D11_TEXTURE2D_DESC t {}; - t.Width = out_width; - t.Height = out_height; - t.MipLevels = 1; - t.ArraySize = 1; - t.SampleDesc.Count = 1; - t.Usage = D3D11_USAGE_DEFAULT; - t.Format = format; - t.BindFlags = D3D11_BIND_RENDER_TARGET; - - auto status = device->CreateTexture2D(&t, nullptr, &img.encoder_texture); - if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create render target texture [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } + // The underlying frame pool owns the texture, so we must reference it for ourselves + frame_texture->AddRef(); + img.encoder_texture.reset(frame_texture); img.width = out_width; img.height = out_height; @@ -449,7 +483,7 @@ public: D3D11_RTV_DIMENSION_TEXTURE2D }; - status = device->CreateRenderTargetView(img.encoder_texture.get(), &nv12_rt_desc, &nv12_Y_rt); + auto status = device->CreateRenderTargetView(img.encoder_texture.get(), &nv12_rt_desc, &nv12_Y_rt); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; @@ -463,23 +497,6 @@ public: return -1; } - // Need to have something refcounted - if(!frame->buf[0]) { - frame->buf[0] = av_buffer_allocz(sizeof(AVD3D11FrameDescriptor)); - } - - auto desc = (AVD3D11FrameDescriptor *)frame->buf[0]->data; - desc->texture = (ID3D11Texture2D *)img.data; - desc->index = 0; - - frame->data[0] = img.data; - frame->data[1] = 0; - - frame->linesize[0] = img.row_pitch; - - frame->height = img.height; - frame->width = img.width; - return 0; } diff --git a/src/video.cpp b/src/video.cpp index 1fd50e7e..cd13b0d2 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -71,7 +71,7 @@ util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_c util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); -int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format); +int hwframe_ctx(ctx_t &ctx, platf::hwdevice_t *hwdevice, buffer_t &hwdevice_ctx, AVPixelFormat format); class swdevice_t : public platf::hwdevice_t { public: @@ -116,17 +116,16 @@ public: return 0; } - int set_frame(AVFrame *frame) { + int set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) { this->frame = frame; // If it's a hwframe, allocate buffers for hardware - if(frame->hw_frames_ctx) { + if(hw_frames_ctx) { hw_frame.reset(frame); - if(av_hwframe_get_buffer(frame->hw_frames_ctx, frame, 0)) return -1; + if(av_hwframe_get_buffer(hw_frames_ctx, frame, 0)) return -1; } - - if(!frame->hw_frames_ctx) { + else { sw_frame.reset(frame); } @@ -181,9 +180,9 @@ public: return 0; } - int init(int in_width, int in_height, AVFrame *frame, AVPixelFormat format) { + int init(int in_width, int in_height, AVFrame *frame, AVPixelFormat format, bool hardware) { // If the device used is hardware, yet the image resides on main memory - if(frame->hw_frames_ctx) { + if(hardware) { sw_frame.reset(av_frame_alloc()); sw_frame->width = frame->width; @@ -981,7 +980,7 @@ std::optional make_session(const encoder_t &encoder, const config_t & } hwdevice_ctx = std::move(buf_or_error.left()); - if(hwframe_ctx(ctx, hwdevice_ctx, sw_fmt)) { + if(hwframe_ctx(ctx, hwdevice.get(), hwdevice_ctx, sw_fmt)) { return std::nullopt; } @@ -1063,17 +1062,12 @@ std::optional make_session(const encoder_t &encoder, const config_t & frame->width = ctx->width; frame->height = ctx->height; - - if(hardware) { - frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); - } - std::shared_ptr device; if(!hwdevice->data) { auto device_tmp = std::make_unique(); - if(device_tmp->init(width, height, frame.get(), sw_fmt)) { + if(device_tmp->init(width, height, frame.get(), sw_fmt, hardware)) { return std::nullopt; } @@ -1083,7 +1077,7 @@ std::optional make_session(const encoder_t &encoder, const config_t & device = std::move(hwdevice); } - if(device->set_frame(frame.release())) { + if(device->set_frame(frame.release(), ctx->hw_frames_ctx)) { return std::nullopt; } @@ -1812,8 +1806,8 @@ int init() { return 0; } -int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) { - buffer_t frame_ref { av_hwframe_ctx_alloc(hwdevice.get()) }; +int hwframe_ctx(ctx_t &ctx, platf::hwdevice_t *hwdevice, buffer_t &hwdevice_ctx, AVPixelFormat format) { + buffer_t frame_ref { av_hwframe_ctx_alloc(hwdevice_ctx.get()) }; auto frame_ctx = (AVHWFramesContext *)frame_ref->data; frame_ctx->format = ctx->pix_fmt; @@ -1822,6 +1816,9 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) { frame_ctx->width = ctx->width; frame_ctx->initial_pool_size = 0; + // Allow the hwdevice to modify hwframe context parameters + hwdevice->init_hwframes(frame_ctx); + if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) { return err; }