diff --git a/sunshine/platform/common.h b/sunshine/platform/common.h index 60d791b0..d8a8171e 100644 --- a/sunshine/platform/common.h +++ b/sunshine/platform/common.h @@ -67,12 +67,10 @@ public: struct hwdevice_ctx_t { void *hwdevice {}; - // Could be nullptr, depends on the encoder - std::shared_ptr lock; - virtual const platf::img_t*const convert(platf::img_t &img) { return nullptr; } + virtual void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {}; virtual ~hwdevice_ctx_t() = default; }; @@ -86,18 +84,10 @@ enum class capture_e : int { class display_t { public: - virtual capture_e snapshot(img_t *img, bool cursor) = 0; + virtual capture_e snapshot(img_t *img, std::chrono::milliseconds timeout, bool cursor) = 0; virtual std::shared_ptr alloc_img() = 0; - virtual int dummy_img(img_t *img, int &dummy_data_p) { - img->row_pitch = 4; - img->height = 1; - img->width = 1; - img->pixel_pitch = 4; - img->data = (std::uint8_t*)&dummy_data_p; - - return 0; - } + virtual int dummy_img(img_t *img) = 0; virtual std::shared_ptr make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) { return std::make_shared(); @@ -137,6 +127,8 @@ int alloc_gamepad(input_t &input, int nr); void free_gamepad(input_t &input, int nr); [[nodiscard]] std::unique_ptr init(); + +int thread_priority(); } #endif //SUNSHINE_COMMON_H diff --git a/sunshine/platform/windows.cpp b/sunshine/platform/windows.cpp index 98436ec7..3a113df8 100755 --- a/sunshine/platform/windows.cpp +++ b/sunshine/platform/windows.cpp @@ -331,6 +331,10 @@ void gamepad(input_t &input, int nr, const gamepad_state_t &gamepad_state) { } } +int thread_priority() { + return SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST) ? 0 : 1; +} + void freeInput(void *p) { auto vigem = (vigem_t*)p; diff --git a/sunshine/platform/windows_dxgi.cpp b/sunshine/platform/windows_dxgi.cpp index d461fa16..9e95942a 100644 --- a/sunshine/platform/windows_dxgi.cpp +++ b/sunshine/platform/windows_dxgi.cpp @@ -8,6 +8,7 @@ extern "C" { #include #include +#include #include #include @@ -26,17 +27,17 @@ void Release(T *dxgi) { dxgi->Release(); } -using factory1_t = util::safe_ptr>; -using dxgi_t = util::safe_ptr>; -using dxgi1_t = util::safe_ptr>; -using device_t = util::safe_ptr>; -using device_ctx_t = util::safe_ptr>; -using adapter_t = util::safe_ptr>; -using output_t = util::safe_ptr>; -using output1_t = util::safe_ptr>; -using dup_t = util::safe_ptr>; -using texture2d_t = util::safe_ptr>; -using resource_t = util::safe_ptr>; +using factory1_t = util::safe_ptr>; +using dxgi_t = util::safe_ptr>; +using dxgi1_t = util::safe_ptr>; +using device_t = util::safe_ptr>; +using device_ctx_t = util::safe_ptr>; +using adapter_t = util::safe_ptr>; +using output_t = util::safe_ptr>; +using output1_t = util::safe_ptr>; +using dup_t = util::safe_ptr>; +using texture2d_t = util::safe_ptr>; +using resource_t = util::safe_ptr>; namespace video { using device_t = util::safe_ptr>; @@ -54,13 +55,13 @@ public: dup_t dup; bool has_frame {}; - capture_e next_frame(DXGI_OUTDUPL_FRAME_INFO &frame_info, resource_t::pointer *res_p) { + capture_e next_frame(DXGI_OUTDUPL_FRAME_INFO &frame_info, std::chrono::milliseconds timeout, resource_t::pointer *res_p) { auto capture_status = release_frame(); if(capture_status != capture_e::ok) { return capture_status; } - auto status = dup->AcquireNextFrame(1000, &frame_info, res_p); + auto status = dup->AcquireNextFrame(timeout.count(), &frame_info, res_p); switch(status) { case S_OK: @@ -300,7 +301,8 @@ public: video::processor_in_t::pointer processor_in_p; auto status = device->CreateVideoProcessorInputView(img.texture.get(), processor_e.get(), &input_desc, &processor_in_p); if(FAILED(status)) { - BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Failed to create VideoProcessorInputView [0x"sv + << util::hex(status).to_string_view() << ']'; return nullptr; } it = texture_to_processor_in.emplace(img.texture.get(), processor_in_p).first; @@ -308,7 +310,7 @@ public: auto &processor_in = it->second; D3D11_VIDEO_PROCESSOR_STREAM stream { TRUE, 0, 0, 0, 0, nullptr, processor_in.get(), nullptr }; - std::lock_guard lg { *lock }; + auto status = ctx->VideoProcessorBlt(processor.get(), processor_out.get(), 0, 1, &stream); if(FAILED(status)) { BOOST_LOG(error) << "Failed size and color conversion [0x"sv << util::hex(status).to_string_view() << ']'; @@ -318,11 +320,13 @@ public: return &this->img; } - int init(std::shared_ptr &lock, std::shared_ptr display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) { - HRESULT status; + void set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { + colorspace |= (color_range >> 4); + ctx->VideoProcessorSetOutputColorSpace(processor.get(), (D3D11_VIDEO_PROCESSOR_COLOR_SPACE*)&colorspace); + } - this->lock = lock; - std::lock_guard lg { *lock }; + int init(std::shared_ptr display, device_t::pointer device_p, device_ctx_t::pointer device_ctx_p, int in_width, int in_height, int out_width, int out_height) { + HRESULT status; video::device_t::pointer vdevice_p; status = device_p->QueryInterface(IID_ID3D11VideoDevice, (void**)&vdevice_p); @@ -344,7 +348,7 @@ public: D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE, { 1, 1 }, (UINT)in_width, (UINT)in_height, { 1, 1 }, (UINT)out_width, (UINT)out_height, - D3D11_VIDEO_USAGE_PLAYBACK_NORMAL + D3D11_VIDEO_USAGE_OPTIMAL_QUALITY }; video::processor_enum_t::pointer vp_e_p; @@ -632,7 +636,7 @@ public: class display_cpu_t : public display_base_t { public: - capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override { + capture_e snapshot(::platf::img_t *img_base, std::chrono::milliseconds timeout, bool cursor_visible) override { auto img = (img_t*)img_base; HRESULT status; @@ -640,7 +644,7 @@ public: DXGI_OUTDUPL_FRAME_INFO frame_info; resource_t::pointer res_p {}; - auto capture_status = dup.next_frame(frame_info, &res_p); + auto capture_status = dup.next_frame(frame_info, timeout, &res_p); resource_t res{res_p}; if (capture_status != capture_e::ok) { @@ -736,10 +740,14 @@ public: return img; } - int dummy_img(platf::img_t *img, int &) override { - auto dummy_data_p = new int[1]; + int dummy_img(platf::img_t *img) override { + img->data = new std::uint8_t[4]; + img->row_pitch = 4; + img->pixel_pitch = 4; + img->width = 1; + img->height = 1; - return platf::display_t::dummy_img(img, *dummy_data_p); + return 0; } int init() { @@ -784,23 +792,22 @@ public: class display_gpu_t : public display_base_t, public std::enable_shared_from_this { public: - capture_e snapshot(::platf::img_t *img_base, bool cursor_visible) override { + capture_e snapshot(::platf::img_t *img_base, std::chrono::milliseconds timeout, bool cursor_visible) override { auto img = (img_d3d_t*)img_base; HRESULT status; DXGI_OUTDUPL_FRAME_INFO frame_info; - std::lock_guard lg { *lock }; resource_t::pointer res_p {}; - auto capture_status = dup.next_frame(frame_info, &res_p); + auto capture_status = dup.next_frame(frame_info, timeout, &res_p); resource_t res{res_p}; if (capture_status != capture_e::ok) { return capture_status; } - const bool update_flag = frame_info.LastPresentTime.QuadPart != 0; + const bool update_flag = frame_info.AccumulatedFrames != 0 || frame_info.LastPresentTime.QuadPart != 0; if(!update_flag) { return capture_e::timeout; } @@ -814,7 +821,6 @@ public: } texture2d_t src { src_p }; - device_ctx->CopyResource(img->texture.get(), src.get()); return capture_e::ok; @@ -850,7 +856,7 @@ public: return img; } - int dummy_img(platf::img_t *img_base, int &dummy_data_p) override { + int dummy_img(platf::img_t *img_base) override { auto img = (img_d3d_t*)img_base; img->row_pitch = width * 4; @@ -887,17 +893,10 @@ public: return 0; } - int init() { - lock = std::make_shared(); - std::lock_guard lg { *lock }; - return display_base_t::init(); - } - std::shared_ptr make_hwdevice_ctx(int width, int height, pix_fmt_e pix_fmt) override { auto hwdevice = std::make_shared(); auto ret = hwdevice->init( - lock, shared_from_this(), device.get(), device_ctx.get(), @@ -910,8 +909,6 @@ public: return hwdevice; } - - std::shared_ptr lock; }; const char *format_str[] = { diff --git a/sunshine/round_robin.h b/sunshine/round_robin.h index b3ae4475..47e125db 100755 --- a/sunshine/round_robin.h +++ b/sunshine/round_robin.h @@ -118,6 +118,14 @@ public: } } + void dec() { + if(_pos == _begin) { + _pos = _end; + } + + --_pos; + } + bool eq(const round_robin_t &other) const { return *_pos == *other._pos; } diff --git a/sunshine/stream.cpp b/sunshine/stream.cpp index c692d7c0..17013b11 100644 --- a/sunshine/stream.cpp +++ b/sunshine/stream.cpp @@ -604,21 +604,21 @@ void videoBroadcastThread(safe::signal_t *shutdown_event, udp::socket &sock, vid payload = {(char *) payload_new.data(), payload_new.size()}; - // make sure moonlight recognizes the nalu code for IDR frames - if (packet->flags & AV_PKT_FLAG_KEY) { - // TODO: Not all encoders encode their IDR frames with the 4 byte NALU prefix - std::string_view frame_old = "\000\000\001e"sv; - std::string_view frame_new = "\000\000\000\001e"sv; - if(session->config.monitor.videoFormat != 0) { - frame_old = "\000\000\001("sv; - frame_new = "\000\000\000\001("sv; - } + // // make sure moonlight recognizes the nalu code for IDR frames + // if (packet->flags & AV_PKT_FLAG_KEY) { + // // TODO: Not all encoders encode their IDR frames with the 4 byte NALU prefix + // std::string_view frame_old = "\000\000\001e"sv; + // std::string_view frame_new = "\000\000\000\001e"sv; + // if(session->config.monitor.videoFormat != 0) { + // frame_old = "\000\000\001("sv; + // frame_new = "\000\000\000\001("sv; + // } - assert(std::search(std::begin(payload), std::end(payload), std::begin(frame_new), std::end(frame_new)) == - std::end(payload)); - payload_new = replace(payload, frame_old, frame_new); - payload = {(char *) payload_new.data(), payload_new.size()}; - } + // assert(std::search(std::begin(payload), std::end(payload), std::begin(frame_new), std::end(frame_new)) == + // std::end(payload)); + // payload_new = replace(payload, frame_old, frame_new); + // payload = {(char *) payload_new.data(), payload_new.size()}; + // } // insert packet headers auto blocksize = session->config.packetsize + MAX_RTP_HEADER_SIZE; diff --git a/sunshine/video.cpp b/sunshine/video.cpp index 1d5ae97d..3530f481 100644 --- a/sunshine/video.cpp +++ b/sunshine/video.cpp @@ -113,7 +113,12 @@ static encoder_t nvenc { { {"forced-idr"s, 1} }, "hevc_nvenc"s }, { - { {"forced-idr"s, 1}, { "preset"s , 9} }, "h264_nvenc"s + { + { "forced-idr"s, 1}, + { "profile"s, "high"s }, + { "preset"s , "llhp" }, + { "rc"s, "cbr_ld_hq"s }, + }, "h264_nvenc"s }, false, @@ -209,9 +214,8 @@ void captureThread( display_wp = disp; std::vector> imgs(12); - auto round_robin = util::make_round_robin>(std::begin(imgs) +1, std::end(imgs)); + auto round_robin = util::make_round_robin>(std::begin(imgs), std::end(imgs)); - int dummy_data = 0; for(auto &img : imgs) { img = disp->alloc_img(); if(!img) { @@ -219,9 +223,11 @@ void captureThread( return; } } - auto &dummy_img = imgs.front(); - if(disp->dummy_img(dummy_img.get(), dummy_data)) { - return; + + if(auto capture_ctx = capture_ctx_queue->pop()) { + capture_ctxs.emplace_back(std::move(*capture_ctx)); + + delay = capture_ctxs.back().delay; } auto next_frame = std::chrono::steady_clock::now(); @@ -229,22 +235,15 @@ void captureThread( while(capture_ctx_queue->peek()) { capture_ctxs.emplace_back(std::move(*capture_ctx_queue->pop())); - // Temporary image to ensure something is send to Moonlight even if no frame has been captured yet. - capture_ctxs.back().images->raise(dummy_img); - delay = std::min(delay, capture_ctxs.back().delay); } auto now = std::chrono::steady_clock::now(); - if(next_frame > now) { - std::this_thread::sleep_until(next_frame); - } - next_frame += delay; auto &img = *round_robin++; while(img.use_count() > 1) {} - auto status = disp->snapshot(img.get(), display_cursor); + auto status = disp->snapshot(img.get(), 1000ms, display_cursor); switch (status) { case platf::capture_e::reinit: { reinit_event.raise(true); @@ -276,16 +275,14 @@ void captureThread( return; } } - if(disp->dummy_img(dummy_img.get(), dummy_data)) { - return; - } reinit_event.reset(); continue; } case platf::capture_e::error: - return; + return; case platf::capture_e::timeout: + std::this_thread::sleep_for(1ms); continue; case platf::capture_e::ok: break; @@ -310,9 +307,36 @@ void captureThread( capture_ctx->images->raise(img); ++capture_ctx; }) + + if(next_frame > now) { + std::this_thread::sleep_until(next_frame); + } + next_frame += delay; } } +int start_capture(capture_thread_ctx_t &capture_thread_ctx) { + capture_thread_ctx.encoder_p = &encoders.front(); + capture_thread_ctx.reinit_event.reset(); + + capture_thread_ctx.capture_ctx_queue = std::make_shared>(); + + capture_thread_ctx.capture_thread = std::thread { + captureThread, + capture_thread_ctx.capture_ctx_queue, + std::ref(capture_thread_ctx.display_wp), + std::ref(capture_thread_ctx.reinit_event), + std::ref(*capture_thread_ctx.encoder_p) + }; + + return 0; +} +void end_capture(capture_thread_ctx_t &capture_thread_ctx) { + capture_thread_ctx.capture_ctx_queue->stop(); + + capture_thread_ctx.capture_thread.join(); +} + util::Either hwdevice_ctx(AVHWDeviceType type, void *hwdevice_ctx) { buffer_t ctx; @@ -385,28 +409,6 @@ int encode(int64_t frame_nr, ctx_t &ctx, frame_t &frame, packet_queue_t &packets return 0; } -int start_capture(capture_thread_ctx_t &capture_thread_ctx) { - capture_thread_ctx.encoder_p = &encoders.front(); - capture_thread_ctx.reinit_event.reset(); - - capture_thread_ctx.capture_ctx_queue = std::make_shared>(); - - capture_thread_ctx.capture_thread = std::thread { - captureThread, - capture_thread_ctx.capture_ctx_queue, - std::ref(capture_thread_ctx.display_wp), - std::ref(capture_thread_ctx.reinit_event), - std::ref(*capture_thread_ctx.encoder_p) - }; - - return 0; -} -void end_capture(capture_thread_ctx_t &capture_thread_ctx) { - capture_thread_ctx.capture_ctx_queue->stop(); - - capture_thread_ctx.capture_thread.join(); -} - std::optional make_session(const encoder_t &encoder, const config_t &config, platf::hwdevice_ctx_t *device_ctx) { bool hardware = encoder.dev_type != AV_HWDEVICE_TYPE_NONE; @@ -505,6 +507,8 @@ std::optional make_session(const encoder_t &encoder, const config_t if(hwframe_ctx(ctx, hwdevice, sw_fmt)) { return std::nullopt; } + + ctx->slices = config.slicesPerFrame; } else /* software */ { ctx->pix_fmt = sw_fmt; @@ -530,7 +534,7 @@ std::optional make_session(const encoder_t &encoder, const config_t if(config.bitrate > 500) { auto bitrate = config.bitrate * 1000; ctx->rc_max_rate = bitrate; - ctx->rc_buffer_size = bitrate / 100; + ctx->rc_buffer_size = bitrate / config.framerate; ctx->bit_rate = bitrate; ctx->rc_min_rate = bitrate; } @@ -582,6 +586,8 @@ void encode_run( return; } + hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range); + auto delay = std::chrono::floor(1s) / config.framerate; auto img_width = 0; @@ -654,17 +660,8 @@ void encode_run( break; } } - - int err; - if(hwdevice_ctx && hwdevice_ctx->lock) { - std::lock_guard lg { *hwdevice_ctx->lock }; - err = encode(frame_nr++, session->ctx, session->frame, packets, channel_data); - } - else { - err = encode(frame_nr++, session->ctx, session->frame, packets, channel_data); - } - if(err) { + if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) { BOOST_LOG(fatal) << "Could not encode video packet"sv; log_flush(); std::abort(); @@ -681,6 +678,110 @@ void capture( config_t config, void *channel_data) { + auto lg = util::fail_guard([&]() { + shutdown_event->raise(true); + }); + + const auto &encoder = encoders.front(); + auto disp = platf::display(encoder.dev_type); + if(!disp) { + return; + } + + auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10; + auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt); + if(!hwdevice_ctx) { + return; + } + + auto session = make_session(encoder, config, hwdevice_ctx.get()); + if(!session) { + return; + } + hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range); + + auto img = disp->alloc_img(); + if(disp->dummy_img(img.get())) { + return; + } + + const platf::img_t* img_p = hwdevice_ctx->convert(*img); + if(!img_p) { + return; + } + + sws_t sws; + encoder.img_to_frame(sws, *img_p, session->frame); + + std::vector> imgs(12); + for(auto &img : imgs) { + img = disp->alloc_img(); + } + + auto round_robin = util::make_round_robin>(std::begin(imgs), std::end(imgs)); + + int frame_nr = 1; + int key_frame_nr = 1; + + auto max_delay = 1000ms / config.framerate; + + std::shared_ptr img_tmp; + auto next_frame = std::chrono::steady_clock::now(); + while(!shutdown_event->peek()) { + if(idr_events->peek()) { + session->frame->pict_type = AV_PICTURE_TYPE_I; + + auto event = idr_events->pop(); + TUPLE_2D_REF(_, end, *event); + + frame_nr = end; + key_frame_nr = end + config.framerate; + } + else if(frame_nr == key_frame_nr) { + session->frame->pict_type = AV_PICTURE_TYPE_I; + } + + auto delay = std::max(0ms, std::chrono::duration_cast(next_frame - std::chrono::steady_clock::now())); + + auto status = disp->snapshot(round_robin->get(), delay, display_cursor); + switch(status) { + case platf::capture_e::reinit: + return; + case platf::capture_e::error: + return; + case platf::capture_e::timeout: + next_frame += max_delay; + if(!img_tmp && frame_nr > (key_frame_nr + config.framerate)) { + continue; + } + + break; + case platf::capture_e::ok: + img_tmp = *round_robin++; + break; + } + + if(img_tmp) { + img_p = hwdevice_ctx->convert(*img_tmp); + img_tmp.reset(); + } + + if(encode(frame_nr++, session->ctx, session->frame, packets, channel_data)) { + BOOST_LOG(fatal) << "Could not encode video packet"sv; + log_flush(); + std::abort(); + } + + session->frame->pict_type = AV_PICTURE_TYPE_NONE; + } +} +void capture_async( + safe::signal_t *shutdown_event, + packet_queue_t packets, + idr_event_t idr_events, + config_t config, + void *channel_data) { + auto images = std::make_shared(); auto lg = util::fail_guard([&]() { images->stop(); @@ -723,6 +824,12 @@ void capture( return; } + auto dummy_img = display->alloc_img(); + if(display->dummy_img(dummy_img.get())) { + return; + } + images->raise(std::move(dummy_img)); + encode_run(frame_nr, key_frame_nr, shutdown_event, packets, idr_events, images, config, hwdevice_ctx.get(), ref->reinit_event, *ref->encoder_p, channel_data); } } @@ -733,7 +840,6 @@ bool validate_config(std::shared_ptr &disp, const encoder_t &e return false; } - auto pix_fmt = config.dynamicRange == 0 ? platf::pix_fmt_e::yuv420p : platf::pix_fmt_e::yuv420p10; auto hwdevice_ctx = disp->make_hwdevice_ctx(config.width, config.height, pix_fmt); if(!hwdevice_ctx) { @@ -744,10 +850,10 @@ bool validate_config(std::shared_ptr &disp, const encoder_t &e if(!session) { return false; } + hwdevice_ctx->set_colorspace(session->sws_color_format, session->ctx->color_range); - int dummy_data; auto img = disp->alloc_img(); - if(disp->dummy_img(img.get(), dummy_data)) { + if(disp->dummy_img(img.get())) { return false; } @@ -900,10 +1006,8 @@ void nv_d3d_img_to_frame(sws_t &sws, const platf::img_t &img, frame_t &frame) { } void nvenc_lock(void *lock_p) { - ((std::recursive_mutex*)lock_p)->lock(); } void nvenc_unlock(void *lock_p) { - ((std::recursive_mutex*)lock_p)->unlock(); } util::Either nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwdevice_ctx) { @@ -913,10 +1017,6 @@ util::Either nv_d3d_make_hwdevice_ctx(platf::hwdevice_ctx_t *hwde std::fill_n((std::uint8_t*)ctx, sizeof(AVD3D11VADeviceContext), 0); std::swap(ctx->device, *(ID3D11Device**)&hwdevice_ctx->hwdevice); - ctx->lock_ctx = hwdevice_ctx->lock.get(); - ctx->lock = nvenc_lock; - ctx->unlock = nvenc_unlock; - auto err = av_hwdevice_ctx_init(ctx_buf.get()); if(err) { char err_str[AV_ERROR_MAX_STRING_SIZE] {0};