[gnome-remote-desktop] rdp: Avoid local copy if possible
- From: Jonas Ådahl <jadahl src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-remote-desktop] rdp: Avoid local copy if possible
- Date: Thu, 3 Mar 2022 14:23:09 +0000 (UTC)
commit f843bf2e73754419fc49f03b83ae13c25f76b1d0
Author: Pascal Nowack <Pascal Nowack gmx de>
Date: Thu Jan 13 18:41:30 2022 +0100
rdp: Avoid local copy if possible
If the GPU supports NVENC, the client did not disable AVC, and an NVENC
session was successfully opened, there is no need to have the frame
data available to the host.
To accomplish this, add a new boolean value to RDP surfaces, which
indicates whether such a local copy is necessary or not.
By default, the local copy is necessary.
As soon as an NVENC session is created, this boolean value is set to
TRUE, meaning that no local copy is needed.
When the RDP surface is resized or destroyed, this value is reset to
FALSE again.
Since all framebuffers are tracked, this won't cause any problems when
resizing the associated stream.
When this boolean value is set to TRUE, dma-bufs are no longer
downloaded.
The content of the dma-buf texture is just retrieved in a pixel buffer
object, which will then be mapped.
This will drastically reduce the frame latency due to the missing
download operation.
When dma-bufs are not supported, the frame latency can also be reduced
if memfds are used (which is usually the case when dma-bufs are not
supported).
In this case, the mapped data does not need to be duplicated any more,
since the source stride is in almost all cases the same value as the
destination stride.
The EGL thread will then upload the mapped source content, instead of
uploading a copy of the source content.
The latency reduction depends on the framebuffer size and on the
system.
For FullHD frames, this is usually about 4ms.
src/grd-rdp-graphics-pipeline.c | 4 +++
src/grd-rdp-pipewire-stream.c | 78 ++++++++++++++++++++++++++++++-----------
src/grd-rdp-surface.c | 2 ++
src/grd-rdp-surface.h | 1 +
4 files changed, 64 insertions(+), 21 deletions(-)
---
diff --git a/src/grd-rdp-graphics-pipeline.c b/src/grd-rdp-graphics-pipeline.c
index 270a99b0..ce4b75da 100644
--- a/src/grd-rdp-graphics-pipeline.c
+++ b/src/grd-rdp-graphics-pipeline.c
@@ -157,6 +157,8 @@ grd_rdp_graphics_pipeline_create_surface (GrdRdpGraphicsPipeline *graphics_pipel
g_hash_table_insert (graphics_pipeline->surface_hwaccel_table,
GUINT_TO_POINTER (surface_id), hwaccel_context);
+
+ rdp_surface->needs_no_local_data = TRUE;
}
g_mutex_unlock (&graphics_pipeline->gfx_mutex);
@@ -175,6 +177,7 @@ grd_rdp_graphics_pipeline_delete_surface (GrdRdpGraphicsPipeline *graphics_pipel
RdpgfxServerContext *rdpgfx_context = graphics_pipeline->rdpgfx_context;
RDPGFX_DELETE_ENCODING_CONTEXT_PDU delete_encoding_context = {0};
RDPGFX_DELETE_SURFACE_PDU delete_surface = {0};
+ GrdRdpSurface *rdp_surface = grd_rdp_gfx_surface_get_rdp_surface (gfx_surface);
gboolean needs_encoding_context_deletion = FALSE;
GfxSurfaceContext *surface_context;
HWAccelContext *hwaccel_context;
@@ -207,6 +210,7 @@ grd_rdp_graphics_pipeline_delete_surface (GrdRdpGraphicsPipeline *graphics_pipel
NULL, (gpointer *) &hwaccel_context))
{
g_debug ("[RDP.RDPGFX] Destroying NVENC session for surface %u", surface_id);
+ rdp_surface->needs_no_local_data = FALSE;
g_assert (hwaccel_context->api == HW_ACCEL_API_NVENC);
grd_hwaccel_nvidia_free_nvenc_session (graphics_pipeline->hwaccel_nvidia,
diff --git a/src/grd-rdp-pipewire-stream.c b/src/grd-rdp-pipewire-stream.c
index 0c686f59..aed5f5fa 100644
--- a/src/grd-rdp-pipewire-stream.c
+++ b/src/grd-rdp-pipewire-stream.c
@@ -64,6 +64,10 @@ struct _GrdRdpFrame
GrdRdpBuffer *buffer;
+ gboolean has_map;
+ size_t map_size;
+ uint8_t *map;
+
gboolean has_pointer_data;
uint8_t *pointer_bitmap;
uint16_t pointer_hotspot_x;
@@ -168,6 +172,8 @@ grd_rdp_frame_unref (GrdRdpFrame *frame)
{
if (g_atomic_ref_count_dec (&frame->refcount))
{
+ g_assert (!frame->has_map);
+
g_clear_pointer (&frame->buffer, grd_rdp_buffer_release);
g_free (frame->pointer_bitmap);
g_free (frame);
@@ -634,7 +640,7 @@ process_buffer (GrdRdpPipeWireStream *stream,
uint8_t *map;
void *src_data;
uint32_t pbo;
- uint8_t *local_data;
+ uint8_t *data_to_upload;
size = buffer->datas[0].maxsize + buffer->datas[0].mapoffset;
map = mmap (NULL, size, PROT_READ, MAP_PRIVATE, buffer->datas[0].fd, 0);
@@ -656,16 +662,29 @@ process_buffer (GrdRdpPipeWireStream *stream,
}
rdp_buffer = frame->buffer;
pbo = rdp_buffer->pbo;
- local_data = rdp_buffer->local_data;
- copy_frame_data (frame,
- src_data,
- width, height,
- dst_stride,
- src_stride,
- bpp);
+ if (stream->rdp_surface->needs_no_local_data &&
+ src_stride == dst_stride)
+ {
+ frame->map_size = size;
+ frame->map = map;
+ frame->has_map = TRUE;
+
+ data_to_upload = src_data;
+ }
+ else
+ {
+ copy_frame_data (frame,
+ src_data,
+ width, height,
+ dst_stride,
+ src_stride,
+ bpp);
- munmap (map, size);
+ munmap (map, size);
+
+ data_to_upload = rdp_buffer->local_data;
+ }
if (!hwaccel_nvidia)
{
@@ -687,7 +706,7 @@ process_buffer (GrdRdpPipeWireStream *stream,
pbo,
height,
dst_stride,
- local_data,
+ data_to_upload,
cuda_allocate_buffer,
allocate_buffer_data,
g_free,
@@ -714,7 +733,7 @@ process_buffer (GrdRdpPipeWireStream *stream,
uint64_t *modifiers;
uint32_t n_planes;
unsigned int i;
- uint8_t *dst_data;
+ uint8_t *dst_data = NULL;
frame->buffer = grd_rdp_buffer_pool_acquire (stream->buffer_pool);
if (!frame->buffer)
@@ -741,7 +760,9 @@ process_buffer (GrdRdpPipeWireStream *stream,
strides[i] = buffer->datas[i].chunk->stride;
modifiers[i] = stream->spa_format.modifier;
}
- dst_data = frame->buffer->local_data;
+
+ if (!stream->rdp_surface->needs_no_local_data)
+ dst_data = frame->buffer->local_data;
if (hwaccel_nvidia)
{
@@ -791,7 +812,7 @@ process_buffer (GrdRdpPipeWireStream *stream,
GrdRdpBuffer *rdp_buffer;
void *src_data;
uint32_t pbo;
- uint8_t *local_data;
+ uint8_t *data_to_upload;
src_data = buffer->datas[0].data;
@@ -805,14 +826,23 @@ process_buffer (GrdRdpPipeWireStream *stream,
}
rdp_buffer = frame->buffer;
pbo = rdp_buffer->pbo;
- local_data = rdp_buffer->local_data;
- copy_frame_data (frame,
- src_data,
- width, height,
- dst_stride,
- src_stride,
- bpp);
+ if (stream->rdp_surface->needs_no_local_data &&
+ src_stride == dst_stride)
+ {
+ data_to_upload = src_data;
+ }
+ else
+ {
+ copy_frame_data (frame,
+ src_data,
+ width, height,
+ dst_stride,
+ src_stride,
+ bpp);
+
+ data_to_upload = rdp_buffer->local_data;
+ }
if (!hwaccel_nvidia)
{
@@ -834,7 +864,7 @@ process_buffer (GrdRdpPipeWireStream *stream,
pbo,
height,
dst_stride,
- local_data,
+ data_to_upload,
cuda_allocate_buffer,
allocate_buffer_data,
g_free,
@@ -877,6 +907,12 @@ on_frame_ready (GrdRdpPipeWireStream *stream,
g_assert (frame);
+ if (frame->has_map)
+ {
+ munmap (frame->map, frame->map_size);
+ frame->has_map = FALSE;
+ }
+
if (!success)
goto out;
diff --git a/src/grd-rdp-surface.c b/src/grd-rdp-surface.c
index d2481fff..f3e5699c 100644
--- a/src/grd-rdp-surface.c
+++ b/src/grd-rdp-surface.c
@@ -100,6 +100,8 @@ grd_rdp_surface_free (GrdRdpSurface *rdp_surface)
void
grd_rdp_surface_reset (GrdRdpSurface *rdp_surface)
{
+ rdp_surface->needs_no_local_data = FALSE;
+
if (rdp_surface->avc.main_view)
{
grd_hwaccel_nvidia_clear_mem_ptr (rdp_surface->hwaccel_nvidia,
diff --git a/src/grd-rdp-surface.h b/src/grd-rdp-surface.h
index 49dd84b5..fff10cf1 100644
--- a/src/grd-rdp-surface.h
+++ b/src/grd-rdp-surface.h
@@ -46,6 +46,7 @@ struct _GrdRdpSurface
CUdeviceptr main_view;
} avc;
+ gboolean needs_no_local_data;
gboolean valid;
GrdRdpGfxSurface *gfx_surface;
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index] [Date Index] [Author Index]