[gnome-remote-desktop] rdp: Avoid local copy if possible

From: Jonas Ådahl <jadahl src gnome org>
To: commits-list gnome org
Cc:
Subject: [gnome-remote-desktop] rdp: Avoid local copy if possible
Date: Thu, 3 Mar 2022 14:23:09 +0000 (UTC)
commit f843bf2e73754419fc49f03b83ae13c25f76b1d0
Author: Pascal Nowack <Pascal Nowack gmx de>
Date:   Thu Jan 13 18:41:30 2022 +0100

    rdp: Avoid local copy if possible
    
    If the GPU supports NVENC, the client did not disable AVC, and an NVENC
    session was successfully opened, there is no need to have the frame
    data available to the host.
    
    To accomplish this, add a new boolean value (needs_no_local_data) to
    RDP surfaces, which indicates whether such a local copy can be
    omitted.
    By default, this value is FALSE, i.e. the local copy is necessary.
    As soon as an NVENC session is created, this boolean value is set to
    TRUE.
    When the RDP surface is resized or destroyed, this value is reset to
    FALSE again.
    Since all framebuffers are tracked, this won't cause any problems when
    resizing the associated stream.
    
    When this boolean value is set to TRUE, dma-bufs are no longer
    downloaded.
    The content of the dma-buf texture is just retrieved into a pixel
    buffer object, which will then be mapped.
    This drastically reduces the frame latency, since the download
    operation is omitted.
    The frame latency can also be reduced when dma-bufs are not supported
    and memfds are used instead (which is usually the case when dma-bufs
    are not supported).
    In this case, the mapped data does not need to be duplicated any more,
    since the source stride is in almost all cases the same value as the
    destination stride.
    The EGL thread will then upload the mapped source content instead of
    uploading a copy of the source content.
    
    The latency reduction depends on the framebuffer size and on the
    system.
    For FullHD frames, this is usually about 4ms.

 src/grd-rdp-graphics-pipeline.c |  4 +++
 src/grd-rdp-pipewire-stream.c   | 78 ++++++++++++++++++++++++++++++-----------
 src/grd-rdp-surface.c           |  2 ++
 src/grd-rdp-surface.h           |  1 +
 4 files changed, 64 insertions(+), 21 deletions(-)
---
diff --git a/src/grd-rdp-graphics-pipeline.c b/src/grd-rdp-graphics-pipeline.c
index 270a99b0..ce4b75da 100644
--- a/src/grd-rdp-graphics-pipeline.c
+++ b/src/grd-rdp-graphics-pipeline.c
@@ -157,6 +157,8 @@ grd_rdp_graphics_pipeline_create_surface (GrdRdpGraphicsPipeline *graphics_pipel
 
       g_hash_table_insert (graphics_pipeline->surface_hwaccel_table,
                            GUINT_TO_POINTER (surface_id), hwaccel_context);
+
+      rdp_surface->needs_no_local_data = TRUE;
     }
   g_mutex_unlock (&graphics_pipeline->gfx_mutex);
 
@@ -175,6 +177,7 @@ grd_rdp_graphics_pipeline_delete_surface (GrdRdpGraphicsPipeline *graphics_pipel
   RdpgfxServerContext *rdpgfx_context = graphics_pipeline->rdpgfx_context;
   RDPGFX_DELETE_ENCODING_CONTEXT_PDU delete_encoding_context = {0};
   RDPGFX_DELETE_SURFACE_PDU delete_surface = {0};
+  GrdRdpSurface *rdp_surface = grd_rdp_gfx_surface_get_rdp_surface (gfx_surface);
   gboolean needs_encoding_context_deletion = FALSE;
   GfxSurfaceContext *surface_context;
   HWAccelContext *hwaccel_context;
@@ -207,6 +210,7 @@ grd_rdp_graphics_pipeline_delete_surface (GrdRdpGraphicsPipeline *graphics_pipel
                                    NULL, (gpointer *) &hwaccel_context))
     {
       g_debug ("[RDP.RDPGFX] Destroying NVENC session for surface %u", surface_id);
+      rdp_surface->needs_no_local_data = FALSE;
 
       g_assert (hwaccel_context->api == HW_ACCEL_API_NVENC);
       grd_hwaccel_nvidia_free_nvenc_session (graphics_pipeline->hwaccel_nvidia,
diff --git a/src/grd-rdp-pipewire-stream.c b/src/grd-rdp-pipewire-stream.c
index 0c686f59..aed5f5fa 100644
--- a/src/grd-rdp-pipewire-stream.c
+++ b/src/grd-rdp-pipewire-stream.c
@@ -64,6 +64,10 @@ struct _GrdRdpFrame
 
   GrdRdpBuffer *buffer;
 
+  gboolean has_map;
+  size_t map_size;
+  uint8_t *map;
+
   gboolean has_pointer_data;
   uint8_t *pointer_bitmap;
   uint16_t pointer_hotspot_x;
@@ -168,6 +172,8 @@ grd_rdp_frame_unref (GrdRdpFrame *frame)
 {
   if (g_atomic_ref_count_dec (&frame->refcount))
     {
+      g_assert (!frame->has_map);
+
       g_clear_pointer (&frame->buffer, grd_rdp_buffer_release);
       g_free (frame->pointer_bitmap);
       g_free (frame);
@@ -634,7 +640,7 @@ process_buffer (GrdRdpPipeWireStream     *stream,
       uint8_t *map;
       void *src_data;
       uint32_t pbo;
-      uint8_t *local_data;
+      uint8_t *data_to_upload;
 
       size = buffer->datas[0].maxsize + buffer->datas[0].mapoffset;
       map = mmap (NULL, size, PROT_READ, MAP_PRIVATE, buffer->datas[0].fd, 0);
@@ -656,16 +662,29 @@ process_buffer (GrdRdpPipeWireStream     *stream,
         }
       rdp_buffer = frame->buffer;
       pbo = rdp_buffer->pbo;
-      local_data = rdp_buffer->local_data;
 
-      copy_frame_data (frame,
-                       src_data,
-                       width, height,
-                       dst_stride,
-                       src_stride,
-                       bpp);
+      if (stream->rdp_surface->needs_no_local_data &&
+          src_stride == dst_stride)
+        {
+          frame->map_size = size;
+          frame->map = map;
+          frame->has_map = TRUE;
+
+          data_to_upload = src_data;
+        }
+      else
+        {
+          copy_frame_data (frame,
+                           src_data,
+                           width, height,
+                           dst_stride,
+                           src_stride,
+                           bpp);
 
-      munmap (map, size);
+          munmap (map, size);
+
+          data_to_upload = rdp_buffer->local_data;
+        }
 
       if (!hwaccel_nvidia)
         {
@@ -687,7 +706,7 @@ process_buffer (GrdRdpPipeWireStream     *stream,
                              pbo,
                              height,
                              dst_stride,
-                             local_data,
+                             data_to_upload,
                              cuda_allocate_buffer,
                              allocate_buffer_data,
                              g_free,
@@ -714,7 +733,7 @@ process_buffer (GrdRdpPipeWireStream     *stream,
       uint64_t *modifiers;
       uint32_t n_planes;
       unsigned int i;
-      uint8_t *dst_data;
+      uint8_t *dst_data = NULL;
 
       frame->buffer = grd_rdp_buffer_pool_acquire (stream->buffer_pool);
       if (!frame->buffer)
@@ -741,7 +760,9 @@ process_buffer (GrdRdpPipeWireStream     *stream,
           strides[i] = buffer->datas[i].chunk->stride;
           modifiers[i] = stream->spa_format.modifier;
         }
-      dst_data = frame->buffer->local_data;
+
+      if (!stream->rdp_surface->needs_no_local_data)
+        dst_data = frame->buffer->local_data;
 
       if (hwaccel_nvidia)
         {
@@ -791,7 +812,7 @@ process_buffer (GrdRdpPipeWireStream     *stream,
       GrdRdpBuffer *rdp_buffer;
       void *src_data;
       uint32_t pbo;
-      uint8_t *local_data;
+      uint8_t *data_to_upload;
 
       src_data = buffer->datas[0].data;
 
@@ -805,14 +826,23 @@ process_buffer (GrdRdpPipeWireStream     *stream,
         }
       rdp_buffer = frame->buffer;
       pbo = rdp_buffer->pbo;
-      local_data = rdp_buffer->local_data;
 
-      copy_frame_data (frame,
-                       src_data,
-                       width, height,
-                       dst_stride,
-                       src_stride,
-                       bpp);
+      if (stream->rdp_surface->needs_no_local_data &&
+          src_stride == dst_stride)
+        {
+          data_to_upload = src_data;
+        }
+      else
+        {
+          copy_frame_data (frame,
+                           src_data,
+                           width, height,
+                           dst_stride,
+                           src_stride,
+                           bpp);
+
+          data_to_upload = rdp_buffer->local_data;
+        }
 
       if (!hwaccel_nvidia)
         {
@@ -834,7 +864,7 @@ process_buffer (GrdRdpPipeWireStream     *stream,
                              pbo,
                              height,
                              dst_stride,
-                             local_data,
+                             data_to_upload,
                              cuda_allocate_buffer,
                              allocate_buffer_data,
                              g_free,
@@ -877,6 +907,12 @@ on_frame_ready (GrdRdpPipeWireStream *stream,
 
   g_assert (frame);
 
+  if (frame->has_map)
+    {
+      munmap (frame->map, frame->map_size);
+      frame->has_map = FALSE;
+    }
+
   if (!success)
     goto out;
 
diff --git a/src/grd-rdp-surface.c b/src/grd-rdp-surface.c
index d2481fff..f3e5699c 100644
--- a/src/grd-rdp-surface.c
+++ b/src/grd-rdp-surface.c
@@ -100,6 +100,8 @@ grd_rdp_surface_free (GrdRdpSurface *rdp_surface)
 void
 grd_rdp_surface_reset (GrdRdpSurface *rdp_surface)
 {
+  rdp_surface->needs_no_local_data = FALSE;
+
   if (rdp_surface->avc.main_view)
     {
       grd_hwaccel_nvidia_clear_mem_ptr (rdp_surface->hwaccel_nvidia,
diff --git a/src/grd-rdp-surface.h b/src/grd-rdp-surface.h
index 49dd84b5..fff10cf1 100644
--- a/src/grd-rdp-surface.h
+++ b/src/grd-rdp-surface.h
@@ -46,6 +46,7 @@ struct _GrdRdpSurface
     CUdeviceptr main_view;
   } avc;
 
+  gboolean needs_no_local_data;
   gboolean valid;
 
   GrdRdpGfxSurface *gfx_surface;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]