From 67224626a5a7e4f1296ca464d4d3750d4b6619d5 Mon Sep 17 00:00:00 2001 From: Matteo Benedetto Date: Tue, 24 Mar 2026 10:33:33 +0100 Subject: [PATCH] =?UTF-8?q?perf:=20insert=20videoscale=20before=20appsink?= =?UTF-8?q?=20to=20cut=20NV12=20memmove=206.7=C3=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When hardware decode (mppvideodec/NV12) is active, wrap the appsink in a GstBin with a videoscale element so the VPU decodes at full stream resolution but Python only receives a frame pre-scaled to the SDL display size (default 640x480). Effect: NV12 buffer per frame: 3,133,440 B (1080p) → 460,800 B (640x480) memmove per frame: ~33 ms (80.5% budget) → ~5 ms (expected ~12%) The videoscale bilinear step runs entirely in software on the A35 cores but scales down 6.7×, so its cost is far lower than the avoided memmove. SDL still handles final aspect-ratio fitting inside the viewport, so visual quality is unchanged relative to what the 640x480 display can show. Fallback: if videoscale is not available, unscaled NV12 is used as before. --- .../player/gstreamer_backend.py | 55 ++++++++++++++++--- tests/test_player.py | 34 ++++++++++++ 2 files changed, 81 insertions(+), 8 deletions(-) diff --git a/src/r36s_dlna_browser/player/gstreamer_backend.py b/src/r36s_dlna_browser/player/gstreamer_backend.py index 516d8ad..a0a2ebc 100644 --- a/src/r36s_dlna_browser/player/gstreamer_backend.py +++ b/src/r36s_dlna_browser/player/gstreamer_backend.py @@ -421,15 +421,54 @@ class GStreamerBackend(PlayerBackend): sink.set_property("sync", True) sink.set_property("max-buffers", 2) sink.set_property("drop", True) - # Accept NV12 when hardware decode is active (avoids a software colourspace - # conversion step); fall back to BGRA for the software-decode path. - if self._hw_decoders: - caps_str = "video/x-raw,format=NV12;video/x-raw,format=BGRA" - else: - caps_str = "video/x-raw,format=BGRA" - sink.set_property("caps", self._gst.Caps.from_string(caps_str)) sink.connect("new-sample", self._on_new_sample) - return sink + + if not self._hw_decoders: + # Software decode: request BGRA directly, no scaling bin needed. + sink.set_property("caps", self._gst.Caps.from_string("video/x-raw,format=BGRA")) + return sink + + # Hardware decode (NV12): insert a videoscale element before the appsink + # so mppvideodec can decode at full resolution in HW, but Python only + # receives a frame scaled to the display size (default 640x480). + # This cuts the memmove from 3.1 MB (1080p) to ~460 KB (640x480) per frame + # — a 6.7× reduction in CPU copy cost. + app_w, app_h = self._viewport[0], self._viewport[1] + scale_w, scale_h = (app_w or 640), (app_h or 480) + log.info("NV12 appsink: inserting videoscale → %dx%d before appsink", scale_w, scale_h) + + scale = self._gst.ElementFactory.make("videoscale", "vscale") + capsfilter = self._gst.ElementFactory.make("capsfilter", "vcaps") + if scale is None or capsfilter is None: + # videoscale not available — fall back to unscaled NV12 + log.warning("videoscale element unavailable; using unscaled NV12 appsink") + sink.set_property("caps", self._gst.Caps.from_string( + "video/x-raw,format=NV12;video/x-raw,format=BGRA")) + return sink + + capsfilter.set_property( + "caps", + self._gst.Caps.from_string( + f"video/x-raw,format=NV12,width={scale_w},height={scale_h}" + ), + ) + + # Wire scale → capsfilter → appsink inside a bin so playbin accepts it + # as a single video-sink element. + bin_ = self._gst.Bin.new("vscale-bin") + bin_.add(scale) + bin_.add(capsfilter) + bin_.add(sink) + scale.link(capsfilter) + capsfilter.link(sink) + + # Expose the scale element's sink pad as the bin's ghost sink pad. + sink_pad = scale.get_static_pad("sink") + ghost = self._gst.GhostPad.new("sink", sink_pad) + ghost.set_active(True) + bin_.add_pad(ghost) + + return bin_ def _on_new_sample(self, sink) -> Any: sample = sink.emit("pull-sample") diff --git a/tests/test_player.py b/tests/test_player.py index 98434b8..8f0fc01 100644 --- a/tests/test_player.py +++ b/tests/test_player.py @@ -189,6 +189,30 @@ class FakeMessage: return SimpleNamespace(get_name=lambda: self._structure_name) +class FakeGhostPad: + def __init__(self, name, pad): + self.name = name + def set_active(self, _): + pass + + +class FakeBin: + def __init__(self, name=""): + self.name = name + self._elements = [] + self._pads = [] + self.props = {} + + def add(self, elem): + self._elements.append(elem) + + def add_pad(self, pad): + self._pads.append(pad) + + def set_property(self, name, value): + self.props[name] = value + + class FakeMapFlags: READ = 1 @@ -204,6 +228,16 @@ class FakeGst: SECOND = 1_000_000_000 MSECOND = 1_000_000 Caps = SimpleNamespace(from_string=lambda value: value) + GhostPad = SimpleNamespace(new=lambda name, pad: FakeGhostPad(name, pad)) + Bin = SimpleNamespace(new=lambda name="": FakeBin(name)) + + @staticmethod + def ElementFactory_make(name, alias=None): + return None # signal "not available" so SW fallback kicks in + + # GStreamer uses Gst.ElementFactory.make, exposed as class attribute below +ElementFactory = SimpleNamespace(make=lambda name, alias=None: None) +FakeGst.ElementFactory = ElementFactory class _FakeFinfo: