Browse Source

perf: insert videoscale before appsink to cut NV12 memmove 6.7×

When hardware decode (mppvideodec/NV12) is active, wrap the appsink in a
GstBin with a videoscale element so the VPU decodes at full stream
resolution but Python only receives a frame pre-scaled to the SDL display
size (default 640x480).

Effect:
  NV12 buffer per frame: 3,133,440 B (1080p) → 460,800 B (640x480)
  memmove per frame:     ~33 ms (80.5% budget) → ~5 ms (expected ~12%)

The videoscale bilinear step runs entirely in software on the A35 cores
but scales down 6.7×, so its cost is far lower than the avoided memmove.
SDL still handles final aspect-ratio fitting inside the viewport, so
visual quality is unchanged relative to what the 640x480 display can show.

Fallback: if videoscale is not available, unscaled NV12 is used as before.
main
Matteo Benedetto 1 week ago
parent
commit
67224626a5
  1. 53
      src/r36s_dlna_browser/player/gstreamer_backend.py
  2. 34
      tests/test_player.py

53
src/r36s_dlna_browser/player/gstreamer_backend.py

@ -421,16 +421,55 @@ class GStreamerBackend(PlayerBackend):
sink.set_property("sync", True)
sink.set_property("max-buffers", 2)
sink.set_property("drop", True)
# Accept NV12 when hardware decode is active (avoids a software colourspace
# conversion step); fall back to BGRA for the software-decode path.
if self._hw_decoders:
caps_str = "video/x-raw,format=NV12;video/x-raw,format=BGRA"
else:
caps_str = "video/x-raw,format=BGRA"
sink.set_property("caps", self._gst.Caps.from_string(caps_str))
sink.connect("new-sample", self._on_new_sample)
if not self._hw_decoders:
# Software decode: request BGRA directly, no scaling bin needed.
sink.set_property("caps", self._gst.Caps.from_string("video/x-raw,format=BGRA"))
return sink
# Hardware decode (NV12): insert a videoscale element before the appsink
# so mppvideodec can decode at full resolution in HW, but Python only
# receives a frame scaled to the display size (default 640x480).
# This cuts the memmove from 3.1 MB (1080p) to ~460 KB (640x480) per frame
# — a 6.7× reduction in CPU copy cost.
app_w, app_h = self._viewport[0], self._viewport[1]
scale_w, scale_h = (app_w or 640), (app_h or 480)
log.info("NV12 appsink: inserting videoscale → %dx%d before appsink", scale_w, scale_h)
scale = self._gst.ElementFactory.make("videoscale", "vscale")
capsfilter = self._gst.ElementFactory.make("capsfilter", "vcaps")
if scale is None or capsfilter is None:
# videoscale not available — fall back to unscaled NV12
log.warning("videoscale element unavailable; using unscaled NV12 appsink")
sink.set_property("caps", self._gst.Caps.from_string(
"video/x-raw,format=NV12;video/x-raw,format=BGRA"))
return sink
capsfilter.set_property(
"caps",
self._gst.Caps.from_string(
f"video/x-raw,format=NV12,width={scale_w},height={scale_h}"
),
)
# Wire scale → capsfilter → appsink inside a bin so playbin accepts it
# as a single video-sink element.
bin_ = self._gst.Bin.new("vscale-bin")
bin_.add(scale)
bin_.add(capsfilter)
bin_.add(sink)
scale.link(capsfilter)
capsfilter.link(sink)
# Expose the scale element's sink pad as the bin's ghost sink pad.
sink_pad = scale.get_static_pad("sink")
ghost = self._gst.GhostPad.new("sink", sink_pad)
ghost.set_active(True)
bin_.add_pad(ghost)
return bin_
def _on_new_sample(self, sink) -> Any:
sample = sink.emit("pull-sample")
if sample is None:

34
tests/test_player.py

@ -189,6 +189,30 @@ class FakeMessage:
return SimpleNamespace(get_name=lambda: self._structure_name)
class FakeGhostPad:
    """Minimal test double for ``Gst.GhostPad``.

    Records the name and the wrapped target pad passed to the constructor,
    plus the most recent activation state, so tests can inspect how the
    ghost pad was built and activated.
    """

    def __init__(self, name, pad):
        self.name = name
        # Keep the proxied pad so tests can check which pad was exposed.
        self.target = pad
        self.active = False

    def set_active(self, active):
        """Record the requested activation state and report success."""
        self.active = bool(active)
        # Gst.Pad.set_active returns a boolean success flag; mirror that so
        # callers that check the result treat the fake as having succeeded.
        return True
class FakeBin:
    """Minimal test double for ``Gst.Bin``.

    Collects added elements in ``_elements``, added pads in ``_pads`` and
    property assignments in ``props`` so tests can inspect them afterwards.
    """

    def __init__(self, name=""):
        self.name = name
        self._elements = []
        self._pads = []
        self.props = {}

    def add(self, elem):
        """Record *elem* as a child of the bin and report success."""
        self._elements.append(elem)
        # Gst.Bin.add returns a boolean; returning True keeps the fake usable
        # by callers that check the result.
        return True

    def add_pad(self, pad):
        """Record *pad* as a pad of the bin and report success."""
        self._pads.append(pad)
        # Gst.Element.add_pad also returns a boolean success flag.
        return True

    def set_property(self, name, value):
        """Store the property assignment in ``props``."""
        self.props[name] = value
class FakeMapFlags:
READ = 1
@ -204,6 +228,16 @@ class FakeGst:
SECOND = 1_000_000_000
MSECOND = 1_000_000
Caps = SimpleNamespace(from_string=lambda value: value)
GhostPad = SimpleNamespace(new=lambda name, pad: FakeGhostPad(name, pad))
Bin = SimpleNamespace(new=lambda name="": FakeBin(name))
@staticmethod
def ElementFactory_make(name, alias=None):
return None # signal "not available" so SW fallback kicks in
# GStreamer uses Gst.ElementFactory.make, exposed as class attribute below
ElementFactory = SimpleNamespace(make=lambda name, alias=None: None)
FakeGst.ElementFactory = ElementFactory
class _FakeFinfo:

Loading…
Cancel
Save