SDL2/GStreamer DLNA browser for R36S by Matteo Benedetto
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

414 lines
16 KiB

#!/usr/bin/env python3
"""
NV12 / mppvideodec decode benchmark for R36S (RK3326 / ArkOS).
Replays the exact same pipeline the app uses (playbin → NV12 appsink with
mppvideodec auto-selected) and reports:
• Decoder selected (HW vs SW)
• Decoded frame rate (actual vs stream nominal)
• Frame interval stddev (jitter)
• Dropped / late frames
• A/V sync drift (video PTS vs pipeline clock position)
• buffer.map() + memmove() time per frame (CPU copy cost)
Run on device (must use same env as the app):
export LD_LIBRARY_PATH=/home/ark/miniconda3/envs/r36s-dlna-browser/lib
export GST_PLUGIN_PATH=/usr/lib/aarch64-linux-gnu/gstreamer-1.0
export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
export PYTHONPATH=/home/ark/R36SHack/src
/home/ark/miniconda3/envs/r36s-dlna-browser/bin/python \\
/home/ark/R36SHack/tests/benchmark_nv12_decode.py [URL]
If no URL is given, reads /tmp/dlna_last_url.txt (auto-written by the app on play).
"""
from __future__ import annotations
import ctypes
import os
import statistics
import sys
import time
import threading
from dataclasses import dataclass, field
from typing import Optional
# ── URL resolution ──────────────────────────────────────────────────────────
# The media location is the first command-line argument that is not a
# "--option".  Options such as --noscale are looked up in sys.argv further
# down in this script, so they must never be mistaken for the URL, whatever
# position they are given in.
url: Optional[str] = next(
    (arg for arg in sys.argv[1:] if not arg.startswith("--")),
    None,
)
if not url:
    # No URL on the command line: fall back to the file below, if it exists.
    try:
        with open("/tmp/dlna_last_url.txt") as _f:
            url = _f.read().strip() or None
        if url:
            print(f"[auto] URL from /tmp/dlna_last_url.txt: {url}")
    except FileNotFoundError:
        pass
if not url:
    print("Usage: benchmark_nv12_decode.py <url-or-path>")
    print(" Or start the app, play something, then re-run without args.")
    sys.exit(1)

BENCH_SECONDS = 30  # how long to run
WARMUP_FRAMES = 5  # frames to skip before recording stats
# ── GStreamer setup ─────────────────────────────────────────────────────────
import gi
gi.require_version("Gst", "1.0")
gi.require_version("GstApp", "1.0")
gi.require_version("GstVideo", "1.0")
from gi.repository import Gst, GstApp, GstVideo
Gst.init(None)
gst_v = Gst.version()
print(f"GStreamer {gst_v.major}.{gst_v.minor}.{gst_v.micro}")

# ── VPU probe + rank boost (same logic as the app) ──────────────────────────
_HW_DECODER_ELEMENTS = [
    "mppvideodec",
    "v4l2h264dec",
    "v4l2h265dec",
    "v4l2vp8dec",
    "v4l2vp9dec",
]
_HW_VPU_DEVICES = [
    "/dev/vpu_service",
    "/dev/mpp_service",
    "/dev/video10",
    "/dev/video11",
]

# Names of the decoder factories whose rank was raised below.
hw_decoders: list[str] = []
for dev in _HW_VPU_DEVICES:
    try:
        # Open-then-close is used purely as an accessibility check.
        os.close(os.open(dev, os.O_RDWR | os.O_NONBLOCK))
        print(f"[HW] VPU device accessible: {dev}")
        for name in _HW_DECODER_ELEMENTS:
            fac = Gst.ElementFactory.find(name)
            if fac is None:
                continue  # factory not registered
            fac.set_rank(Gst.Rank.PRIMARY + 1)
            hw_decoders.append(name)
            print(f"[HW] Boosted rank: {name}")
    except OSError:
        # This device node could not be used; try the next candidate.
        continue
    # The first accessible device is enough; the boost has been applied.
    break

if not hw_decoders:
    print("[SW] No VPU device or no gst-mpp plugin — using software decode")
# ── Pipeline ────────────────────────────────────────────────────────────────
pipeline = Gst.ElementFactory.make("playbin", "player")
if pipeline is None:
    print("[ERR] playbin unavailable — install gst-plugins-base")
    sys.exit(1)

# Target display size — same default as the app (R36S screen is 640×480).
SCALE_W, SCALE_H = 640, 480

appsink = Gst.ElementFactory.make("appsink", "vsink")
# ElementFactory.make() returns None when the element is not installed; fail
# with the same kind of message as for playbin instead of an AttributeError.
if appsink is None:
    print("[ERR] appsink unavailable — install gst-plugins-base")
    sys.exit(1)
appsink.set_property("emit-signals", True)
appsink.set_property("sync", True)  # keep A/V sync on
appsink.set_property("max-buffers", 2)
appsink.set_property("drop", True)
# video_sink is what we hand to playbin. For HW decode we mirror _create_appsink()
# from the app: wrap videoscale → capsfilter(NV12,640×480) → appsink in a GstBin
# so the Python callback receives 460 KB per frame instead of 3.1 MB (6.7× smaller).
# Pass --noscale to disable this and benchmark the unscaled path.
video_sink = appsink
if hw_decoders and "--noscale" not in sys.argv:
    scale_el = Gst.ElementFactory.make("videoscale", "vscale")
    cfilt_el = Gst.ElementFactory.make("capsfilter", "vcaps")
    if scale_el is not None and cfilt_el is not None:
        cfilt_el.set_property(
            "caps",
            Gst.Caps.from_string(
                f"video/x-raw,format=NV12,width={SCALE_W},height={SCALE_H}"
            ),
        )
        bin_ = Gst.Bin.new("vscale-bin")
        bin_.add(scale_el)
        bin_.add(cfilt_el)
        bin_.add(appsink)
        # Element.link() signals failure only through its return value.  Stop
        # here with a clear message rather than starting a half-linked bin
        # that would fail later with a harder-to-read pipeline error.
        if not (scale_el.link(cfilt_el) and cfilt_el.link(appsink)):
            print("[ERR] Failed to link videoscale → capsfilter → appsink")
            sys.exit(1)
        # Expose the scaler's sink pad as the bin's own "sink" pad so the bin
        # can be handed to playbin as a single video sink.
        sink_pad = scale_el.get_static_pad("sink")
        ghost = Gst.GhostPad.new("sink", sink_pad)
        ghost.set_active(True)
        bin_.add_pad(ghost)
        video_sink = bin_
        print(f"[scale] videoscale → {SCALE_W}×{SCALE_H} NV12 (mirrors app _create_appsink)")
    else:
        print("[scale] videoscale element unavailable — falling back to unscaled NV12")
        appsink.set_property(
            "caps", Gst.Caps.from_string("video/x-raw,format=NV12;video/x-raw,format=BGRA")
        )
elif hw_decoders:
    print(f"[scale] --noscale: unscaled NV12 ({SCALE_W}×{SCALE_H} disabled)")
    appsink.set_property(
        "caps", Gst.Caps.from_string("video/x-raw,format=NV12;video/x-raw,format=BGRA")
    )
else:
    # No boosted HW decoder: request BGRA only.
    appsink.set_property("caps", Gst.Caps.from_string("video/x-raw,format=BGRA"))

print(f"[caps] video-sink: {'GstBin(videoscale+capsfilter+appsink)' if video_sink is not appsink else 'appsink'}")
pipeline.set_property("video-sink", video_sink)
pipeline.set_property("uri", url)
# Disable subtitles / visualisations, keep audio+video (same flags as app).
# Nothing is changed when the Gst module exposes no PlayFlags attribute, and
# any individual flag name that is missing from it is skipped.
PlayFlags = getattr(Gst, "PlayFlags", None)
if PlayFlags is not None:
    flags = int(pipeline.get_property("flags"))
    # (flag name, should it be set?) — applied in this order.
    for flag_name, enable in (
        ("AUDIO", True),
        ("VIDEO", True),
        ("TEXT", False),
        ("VIS", False),
    ):
        bit = getattr(PlayFlags, flag_name, None)
        if bit is None:
            continue
        if enable:
            flags |= int(bit)
        else:
            flags &= ~int(bit)
    pipeline.set_property("flags", flags)
# ── Measurement state ───────────────────────────────────────────────────────
@dataclass
class Stats:
    """Mutable measurement state filled in by the appsink callback.

    The per-frame lists are appended to by ``_on_sample`` and snapshotted by
    the main script; both sides take ``lock`` while touching them.
    """

    total_frames: int = 0  # every frame seen by the callback, warmup included
    warmup_done: bool = False  # set once the first non-warmup frame is recorded
    fmt: str = "?"  # pixel format name taken from the first frame
    width: int = 0
    height: int = 0
    # timing
    frame_wall_times: list[float] = field(default_factory=list)  # time.monotonic() values
    pts_list: list[float] = field(default_factory=list)  # seconds
    copy_times_us: list[float] = field(default_factory=list)  # µs
    # A/V sync: (video_pts_s - pipeline_pos_s) samples
    av_drift_ms: list[float] = field(default_factory=list)
    dropped_frames: int = 0  # NOTE(review): not updated anywhere in the visible script
    lock: threading.Lock = field(default_factory=threading.Lock)
    # Reusable ctypes destination buffer for the per-frame copy and its size
    # in bytes.  Declared as real fields (rather than attached to one instance
    # after construction) so every Stats() has them; kept out of repr/eq
    # because they are scratch space, not measurements.
    _raw_arr: Optional[ctypes.Array] = field(default=None, repr=False, compare=False)
    _raw_arr_size: int = field(default=0, repr=False, compare=False)


stats = Stats()
# ── Callback ────────────────────────────────────────────────────────────────
def _on_sample(sink) -> Gst.FlowReturn:
    """Handle one appsink ``new-sample`` signal and record its measurements.

    Pulls the sample, times ``buffer.map(READ)`` plus a ``memmove`` into a
    reusable ctypes array, and, once the warmup frames have passed, appends
    the wall-clock arrival time, copy time, PTS and PTS-vs-position drift to
    the module-level ``stats``.

    Args:
        sink: The element that emitted the signal; ``pull-sample`` is emitted
            on it to obtain the sample.

    Returns:
        Always ``Gst.FlowReturn.OK``; frames that cannot be inspected or
        mapped are skipped without being counted.
    """
    sample = sink.emit("pull-sample")
    if sample is None:
        return Gst.FlowReturn.OK
    buf = sample.get_buffer()
    caps = sample.get_caps()
    if buf is None or caps is None:
        return Gst.FlowReturn.OK
    info = GstVideo.VideoInfo.new_from_caps(caps)
    if info is None:
        return Gst.FlowReturn.OK

    wall_now = time.monotonic()

    # Buffer PTS in seconds (None when the buffer carries no timestamp).
    pts_ns = buf.pts
    pts_s = pts_ns / Gst.SECOND if pts_ns != Gst.CLOCK_TIME_NONE else None

    fmt_str = "BGRA"
    if info.finfo is not None:
        try:
            fmt_str = info.finfo.name.upper()
        except Exception:
            # Best effort only: keep the "BGRA" placeholder rather than
            # letting a formatting problem break the streaming callback.
            pass

    # Measure buffer.map(READ) + memmove into a pre-allocated ctypes array
    # (same path as the app). Reuse a single ctypes array across frames to
    # avoid per-frame allocation.
    t0 = time.monotonic()
    ok, map_info = buf.map(Gst.MapFlags.READ)
    if not ok:
        return Gst.FlowReturn.OK
    try:
        src_size = map_info.size
        # (Re)allocate when there is no buffer yet or it is too small.  The
        # explicit None test matters: the attribute exists from start-up with
        # the value None, so an existence check can never detect "no buffer".
        if (
            getattr(stats, "_raw_arr", None) is None
            or getattr(stats, "_raw_arr_size", 0) < src_size
        ):
            stats._raw_arr = (ctypes.c_ubyte * src_size)()
            stats._raw_arr_size = src_size
        ctypes.memmove(stats._raw_arr, map_info.data, src_size)
        copy_us = (time.monotonic() - t0) * 1e6
    finally:
        buf.unmap(map_info)

    with stats.lock:
        stats.total_frames += 1
        if stats.total_frames == 1:
            stats.fmt = fmt_str
            stats.width = int(info.width)
            stats.height = int(info.height)
            print(f"\n[first frame] fmt={fmt_str} {info.width}x{info.height} "
                  f"stride0={info.stride[0]} buf_total={buf.get_size()}")
        if stats.total_frames <= WARMUP_FRAMES:
            return Gst.FlowReturn.OK  # skip warmup frames from stats
        stats.warmup_done = True
        stats.frame_wall_times.append(wall_now)
        stats.copy_times_us.append(copy_us)
        if pts_s is not None:
            stats.pts_list.append(pts_s)
            # A/V sync: query pipeline position and compare to video PTS.
            # NOTE(review): this compares the raw buffer PTS with the queried
            # position; presumably fine for playback from the start — confirm
            # whether a segment/stream-time conversion is needed otherwise.
            ok, pos_ns = pipeline.query_position(Gst.Format.TIME)
            if ok and pos_ns >= 0:
                drift_ms = (pts_s - pos_ns / Gst.SECOND) * 1000.0
                stats.av_drift_ms.append(drift_ms)
    return Gst.FlowReturn.OK


appsink.connect("new-sample", _on_sample)
# ── Bus watcher ─────────────────────────────────────────────────────────────
errors: list[str] = []
warnings: list[str] = []
# Set on EOS or ERROR by the watcher, and by the main script to stop it.
eos_reached = threading.Event()


def _bus_thread():
    """Poll the pipeline bus until ``eos_reached`` is set.

    Waits up to 200 ms at a time for an ERROR, WARNING or EOS message.
    Warnings are collected in ``warnings`` and printed; an error is collected
    in ``errors``, printed, and ends the watch; EOS is printed and ends the
    watch.
    """
    wanted = (
        Gst.MessageType.ERROR
        | Gst.MessageType.WARNING
        | Gst.MessageType.EOS
    )
    poll_timeout = 200 * Gst.MSECOND
    bus = pipeline.get_bus()
    while True:
        if eos_reached.is_set():
            return
        msg = bus.timed_pop_filtered(poll_timeout, wanted)
        if msg is None:
            continue  # timeout: re-check the stop flag
        kind = msg.type
        if kind == Gst.MessageType.WARNING:
            w, d = msg.parse_warning()
            warnings.append(w.message)
            print(f"\n[bus] WARNING: {w.message}")
        elif kind == Gst.MessageType.ERROR:
            err, dbg = msg.parse_error()
            errors.append(f"{err.message} | {dbg}")
            print(f"\n[bus] ERROR: {err.message}")
            eos_reached.set()
        elif kind == Gst.MessageType.EOS:
            print("\n[bus] EOS")
            eos_reached.set()


bt = threading.Thread(target=_bus_thread, daemon=True)
bt.start()
# ── Run ─────────────────────────────────────────────────────────────────────
print(f"\nRunning benchmark for {BENCH_SECONDS}s (warmup: {WARMUP_FRAMES} frames)...")
print("Press Ctrl+C to stop early.\n")

ret = pipeline.set_state(Gst.State.PLAYING)
if ret == Gst.StateChangeReturn.FAILURE:
    print("[ERR] Pipeline failed to start")
    sys.exit(1)

deadline = time.monotonic() + BENCH_SECONDS
try:
    # Refresh a one-line progress readout twice a second until the time
    # budget is used up or the bus watcher reports EOS / an error.
    while True:
        if time.monotonic() >= deadline or eos_reached.is_set():
            break
        with stats.lock:
            recorded = len(stats.frame_wall_times)
        elapsed = BENCH_SECONDS - (deadline - time.monotonic())
        progress = f"\r elapsed={elapsed:5.1f}s frames={recorded:4d} fmt={stats.fmt}"
        print(progress, end="", flush=True)
        time.sleep(0.5)
except KeyboardInterrupt:
    print("\n[interrupted]")

print()
pipeline.set_state(Gst.State.NULL)
eos_reached.set()  # also stops the bus watcher thread
# ── Report ───────────────────────────────────────────────────────────────────
print("\n" + "="*62)
print(" BENCHMARK RESULTS")
print("="*62)

# Snapshot everything under the lock so the report works on private copies.
with stats.lock:
    wall_times = list(stats.frame_wall_times)
    copy_us = list(stats.copy_times_us)
    pts_list = list(stats.pts_list)
    av_drifts = list(stats.av_drift_ms)
    total = stats.total_frames
    fmt = stats.fmt
    w, h = stats.width, stats.height

scale_active = video_sink is not appsink
print(f" URL : {url}")
print(f" Scale : {'videoscale → %d×%d NV12' % (SCALE_W, SCALE_H) if scale_active else 'none (--noscale or SW)'}")
print(f" Format : {fmt} {w}x{h}")
print(f" Decoder : {'HW (' + ', '.join(hw_decoders) + ')' if hw_decoders else 'SW (avdec_*)'}")
# total_frames is incremented for every frame, warmup frames included, so the
# total contains the warmup frames; only "Measured frames" excludes them.
print(f" Total frames decoded : {total} (incl. {min(total, WARMUP_FRAMES)} warmup)")
print(f" Measured frames : {len(wall_times)}")
if len(wall_times) >= 2:
    elapsed_wall = wall_times[-1] - wall_times[0]
    actual_fps = (len(wall_times) - 1) / elapsed_wall if elapsed_wall > 0 else 0.0
    intervals = [later - earlier for earlier, later in zip(wall_times, wall_times[1:])]
    mean_ms = statistics.mean(intervals) * 1000
    stdev_ms = statistics.stdev(intervals) * 1000 if len(intervals) > 1 else 0.0
    max_ms = max(intervals) * 1000
    min_ms = min(intervals) * 1000
    drops = sum(1 for iv in intervals if iv > 0.080)  # >80 ms = likely drop
    # actual_fps is 0.0 when no wall time elapsed between the first and last
    # frame; do not divide by it in that case.
    nominal_ms = f"{1000/actual_fps:.1f}" if actual_fps > 0 else "n/a"
    print(f"\n --- Frame rate ---")
    print(f" Actual FPS : {actual_fps:.2f}")
    print(f" Frame interval mean : {mean_ms:.1f} ms (nominal {nominal_ms} ms)")
    print(f" Frame interval stdev : {stdev_ms:.1f} ms (jitter)")
    print(f" Frame interval min : {min_ms:.1f} ms")
    print(f" Frame interval max : {max_ms:.1f} ms")
    print(f" Likely dropped frames: {drops} (intervals > 80 ms)")
    if len(pts_list) >= 2:
        pts_intervals = [later - earlier for earlier, later in zip(pts_list, pts_list[1:])]
        # A non-positive mean PTS step (timestamps that stall or run
        # backwards) gives no usable stream rate; report 0 instead of
        # raising ZeroDivisionError or printing a negative rate.
        mean_pts_step = statistics.mean(pts_intervals)
        nominal_fps = 1.0 / mean_pts_step if mean_pts_step > 0 else 0.0
        print(f"\n --- Stream timestamps ---")
        print(f" Nominal stream FPS : {nominal_fps:.2f}")
        pts_rate = actual_fps / nominal_fps if nominal_fps > 0 else 0
        if nominal_fps <= 0:
            print(" Decode pace : n/a (stream timestamps do not advance)")
        elif pts_rate < 0.90:
            print(f" [WARNING] delivering at {pts_rate*100:.0f}% of stream rate — decoder is slow")
        else:
            print(f" Decode pace : {pts_rate*100:.0f}% of stream rate (OK)")
else:
    print(" Not enough frames to compute FPS — did playback start?")
if copy_us:
    mean_copy = statistics.mean(copy_us)
    max_copy = max(copy_us)
    # Per-frame time budget in µs: based on the measured frame rate when one
    # is available, otherwise on an assumed 30 fps.
    if len(wall_times) >= 2 and actual_fps > 0:
        budget_fps = actual_fps
    else:
        budget_fps = 30
    budget_us = 1_000_000 / budget_fps
    copy_pct = mean_copy / budget_us * 100
    print(f"\n --- CPU copy cost (buffer.map + memmove) ---")
    print(f" Mean copy time : {mean_copy:.0f} µs")
    print(f" Max copy time : {max_copy:.0f} µs")
    print(f" Copy % of frame budget: {copy_pct:.1f}%")
if av_drifts:
    mean_drift = statistics.mean(av_drifts)
    # A sample standard deviation needs at least two samples.
    if len(av_drifts) > 1:
        stdev_drift = statistics.stdev(av_drifts)
    else:
        stdev_drift = 0.0
    min_drift, max_drift = min(av_drifts), max(av_drifts)

    drift_report = [
        "\n --- A/V sync (video PTS - pipeline clock) ---",
        f" Mean drift : {mean_drift:+.1f} ms",
        f" Drift stdev : {stdev_drift:.1f} ms",
        f" Drift range : {min_drift:+.1f} ms .. {max_drift:+.1f} ms",
    ]
    # Thresholds in milliseconds: 100 for the mean, 50 for the spread.
    if abs(mean_drift) > 100:
        drift_report.append(" [WARNING] Large mean drift — audio/video desync likely")
    if stdev_drift > 50:
        drift_report.append(" [WARNING] High drift variance — intermittent desync")
    print("\n".join(drift_report))
# The header shows the full warning count, but only the first five warnings
# are listed; errors are always listed in full.
if warnings:
    print(f"\n GStreamer warnings ({len(warnings)}):")
    print(*warnings[:5], sep="\n")
if errors:
    print(f"\n GStreamer errors:")
    print(*errors, sep="\n")
print()