# SDL2/GStreamer DLNA browser for R36S — by Matteo Benedetto

#!/usr/bin/env python3
"""
H.264 FHD decode benchmark for R36S / RK3326.
Compares three decode paths:
1. SW-fakesink — avdec_h264 → fakesink (pure decode throughput, CPU)
2. SW-appsink — avdec_h264 → videoconvert → BGRA → appsink (app path)
3. HW-appsink — mppvideodec → NV12 → appsink (HW decode + zero-copy upload)
Usage:
# Full auto (generates /tmp/test_fhd.mp4 if missing):
GST_PLUGIN_PATH=/usr/lib/aarch64-linux-gnu/gstreamer-1.0 \\
LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 \\
/home/ark/miniconda3/envs/r36s-dlna-browser/bin/python \\
/home/ark/R36SHack/tests/benchmark_decode.py
# Use a specific file:
... benchmark_decode.py /path/to/video.mp4
Output: text table + /tmp/decode_benchmark.json
"""
from __future__ import annotations
import json
import os
import sys
import threading
import time
TEST_VIDEO = "/tmp/test_fhd.mp4"  # default synthetic clip used when no path is given
GENERATE_DURATION = 10 # seconds of synthetic FHD content to encode
GENERATE_FPS = 30  # frame rate of the generated clip (also the playback target)
WARMUP_FRAMES = 10 # frames to discard before measuring
# ── helpers ─────────────────────────────────────────────────────────────────
def _section(title: str) -> None:
print(f"\n{'='*60}")
print(f" {title}")
print("=" * 60)
def _cpu_times() -> tuple[int, int]:
"""Return (user+sys, total) jiffies from /proc/stat cpu line."""
with open("/proc/stat") as f:
line = f.readline()
parts = line.split()
values = [int(x) for x in parts[1:8]]
busy = values[0] + values[1] + values[2] # user + nice + system
total = sum(values)
return busy, total
def _cpu_percent(t0_busy: int, t0_total: int, t1_busy: int, t1_total: int) -> float:
db = t1_busy - t0_busy
dt = t1_total - t0_total
return 100.0 * db / dt if dt else 0.0
# ── GStreamer ────────────────────────────────────────────────────────────────
def _init_gst():
    """Import PyGObject, pin the GStreamer API versions, and initialise it.

    Returns the (Gst, GLib) modules; GstApp must be required so appsink's
    signal API is registered even though the module itself is unused here.
    """
    import gi

    gi.require_version("Gst", "1.0")
    gi.require_version("GstApp", "1.0")
    from gi.repository import GLib, Gst, GstApp  # noqa: F401 (GstApp side effect)

    Gst.init(None)
    return Gst, GLib
# ── video generation ─────────────────────────────────────────────────────────
def _generate_test_video(path: str, Gst, GLib) -> None:
    """Encode a synthetic FHD H.264 clip to *path* with videotestsrc.

    Prefers avenc_h264; falls back to x264enc, and aborts with an ffmpeg
    hint when neither encoder is registered.

    Fixes vs. original:
    - the filesink location is quoted so paths containing spaces parse;
    - ``sys.exit`` is no longer raised inside the GLib bus callback
      (PyGObject swallows exceptions raised in C-invoked callbacks, so the
      old code printed a traceback but never actually exited) — the error
      is recorded and handled after ``loop.run()`` returns.
    """
    print(f" Generating synthetic FHD H.264 clip → {path}")
    print(f" (duration={GENERATE_DURATION}s, size=1920x1080, rate={GENERATE_FPS}/1)")
    pipe_str = (
        f"videotestsrc num-buffers={GENERATE_DURATION * GENERATE_FPS} "
        f"! video/x-raw,width=1920,height=1080,framerate={GENERATE_FPS}/1,format=I420 "
        f"! avenc_h264 bitrate=4000000 "
        f'! h264parse ! mp4mux ! filesink location="{path}"'
    )
    # avenc_h264 may be absent on minimal gst-libav builds — try x264enc next.
    reg = Gst.Registry.get()
    if reg.find_feature("avenc_h264", Gst.ElementFactory.__gtype__) is None:
        print(" [WARN] avenc_h264 not found — trying x264enc")
        pipe_str = pipe_str.replace(
            "avenc_h264 bitrate=4000000",
            "x264enc bitrate=4000 speed-preset=ultrafast",
        )
        if reg.find_feature("x264enc", Gst.ElementFactory.__gtype__) is None:
            sys.exit(" [ERR] No H.264 encoder found. Use ffmpeg to generate the clip:\n"
                     f" ffmpeg -y -f lavfi -i testsrc=duration={GENERATE_DURATION}:"
                     f"size=1920x1080:rate={GENERATE_FPS} -c:v libx264 -preset ultrafast "
                     f"-b:v 4M {path}")
    pipeline = Gst.parse_launch(pipe_str)
    loop = GLib.MainLoop()
    error_box: list[str] = []  # filled by the bus callback on ERROR

    def on_message(bus, msg):
        if msg.type == Gst.MessageType.EOS:
            pipeline.set_state(Gst.State.NULL)
            loop.quit()
        elif msg.type == Gst.MessageType.ERROR:
            err, dbg = msg.parse_error()
            pipeline.set_state(Gst.State.NULL)
            error_box.append(f"{err} {dbg}")
            loop.quit()

    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", on_message)
    pipeline.set_state(Gst.State.PLAYING)
    t_start = time.monotonic()
    loop.run()
    if error_box:
        sys.exit(f" [ERR] Generation failed: {error_box[0]}")
    elapsed = time.monotonic() - t_start
    size = os.path.getsize(path)
    print(f" Generated in {elapsed:.1f}s — {size // 1024}KB")
# ── benchmark runner ─────────────────────────────────────────────────────────
def _run_benchmark(
    label: str,
    video_path: str,
    pipeline_str: str,
    is_appsink: bool,
    Gst,
    GLib,
) -> dict:
    """Run one decode benchmark pass and return a metrics dict.

    Parameters:
        label: human-readable name printed and stored in the result.
        video_path: source file (informational; already baked into the
            pipeline string).
        pipeline_str: gst-launch syntax; must contain an appsink named
            ``bench_sink`` when *is_appsink*, or an identity named
            ``probe`` otherwise.
        is_appsink: selects the frame-counting mechanism.
        Gst, GLib: initialised GStreamer modules from ``_init_gst``.

    Returns: {"label", "fps", "frames", "wall_s", "cpu_pct"}.

    Fix vs. original: the ``done_event`` threading.Event was set by the
    bus callback but never waited on anywhere — dead state, removed.
    """
    print(f"\n Running: {label}")
    frame_count = [0]  # boxed int so the GStreamer callbacks can mutate it
    frame_times: list[float] = []  # one timestamp per frame after warm-up
    pipeline = Gst.parse_launch(pipeline_str)
    if is_appsink:
        sink = pipeline.get_by_name("bench_sink")

        def on_new_sample(s):
            # Pull (and discard) the sample so the appsink queue drains.
            s.emit("pull-sample")
            n = frame_count[0]
            frame_count[0] = n + 1
            if n >= WARMUP_FRAMES:
                frame_times.append(time.monotonic())
            return Gst.FlowReturn.OK

        sink.set_property("emit-signals", True)
        sink.set_property("sync", False)  # run as fast as decode allows
        sink.set_property("max-buffers", 4)
        sink.set_property("drop", False)
        sink.connect("new-sample", on_new_sample)
    else:
        # fakesink path: count frames via the identity element's handoff signal
        probe = pipeline.get_by_name("probe")
        if probe:
            def on_handoff(*args):
                n = frame_count[0]
                frame_count[0] = n + 1
                if n >= WARMUP_FRAMES:
                    frame_times.append(time.monotonic())

            probe.connect("handoff", on_handoff)
    loop = GLib.MainLoop()

    def on_message(bus, msg):
        if msg.type == Gst.MessageType.EOS:
            pipeline.set_state(Gst.State.NULL)
            loop.quit()
        elif msg.type == Gst.MessageType.ERROR:
            err, dbg = msg.parse_error()
            pipeline.set_state(Gst.State.NULL)
            loop.quit()
            print(f" [ERR] {label}: {err}")

    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", on_message)
    cpu_before = _cpu_times()
    t_wall_start = time.monotonic()
    pipeline.set_state(Gst.State.PLAYING)
    loop.run()
    t_wall_end = time.monotonic()
    cpu_after = _cpu_times()
    wall = t_wall_end - t_wall_start
    cpu_pct = _cpu_percent(*cpu_before, *cpu_after)
    if len(frame_times) >= 2:
        # FPS over the post-warm-up window: N timestamps span N-1 intervals.
        measure_frames = len(frame_times)
        measure_wall = frame_times[-1] - frame_times[0]
        fps = (measure_frames - 1) / measure_wall if measure_wall > 0 else 0
    else:
        # Too few frames to window — fall back to total frames over wall time.
        measure_frames = frame_count[0]
        fps = measure_frames / wall if wall > 0 else 0
    result = {
        "label": label,
        "fps": round(fps, 2),
        "frames": measure_frames,
        "wall_s": round(wall, 2),
        "cpu_pct": round(cpu_pct, 1),
    }
    print(f" {fps:6.1f} fps {measure_frames} frames {wall:.1f}s wall CPU {cpu_pct:.0f}%")
    return result
# ── main ─────────────────────────────────────────────────────────────────────
def main() -> None:
    """Entry point: prepare the test clip, run all decode paths, report.

    Runs up to four benchmarks (SW/HW decoder x fakesink/appsink), steering
    decodebin's decoder choice by manipulating mppvideodec's factory rank,
    then prints a summary table and writes /tmp/decode_benchmark.json.

    Fix vs. original: the '✓' checkmark and '█' bar-chart glyphs had been
    lost (empty string literals), so the availability lines printed nothing
    for the OK case and the FPS bar was always empty — restored.
    """
    video_path = sys.argv[1] if len(sys.argv) > 1 else TEST_VIDEO
    _section("GStreamer FHD decode benchmark — R36S / RK3326")
    Gst, GLib = _init_gst()
    print(f" GStreamer {Gst.version_string()}")
    reg = Gst.Registry.get()
    mpp_ok = reg.find_feature("mppvideodec", Gst.ElementFactory.__gtype__) is not None
    av_ok = reg.find_feature("avdec_h264", Gst.ElementFactory.__gtype__) is not None
    print(f" avdec_h264: {'✓' if av_ok else '✗ MISSING'}")
    print(f" mppvideodec: {'✓' if mpp_ok else '✗ MISSING (install mpp libs)'}")
    # --- prepare test video -------------------------------------------------
    _section("Test video")
    # Anything under 10 KB is treated as a broken/truncated leftover.
    if not os.path.isfile(video_path) or os.path.getsize(video_path) < 10_000:
        _generate_test_video(video_path, Gst, GLib)
    else:
        size = os.path.getsize(video_path)
        print(f" Using existing: {video_path} ({size // 1024}KB)")

    def _set_mpp_rank(rank: int) -> None:
        """Temporarily set mppvideodec factory rank to steer decodebin selection."""
        factory = reg.find_feature("mppvideodec", Gst.ElementFactory.__gtype__)
        if factory:
            factory.set_rank(rank)

    # --- benchmark runs -----------------------------------------------------
    _section("Benchmarks (sync=false, as fast as possible)")
    results = []
    # Pipeline note: qtdemux has dynamic src pads so we use decodebin.
    # decodebin rank manipulation steers it toward SW or HW decoder.
    GST_RANK_NONE = 0
    MPP_RANK_HIGH = 257  # above avdec_h264 (256)
    # 1. SW fakesink — pure decode throughput
    if av_ok:
        _set_mpp_rank(GST_RANK_NONE)  # ensure avdec_h264 wins
        pipe = (f"filesrc location={video_path} ! decodebin "
                f"! identity name=probe signal-handoffs=true ! fakesink sync=false")
        results.append(_run_benchmark(
            "SW fakesink (avdec_h264 → discard)",
            video_path, pipe, is_appsink=False, Gst=Gst, GLib=GLib,
        ))
    # 2. SW appsink — full BGRA path (as used by app without HW decode)
    if av_ok:
        _set_mpp_rank(GST_RANK_NONE)
        pipe = (
            f"filesrc location={video_path} ! decodebin "
            f"! videoconvert ! video/x-raw,format=BGRA "
            f"! appsink name=bench_sink"
        )
        results.append(_run_benchmark(
            "SW appsink BGRA (avdec_h264 → videoconvert → BGRA)",
            video_path, pipe, is_appsink=True, Gst=Gst, GLib=GLib,
        ))
    # 3. HW fakesink — pure MPP decode throughput
    if mpp_ok:
        _set_mpp_rank(MPP_RANK_HIGH)  # prefer mppvideodec
        pipe = (f"filesrc location={video_path} ! decodebin "
                f"! identity name=probe signal-handoffs=true ! fakesink sync=false")
        results.append(_run_benchmark(
            "HW fakesink (mppvideodec → discard)",
            video_path, pipe, is_appsink=False, Gst=Gst, GLib=GLib,
        ))
    # 4. HW appsink NV12 — full app path with HW + zero-copy SDL upload
    if mpp_ok:
        _set_mpp_rank(MPP_RANK_HIGH)
        pipe = (
            f"filesrc location={video_path} ! decodebin "
            f"! appsink name=bench_sink caps=video/x-raw,format=NV12"
        )
        results.append(_run_benchmark(
            "HW appsink NV12 (mppvideodec → NV12, zero-copy)",
            video_path, pipe, is_appsink=True, Gst=Gst, GLib=GLib,
        ))
    _set_mpp_rank(64)  # restore default marginal rank
    # --- summary table -------------------------------------------------------
    _section("Results")
    print(f" {'Path':<45} {'FPS':>6} {'CPU%':>5}")
    print(f" {'-'*45} {'-'*6} {'-'*5}")
    for r in results:
        fps_bar = "█" * int(r["fps"] / 5)  # one bar segment per 5 fps
        print(f" {r['label']:<45} {r['fps']:>6.1f} {r['cpu_pct']:>4.0f}% {fps_bar}")
    target_fps = GENERATE_FPS
    print(f"\n Target playback FPS: {target_fps}")
    best = max(results, key=lambda r: r["fps"]) if results else None
    if best:
        print(f" Best path: {best['label']} @ {best['fps']:.1f} fps")
        if best["fps"] >= target_fps:
            print(f" ✓ Smooth real-time playback is achievable.")
        else:
            print(f" ✗ Cannot sustain {target_fps} fps — consider lower resolution or bitrate.")
    # --- save JSON -----------------------------------------------------------
    out_path = "/tmp/decode_benchmark.json"
    with open(out_path, "w") as f:
        json.dump({"video": video_path, "results": results, "target_fps": target_fps}, f, indent=2)
    print(f"\n Results saved: {out_path}")
# Entry guard: allows importing this module without running the benchmark.
if __name__ == "__main__":
    main()