#!/usr/bin/env python3
"""
H.264 FHD decode benchmark for R36S / RK3326.

Compares four decode paths:
  1. SW-fakesink — avdec_h264 → fakesink (pure decode throughput, CPU)
  2. SW-appsink  — avdec_h264 → videoconvert → BGRA → appsink (app path)
  3. HW-fakesink — mppvideodec → fakesink (pure HW decode throughput)
  4. HW-appsink  — mppvideodec → NV12 → appsink (HW decode + zero-copy upload)

Usage:
  # Full auto (generates /tmp/test_fhd.mp4 if missing):
  GST_PLUGIN_PATH=/usr/lib/aarch64-linux-gnu/gstreamer-1.0 \\
  LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 \\
  /home/ark/miniconda3/envs/r36s-dlna-browser/bin/python \\
  /home/ark/R36SHack/tests/benchmark_decode.py

  # Use a specific file:
  ... benchmark_decode.py /path/to/video.mp4

Output: text table + /tmp/decode_benchmark.json
"""

from __future__ import annotations

import json
import os
import sys
import threading
import time

TEST_VIDEO = "/tmp/test_fhd.mp4"  # clip used when no path is given on the command line
GENERATE_DURATION = 10  # seconds of synthetic FHD content to encode
GENERATE_FPS = 30  # frame rate of the synthetic clip; also reported as the target FPS
WARMUP_FRAMES = 10  # frames to discard before measuring


# ── helpers ─────────────────────────────────────────────────────────────────

def _section(title: str) -> None:
    """Print *title* as a banner framed by two 60-character ``=`` rules.

    A blank line is emitted first so the banner is separated from whatever
    was printed before it.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)


def _cpu_times(stat_path: str = "/proc/stat") -> tuple[int, int]:
    """Return ``(busy, total)`` jiffies from the aggregate ``cpu`` line.

    ``busy`` is user + nice + system (the first three counters); ``total`` is
    the sum of the first seven counters on that line.

    Args:
        stat_path: File to read. Defaults to ``/proc/stat``; overridable so the
            parser can be pointed at a captured sample.

    Raises:
        ValueError: If the first line is not a ``cpu`` line carrying at least
            seven counters.
    """
    with open(stat_path, encoding="ascii") as f:
        fields = f.readline().split()
    # Fail with a clear message instead of an IndexError / bad int() further down.
    if len(fields) < 8 or fields[0] != "cpu":
        raise ValueError(f"unexpected first line in {stat_path}: {fields!r}")
    counters = [int(x) for x in fields[1:8]]
    user, nice, system = counters[:3]
    return user + nice + system, sum(counters)


def _cpu_percent(t0_busy: int, t0_total: int, t1_busy: int, t1_total: int) -> float:
    """Return the busy share, in percent, between two ``(busy, total)`` samples.

    Args:
        t0_busy, t0_total: Counters from the earlier sample.
        t1_busy, t1_total: Counters from the later sample.

    Returns:
        ``100 * busy_delta / total_delta``, or ``0.0`` when the total counter
        did not advance (avoids a division by zero).
    """
    busy_delta = t1_busy - t0_busy
    total_delta = t1_total - t0_total
    if not total_delta:
        return 0.0
    return 100.0 * busy_delta / total_delta


# ── GStreamer ────────────────────────────────────────────────────────────────

def _init_gst():
    """Import and initialise GStreamer; return the ``(Gst, GLib)`` modules.

    PyGObject (``gi``) is imported here, at call time, rather than at module
    import. Both the ``Gst`` and ``GstApp`` 1.0 namespaces are required before
    anything is imported from ``gi.repository``.
    """
    import gi

    gi.require_version("Gst", "1.0")
    gi.require_version("GstApp", "1.0")
    # GstApp is imported but neither used nor returned.
    # NOTE(review): presumably loaded for its side effect on appsink bindings — confirm.
    from gi.repository import Gst, GstApp, GLib  # noqa: F401

    Gst.init(None)
    return Gst, GLib


# ── video generation ─────────────────────────────────────────────────────────

def _generate_test_video(path: str, Gst, GLib) -> None:
    """Encode a synthetic 1920x1080 H.264 clip into an MP4 file at *path*.

    The clip is ``GENERATE_DURATION`` seconds of ``videotestsrc`` output at
    ``GENERATE_FPS`` frames per second. ``avenc_h264`` is used when it is
    registered, otherwise ``x264enc``.

    Args:
        path: Destination file. It is assigned to the filesink as a property
            rather than spliced into the pipeline description, so spaces and
            quotes in the path need no escaping.
        Gst: The ``Gst`` module returned by ``_init_gst()``.
        GLib: The ``GLib`` module returned by ``_init_gst()``.

    Raises:
        SystemExit: If no H.264 encoder is registered, or if the pipeline
            posts an ERROR message while encoding.
    """
    print(f" Generating synthetic FHD H.264 clip → {path}")
    print(f" (duration={GENERATE_DURATION}s, size=1920x1080, rate={GENERATE_FPS}/1)")

    # Pick the encoder before building the pipeline description.
    reg = Gst.Registry.get()
    factory_type = Gst.ElementFactory.__gtype__
    if reg.find_feature("avenc_h264", factory_type) is not None:
        encoder = "avenc_h264 bitrate=4000000"
    else:
        print(" [WARN] avenc_h264 not found — trying x264enc")
        if reg.find_feature("x264enc", factory_type) is None:
            sys.exit(" [ERR] No H.264 encoder found. Use ffmpeg to generate the clip:\n"
                     f" ffmpeg -y -f lavfi -i testsrc=duration={GENERATE_DURATION}:"
                     f"size=1920x1080:rate={GENERATE_FPS} -c:v libx264 -preset ultrafast "
                     f"-b:v 4M {path}")
        encoder = "x264enc bitrate=4000 speed-preset=ultrafast"

    pipeline = Gst.parse_launch(
        f"videotestsrc num-buffers={GENERATE_DURATION * GENERATE_FPS} "
        f"! video/x-raw,width=1920,height=1080,framerate={GENERATE_FPS}/1,format=I420 "
        f"! {encoder} "
        f"! h264parse ! mp4mux ! filesink name=gen_sink"
    )
    pipeline.get_by_name("gen_sink").set_property("location", path)

    loop = GLib.MainLoop()
    failures: list[str] = []

    def on_message(bus, msg):
        # Only record the outcome here; teardown and exit happen after the
        # loop returns instead of raising SystemExit inside a GLib callback.
        if msg.type == Gst.MessageType.EOS:
            loop.quit()
        elif msg.type == Gst.MessageType.ERROR:
            err, dbg = msg.parse_error()
            failures.append(f"{err} {dbg}")
            loop.quit()

    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", on_message)

    t_start = time.monotonic()
    try:
        pipeline.set_state(Gst.State.PLAYING)
        loop.run()
    finally:
        # Always stop the pipeline and drop the bus watch, even on Ctrl-C.
        pipeline.set_state(Gst.State.NULL)
        bus.remove_signal_watch()
    elapsed = time.monotonic() - t_start

    if failures:
        sys.exit(f" [ERR] Generation failed: {failures[0]}")

    size = os.path.getsize(path)
    print(f" Generated in {elapsed:.1f}s — {size // 1024}KB")


# ── benchmark runner ─────────────────────────────────────────────────────────

def _run_benchmark(
    label: str,
    video_path: str,
    pipeline_str: str,
    is_appsink: bool,
    Gst,
    GLib,
) -> dict:
    """Run one decode benchmark pass and return its metrics.

    Frames are counted as they reach the sink. The first ``WARMUP_FRAMES``
    frames are counted but not timestamped, and FPS is computed from the
    timestamps of the remaining frames. When fewer than two frames were
    timestamped, FPS falls back to all counted frames over the wall time.

    Args:
        label: Name printed while running and stored in the result.
        video_path: Not used by this function (the file location is already
            part of *pipeline_str*); kept so existing callers keep working.
        pipeline_str: Pipeline description passed to ``Gst.parse_launch``.
        is_appsink: When true the pipeline must contain an appsink named
            ``bench_sink``. Otherwise frames are counted through the
            ``handoff`` signal of an element named ``probe``, if there is one.
        Gst: The ``Gst`` module returned by ``_init_gst()``.
        GLib: The ``GLib`` module returned by ``_init_gst()``.

    Returns:
        A dict with ``label``, ``fps``, ``frames``, ``wall_s`` and ``cpu_pct``.
        An ``error`` key holding the message is added only when the pipeline
        posted an ERROR.

    Raises:
        ValueError: If *is_appsink* is true and no ``bench_sink`` exists.
    """
    print(f"\n Running: {label}")

    frame_count = 0
    frame_times: list[float] = []
    errors: list[str] = []

    def count_frame() -> None:
        # Shared by both sink flavours: timestamp only post-warm-up frames.
        nonlocal frame_count
        if frame_count >= WARMUP_FRAMES:
            frame_times.append(time.monotonic())
        frame_count += 1

    pipeline = Gst.parse_launch(pipeline_str)

    if is_appsink:
        sink = pipeline.get_by_name("bench_sink")
        if sink is None:
            raise ValueError(f"{label}: pipeline has no appsink named 'bench_sink'")

        def on_new_sample(appsink):
            # Pull (and discard) the sample so the bounded queue keeps draining.
            appsink.emit("pull-sample")
            count_frame()
            return Gst.FlowReturn.OK

        sink.set_property("emit-signals", True)
        sink.set_property("sync", False)
        sink.set_property("max-buffers", 4)
        sink.set_property("drop", False)
        sink.connect("new-sample", on_new_sample)
    else:
        # fakesink path: count frames via identity handoff signal
        probe = pipeline.get_by_name("probe")
        if probe is not None:
            probe.connect("handoff", lambda *args: count_frame())

    loop = GLib.MainLoop()

    def on_message(bus, msg):
        if msg.type == Gst.MessageType.EOS:
            loop.quit()
        elif msg.type == Gst.MessageType.ERROR:
            err, dbg = msg.parse_error()
            errors.append(str(err))
            print(f" [ERR] {label}: {err}")
            loop.quit()

    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", on_message)

    cpu_before = _cpu_times()
    t_wall_start = time.monotonic()
    try:
        pipeline.set_state(Gst.State.PLAYING)
        loop.run()
        t_wall_end = time.monotonic()
        cpu_after = _cpu_times()
    finally:
        # Stop the pipeline and release the bus watch on every exit path so
        # consecutive runs do not accumulate watches.
        pipeline.set_state(Gst.State.NULL)
        bus.remove_signal_watch()

    wall = t_wall_end - t_wall_start
    cpu_pct = _cpu_percent(*cpu_before, *cpu_after)

    if len(frame_times) >= 2:
        measure_frames = len(frame_times)
        measure_wall = frame_times[-1] - frame_times[0]
        # N timestamps span N-1 frame intervals.
        fps = (measure_frames - 1) / measure_wall if measure_wall > 0 else 0
    else:
        measure_frames = frame_count
        fps = measure_frames / wall if wall > 0 else 0

    result = {
        "label": label,
        "fps": round(fps, 2),
        "frames": measure_frames,
        "wall_s": round(wall, 2),
        "cpu_pct": round(cpu_pct, 1),
    }
    if errors:
        result["error"] = errors[0]
    print(f" {fps:6.1f} fps {measure_frames} frames {wall:.1f}s wall CPU {cpu_pct:.0f}%")
    return result


# ── main ─────────────────────────────────────────────────────────────────────

def main() -> None:
    """Run every available decode benchmark, print a summary and save it as JSON.

    The clip is ``sys.argv[1]`` when given, otherwise ``TEST_VIDEO``. If that
    file is missing or smaller than 10 000 bytes a synthetic clip is generated
    at the same path first. Software runs need ``avdec_h264`` and hardware
    runs need ``mppvideodec``; a run whose decoder is not registered is
    skipped. The ``mppvideodec`` rank is changed to steer ``decodebin`` and is
    put back to its original value afterwards, even if a run raises.

    Results are printed as a table and written to
    ``/tmp/decode_benchmark.json``.
    """
    video_path = sys.argv[1] if len(sys.argv) > 1 else TEST_VIDEO

    _section("GStreamer FHD decode benchmark — R36S / RK3326")

    Gst, GLib = _init_gst()
    print(f" GStreamer {Gst.version_string()}")

    reg = Gst.Registry.get()
    factory_type = Gst.ElementFactory.__gtype__
    mpp_factory = reg.find_feature("mppvideodec", factory_type)
    mpp_ok = mpp_factory is not None
    av_ok = reg.find_feature("avdec_h264", factory_type) is not None
    print(f" avdec_h264: {'✓' if av_ok else '✗ MISSING'}")
    print(f" mppvideodec: {'✓' if mpp_ok else '✗ MISSING (install mpp libs)'}")

    # --- prepare test video -------------------------------------------------
    _section("Test video")
    if not os.path.isfile(video_path) or os.path.getsize(video_path) < 10_000:
        _generate_test_video(video_path, Gst, GLib)
    else:
        size = os.path.getsize(video_path)
        print(f" Using existing: {video_path} ({size // 1024}KB)")

    # Remember the rank the plugin registered with so it can be restored
    # exactly, instead of assuming a fixed default.
    original_mpp_rank = mpp_factory.get_rank() if mpp_ok else None

    def _set_mpp_rank(rank: int) -> None:
        """Temporarily set mppvideodec factory rank to steer decodebin selection."""
        if mpp_factory is not None:
            mpp_factory.set_rank(rank)

    def _quoted(value: str) -> str:
        """Quote *value* for use as a property value in a pipeline description."""
        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    # --- benchmark runs -----------------------------------------------------
    _section("Benchmarks (sync=false, as fast as possible)")

    results = []

    # Pipeline note: qtdemux has dynamic src pads so we use decodebin.
    # decodebin rank manipulation steers it toward SW or HW decoder.
    GST_RANK_NONE = 0
    MPP_RANK_HIGH = 257  # above avdec_h264 (256)

    # Quoted so that paths containing spaces survive pipeline parsing.
    source = f"filesrc location={_quoted(video_path)} ! decodebin "
    fakesink_pipe = source + "! identity name=probe signal-handoffs=true ! fakesink sync=false"

    try:
        if av_ok:
            _set_mpp_rank(GST_RANK_NONE)  # ensure avdec_h264 wins

            # 1. SW fakesink — pure decode throughput
            results.append(_run_benchmark(
                "SW fakesink (avdec_h264 → discard)",
                video_path, fakesink_pipe, is_appsink=False, Gst=Gst, GLib=GLib,
            ))

            # 2. SW appsink — full BGRA path (as used by app without HW decode)
            pipe = (
                source
                + "! videoconvert ! video/x-raw,format=BGRA "
                + "! appsink name=bench_sink"
            )
            results.append(_run_benchmark(
                "SW appsink BGRA (avdec_h264 → videoconvert → BGRA)",
                video_path, pipe, is_appsink=True, Gst=Gst, GLib=GLib,
            ))

        if mpp_ok:
            _set_mpp_rank(MPP_RANK_HIGH)  # prefer mppvideodec

            # 3. HW fakesink — pure MPP decode throughput
            results.append(_run_benchmark(
                "HW fakesink (mppvideodec → discard)",
                video_path, fakesink_pipe, is_appsink=False, Gst=Gst, GLib=GLib,
            ))

            # 4. HW appsink NV12 — full app path with HW + zero-copy SDL upload
            pipe = source + "! appsink name=bench_sink caps=video/x-raw,format=NV12"
            results.append(_run_benchmark(
                "HW appsink NV12 (mppvideodec → NV12, zero-copy)",
                video_path, pipe, is_appsink=True, Gst=Gst, GLib=GLib,
            ))
    finally:
        if original_mpp_rank is not None:
            _set_mpp_rank(original_mpp_rank)

    # --- summary table -------------------------------------------------------
    _section("Results")
    # Some labels are longer than 45 characters; widen the column to fit them.
    label_w = max([45] + [len(r["label"]) for r in results])
    print(f" {'Path':<{label_w}} {'FPS':>6} {'CPU%':>5}")
    print(f" {'-' * label_w} {'-' * 6} {'-' * 5}")
    for r in results:
        fps_bar = "★" * int(r["fps"] / 5)  # one star per 5 fps
        print(f" {r['label']:<{label_w}} {r['fps']:>6.1f} {r['cpu_pct']:>4.0f}% {fps_bar}")

    target_fps = GENERATE_FPS
    print(f"\n Target playback FPS: {target_fps}")

    best = max(results, key=lambda r: r["fps"]) if results else None
    if best:
        print(f" Best path: {best['label']} @ {best['fps']:.1f} fps")
        if best["fps"] >= target_fps:
            print(" ✓ Smooth real-time playback is achievable.")
        else:
            print(f" ✗ Cannot sustain {target_fps} fps — consider lower resolution or bitrate.")

    # --- save JSON -----------------------------------------------------------
    out_path = "/tmp/decode_benchmark.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump({"video": video_path, "results": results, "target_fps": target_fps}, f, indent=2)
    print(f"\n Results saved: {out_path}")


# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()