fix: scale to 16:9 target box with add-borders to preserve source DAR

GStreamer caps fixation always picks the identity value for unconstrained dimensions: width-only caps keep the source height unchanged, giving 640x1080 instead of 640x360 for a 1920x1080 source.

Fix: compute a 16:9 output box that fits inside the video area, use both width and height in the capsfilter, and set add-borders=True so GStreamer letterboxes or pillarboxes any non-16:9 source without distortion.

For the test device (720x720 KMSDRM, ~120px HUD):
  video area: ~720x600 → scale target: 720x404 (16:9, height rounded down to even)
For the default viewport (640x480):
  video area: 640x480 → scale target: 640x360 (16:9)

The Section 8 test is updated to mirror the same 16:9 + add-borders strategy.
1 week ago · 790f001f4e
2 changed files with 38 additions and 28 deletions
--- a/src/r36s_dlna_browser/player/gstreamer_backend.py
+++ b/src/r36s_dlna_browser/player/gstreamer_backend.py
@@ -431,24 +431,34 @@ class GStreamerBackend(PlayerBackend):
        # Hardware decode (NV12): insert a videoscale → capsfilter chain inside a
        # GstBin before the appsink so playbin accepts it as a single video-sink.
        #
-        # videoscale(method=nearest-neighbour) scales the decoded source to the
-        # video-area width while letting GStreamer pick the output height from
-        # the source's native aspect ratio.  Constraining only the width (not
-        # both width and height) means GStreamer will never stretch the video:
-        # it always preserves the source DAR.  The resulting frame is then
-        # centred in the SDL viewport by _fit_frame_to_viewport() which adds
-        # the necessary letterbox/pillarbox margins through SDL_RenderCopy.
+        # Strategy: compute a 16:9 target box that fits inside the video area,
+        # then use videoscale with add-borders=True.
+        #  - For 16:9 source (most HD content): source AR == target AR → no
+        #    borders, full frame filled with content.  No distortion.
+        #  - For non-16:9 source (4:3, ultra-wide, etc.): add-borders adds
+        #    pillar- or letterboxes to preserve the source DAR within the 16:9
+        #    output frame.  No distortion.
+        # Both output dimensions come from the video area's limiting side at a
+        # 16:9 ratio rather than leaving height unconstrained, because omitting
+        # height from the capsfilter makes GStreamer keep the source height
+        # (caps fixation picks the identity value for unconstrained dimensions).
        #   Nearest-neighbour skips ~56% of source rows so only ~44% of source
        #   cache lines are fetched; Python memmove drops from ~32 ms to ~1 ms.
-        #
-        # capsfilter — enforces format=NV12 and output width; height is left as
-        # a range so GStreamer can choose the correct value from the source AR.
        vp_w, vp_h, vp_top, vp_bottom, vp_left, vp_right = self._viewport
        video_w = max(4, vp_w - vp_left - vp_right)
-        scale_w = (video_w // 2) * 2
-        if scale_w < 4:
-            scale_w = 640
-        log.info("NV12 appsink: videoscale(nearest) → width=%d (AR-preserving) before appsink", scale_w)
+        video_h = max(4, vp_h - vp_top - vp_bottom)
+        _AR = 16 / 9   # target AR — efficient for typical HD content
+        if video_w / video_h >= _AR:
+            # Video area is wider than 16:9 → height is the limiting dimension.
+            scale_h = (video_h // 2) * 2
+            scale_w = (int(scale_h * _AR) // 2) * 2
+        else:
+            # Video area is taller than 16:9 → width is the limiting dimension.
+            scale_w = (video_w // 2) * 2
+            scale_h = (int(scale_w / _AR) // 2) * 2
+        if scale_w < 4 or scale_h < 4:
+            scale_w, scale_h = 640, 360
+        log.info("NV12 appsink: videoscale(nearest,add-borders) → %dx%d before appsink", scale_w, scale_h)

        scale      = self._gst.ElementFactory.make("videoscale",  "vscale")
        capsfilter = self._gst.ElementFactory.make("capsfilter",  "vcaps")
@@ -461,18 +471,14 @@ class GStreamerBackend(PlayerBackend):

        # nearest-neighbour: accesses only the source pixels needed for each
        # output sample (strided reads), skipping ~56% of source rows entirely.
-        # Height is intentionally OMITTED from the caps so GStreamer computes it
-        # from the source's display aspect ratio (DAR).  Specifying a height
-        # range like (int)[2,2160] is wrong — GStreamer's caps fixation picks the
-        # nearest compatible value (the source height itself), bypassing scaling.
-        # Without any height cap, videoscale scales width to scale_w and derives
-        # the height that preserves the DAR; NV12's even-dimension requirement is
-        # satisfied automatically by GStreamer's caps fixation rounding.
+        # add-borders=True: GStreamer letterboxes/pillarboxes any source that
+        # doesn't match the 16:9 target to preserve the source DAR.
        scale.set_property("method", 0)
+        scale.set_property("add-borders", True)
        capsfilter.set_property(
            "caps",
            self._gst.Caps.from_string(
-                f"video/x-raw,format=NV12,width={scale_w}"
+                f"video/x-raw,format=NV12,width={scale_w},height={scale_h}"
            ),
        )

--- a/tests/test_video_playback_device.py
+++ b/tests/test_video_playback_device.py
@@ -365,7 +365,7 @@ elif not test_url:
    _warn("Skipped — no URL. Provide a URL as the first argument.")
 else:
    SDL8_SECONDS = 20          # how long to run
-    SDL8_SCALE_W = 640           # width fed into capsfilter; height derived from source DAR
+    SDL8_SCALE_W = 640           # target width; height computed as 16:9 box

    try:
        import ctypes
@@ -450,16 +450,20 @@ else:
                pass

        # Build videoscale GstBin (nearest-neighbour) → capsfilter → appsink.
-        # Only width is fixed in the capsfilter; GStreamer derives height from
-        # the source's display aspect ratio (same strategy as _create_appsink).
+        # Mirrors _create_appsink(): 16:9 target box + add-borders for non-16:9
+        # sources so all content ARs are handled without distortion.
        video_sink8 = appsink8
        if _hw_active:
            scale8   = Gst.ElementFactory.make("videoscale", "vs8")
            cfilt8   = Gst.ElementFactory.make("capsfilter", "cf8")
            if scale8 and cfilt8:
-                scale8.set_property("method", 0)   # nearest-neighbour
+                _ar = 16 / 9
+                _s8_w = SDL8_SCALE_W
+                _s8_h = (int(_s8_w / _ar) // 2) * 2   # e.g. 640 → 360
+                scale8.set_property("method", 0)
+                scale8.set_property("add-borders", True)
                cfilt8.set_property("caps", Gst.Caps.from_string(
-                    f"video/x-raw,format=NV12,width={SDL8_SCALE_W}"))
+                    f"video/x-raw,format=NV12,width={_s8_w},height={_s8_h}"))
                bin8 = Gst.Bin.new("vscale-bin8")
                bin8.add(scale8); bin8.add(cfilt8); bin8.add(appsink8)
                scale8.link(cfilt8); cfilt8.link(appsink8)
@@ -468,7 +472,7 @@ else:
                gp.set_active(True)
                bin8.add_pad(gp)
                video_sink8 = bin8
-                print(f"  [pipeline] videoscale(nearest) width={SDL8_SCALE_W} NV12 bin active (height=AR-derived)")
+                print(f"  [pipeline] videoscale(nearest,add-borders) → {_s8_w}×{_s8_h} NV12 bin active")
            else:
                appsink8.set_property("caps", Gst.Caps.from_string(
                    "video/x-raw,format=NV12;video/x-raw,format=BGRA"))