@ -431,24 +431,34 @@ class GStreamerBackend(PlayerBackend):
# Hardware decode (NV12): insert a videoscale → capsfilter chain inside a
# GstBin before the appsink so playbin accepts it as a single video-sink.
#
# videoscale(method=nearest-neighbour) scales the decoded source to the
# video-area width while letting GStreamer pick the output height from
# the source's native aspect ratio. Constraining only the width (not
# both width and height) means GStreamer will never stretch the video:
# it always preserves the source DAR. The resulting frame is then
# centred in the SDL viewport by _fit_frame_to_viewport() which adds
# the necessary letterbox/pillarbox margins through SDL_RenderCopy.
# Strategy: compute a 16:9 target box that fits inside the video area,
# then use videoscale with add-borders=True.
# - For 16:9 source (most HD content): source AR == target AR → no
# borders, full frame filled with content. No distortion.
# - For non-16:9 source (4:3, ultra-wide, etc.): add-borders adds
# pillar- or letterboxes to preserve the source DAR within the 16:9
# output frame. No distortion.
# The output height is derived from the video area width using a 16:9
# ratio rather than left as unconstrained, because leaving height out of
# the capsfilter causes GStreamer to keep the source height unchanged
# (caps fixation picks the identity value for unconstrained dimensions).
# Nearest-neighbour skips ~56% of source rows so only ~44% of source
# cache lines are fetched; Python memmove drops from ~32 ms to ~1 ms.
#
# capsfilter — enforces format=NV12 and the scaled output size computed
# below (scale_w × scale_h); the height is a fixed value, not a range.
vp_w , vp_h , vp_top , vp_bottom , vp_left , vp_right = self . _viewport
video_w = max ( 4 , vp_w - vp_left - vp_right )
scale_w = ( video_w / / 2 ) * 2
if scale_w < 4 :
scale_w = 640
log . info ( " NV12 appsink: videoscale(nearest) → width= %d (AR-preserving) before appsink " , scale_w )
video_h = max ( 4 , vp_h - vp_top - vp_bottom )
_AR = 16 / 9 # target AR — efficient for typical HD content
if video_w / video_h > = _AR :
# Video area is wider than 16:9 → height is the limiting dimension.
scale_h = ( video_h / / 2 ) * 2
scale_w = ( int ( scale_h * _AR ) / / 2 ) * 2
else :
# Video area is taller than 16:9 → width is the limiting dimension.
scale_w = ( video_w / / 2 ) * 2
scale_h = ( int ( scale_w / _AR ) / / 2 ) * 2
if scale_w < 4 or scale_h < 4 :
scale_w , scale_h = 640 , 360
log . info ( " NV12 appsink: videoscale(nearest,add-borders) → %d x %d before appsink " , scale_w , scale_h )
scale = self . _gst . ElementFactory . make ( " videoscale " , " vscale " )
capsfilter = self . _gst . ElementFactory . make ( " capsfilter " , " vcaps " )
@ -461,18 +471,14 @@ class GStreamerBackend(PlayerBackend):
# nearest-neighbour: accesses only the source pixels needed for each
# output sample (strided reads), skipping ~56% of source rows entirely.
# Height is intentionally OMITTED from the caps so GStreamer computes it
# from the source's display aspect ratio (DAR). Specifying a height
# range like (int)[2,2160] is wrong — GStreamer's caps fixation picks the
# nearest compatible value (the source height itself), bypassing scaling.
# Without any height cap, videoscale scales width to scale_w and derives
# the height that preserves the DAR; NV12's even-dimension requirement is
# satisfied automatically by GStreamer's caps fixation rounding.
# add-borders=True: GStreamer letterboxes/pillarboxes any source that
# doesn't match the 16:9 target to preserve the source DAR.
scale . set_property ( " method " , 0 )
scale . set_property ( " add-borders " , True )
capsfilter . set_property (
" caps " ,
self . _gst . Caps . from_string (
f " video/x-raw,format=NV12,width= { scale_w } "
f " video/x-raw,format=NV12,width= { scale_w } ,height= { scale_h } "
) ,
)