feat(server): add hls playlist hint (#29150)

* add playlist hint

* update api

* unused import

* update crf in tests
This commit is contained in:
Mert
2026-06-17 19:02:10 -04:00
committed by GitHub
parent cbe34d7931
commit 7ef2de6b53
11 changed files with 153 additions and 22 deletions

View File

@@ -982,7 +982,9 @@ class AssetsApi {
/// * [String] key:
///
/// * [String] slug:
Future<Response> getMediaPlaylistWithHttpInfo(String id, String sessionId, int variantIndex, { String? key, String? slug, Future<void>? abortTrigger, }) async {
///
/// * [num] xImmichHlsPos:
Future<Response> getMediaPlaylistWithHttpInfo(String id, String sessionId, int variantIndex, { String? key, String? slug, num? xImmichHlsPos, Future<void>? abortTrigger, }) async {
// ignore: prefer_const_declarations
final apiPath = r'/assets/{id}/video/stream/{sessionId}/{variantIndex}/playlist.m3u8'
.replaceAll('{id}', id)
@@ -1003,6 +1005,10 @@ class AssetsApi {
queryParams.addAll(_queryParams('', 'slug', slug));
}
if (xImmichHlsPos != null) {
headerParams[r'x-immich-hls-pos'] = parameterToString(xImmichHlsPos);
}
const contentTypes = <String>[];
@@ -1033,8 +1039,10 @@ class AssetsApi {
/// * [String] key:
///
/// * [String] slug:
Future<String?> getMediaPlaylist(String id, String sessionId, int variantIndex, { String? key, String? slug, Future<void>? abortTrigger, }) async {
final response = await getMediaPlaylistWithHttpInfo(id, sessionId, variantIndex, key: key, slug: slug, abortTrigger: abortTrigger,);
///
/// * [num] xImmichHlsPos:
Future<String?> getMediaPlaylist(String id, String sessionId, int variantIndex, { String? key, String? slug, num? xImmichHlsPos, Future<void>? abortTrigger, }) async {
final response = await getMediaPlaylistWithHttpInfo(id, sessionId, variantIndex, key: key, slug: slug, xImmichHlsPos: xImmichHlsPos, abortTrigger: abortTrigger,);
if (response.statusCode >= HttpStatus.badRequest) {
throw ApiException(response.statusCode, await _decodeBodyBytes(response));
}

View File

@@ -4924,6 +4924,15 @@
"maximum": 9007199254740991,
"type": "integer"
}
},
{
"name": "x-immich-hls-pos",
"required": false,
"in": "header",
"schema": {
"minimum": 0,
"type": "number"
}
}
],
"responses": {

View File

@@ -4452,12 +4452,13 @@ export function endSession({ id, key, sessionId, slug }: {
/**
* Get HLS media playlist
*/
export function getMediaPlaylist({ id, key, sessionId, slug, variantIndex }: {
export function getMediaPlaylist({ id, key, sessionId, slug, variantIndex, xImmichHlsPos }: {
id: string;
key?: string;
sessionId: string;
slug?: string;
variantIndex: number;
xImmichHlsPos?: number;
}, opts?: Oazapfts.RequestOpts) {
return oazapfts.ok(oazapfts.fetchBlob<{
status: 200;
@@ -4466,7 +4467,10 @@ export function getMediaPlaylist({ id, key, sessionId, slug, variantIndex }: {
key,
slug
}))}`, {
...opts
...opts,
headers: oazapfts.mergeHeaders(opts?.headers, {
"x-immich-hls-pos": xImmichHlsPos
})
}));
}
/**

View File

@@ -223,6 +223,12 @@ export const SUPPORTED_HWA_CODECS: Record<TranscodeHardwareAcceleration, VideoCo
export const HLS_BACKPRESSURE_PAUSE_SEGMENTS = 30;
export const HLS_BACKPRESSURE_RESUME_SEGMENTS = 15;
export const HLS_CLEANUP_INTERVAL_MS = 60 * 1000;
// Per-codec CRF value for HLS transcodes; looked up by the variant's codec and
// passed as the `crf` option when the transcode command for a variant is built.
export const HLS_CRF: Record<VideoCodec, number> = {
[VideoCodec.H264]: 23,
[VideoCodec.Hevc]: 28,
[VideoCodec.Vp9]: 31,
[VideoCodec.Av1]: 35,
};
export const HLS_INACTIVITY_TIMEOUT_MS = 5 * 60 * 1000;
export const HLS_LEASE_DURATION_MS = 30 * 60 * 1000;
export const HLS_PLAYLIST_CONTENT_TYPE = 'application/vnd.apple.mpegurl';

View File

@@ -6,6 +6,7 @@ import { HLS_PLAYLIST_CONTENT_TYPE } from 'src/constants';
import { Endpoint, HistoryBuilder } from 'src/decorators';
import { AuthDto } from 'src/dtos/auth.dto';
import {
HlsPlaylistHeaderDto,
HlsSegmentHeaderDto,
HlsSegmentParamDto,
HlsSessionParamDto,
@@ -50,8 +51,17 @@ export class VideoStreamController {
description: 'Returns an HLS media playlist for one variant of the streaming session.',
history: new HistoryBuilder().added('v3').alpha('v3'),
})
getMediaPlaylist(@Auth() auth: AuthDto, @Param() { id, sessionId }: HlsVariantParamDto) {
return this.service.getMediaPlaylist(auth, id, sessionId);
/**
 * Serves the media playlist for one variant of a streaming session.
 *
 * The request headers are re-parsed through `HlsPlaylistHeaderDto.create`
 * (presumably running the schema's coercion and validation — confirm against
 * `createZodDto`); any error it throws is rethrown as a `ZodValidationException`.
 * The optional position hint header is then forwarded to the service.
 */
getMediaPlaylist(
  @Auth() auth: AuthDto,
  @Param() { id, sessionId, variantIndex }: HlsVariantParamDto,
  @Headers() headers: HlsPlaylistHeaderDto,
) {
  let parsedHeaders: HlsPlaylistHeaderDto;
  try {
    parsedHeaders = HlsPlaylistHeaderDto.create(headers);
  } catch (error) {
    throw new ZodValidationException(error);
  }

  // Undefined when the client did not send the hint header.
  const positionHint = parsedHeaders[ImmichHeader.HlsPosition];
  return this.service.getMediaPlaylist(auth, id, sessionId, variantIndex, positionHint);
}
@Get(':id/video/stream/:sessionId/:variantIndex/:filename')

View File

@@ -32,3 +32,11 @@ const HlsSegmentHeaderSchema = z.object({
});
export class HlsSegmentHeaderDto extends createZodDto(HlsSegmentHeaderSchema) {}
// Request headers accepted by the HLS media playlist endpoint.
const HlsPlaylistHeaderSchema = z.object({
// Lets the client hint at which segment will be loaded after the playlist.
// A position rather than a segment index since indices aren't comparable across variants.
// The header value is coerced to a number and must be >= 0; the header may be omitted.
[ImmichHeader.HlsPosition]: z.coerce.number().min(0).optional(),
});
// DTO class generated from HlsPlaylistHeaderSchema.
export class HlsPlaylistHeaderDto extends createZodDto(HlsPlaylistHeaderSchema) {}

View File

@@ -25,6 +25,7 @@ export enum ImmichHeader {
Checksum = 'x-immich-checksum',
CorrelationId = 'X-Correlation-ID',
HlsInitSegment = 'x-immich-hls-msn',
HlsPosition = 'x-immich-hls-pos',
}
export enum ImmichQuery {

View File

@@ -8,6 +8,7 @@ import { newTestService, ServiceMocks } from 'test/utils';
// EXTINF values come from FFmpeg's playlist to enforce an exact match
const eiffelExpectedMediaPlaylist = `#EXTM3U
#EXT-X-VERSION:7
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-TARGETDURATION:2
#EXT-X-MEDIA-SEQUENCE:0
#EXT-X-PLAYLIST-TYPE:VOD
@@ -41,6 +42,7 @@ seg_11.m4s
const waterfallExpectedMediaPlaylist = `#EXTM3U
#EXT-X-VERSION:7
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-TARGETDURATION:2
#EXT-X-MEDIA-SEQUENCE:0
#EXT-X-PLAYLIST-TYPE:VOD
@@ -62,6 +64,7 @@ seg_5.m4s
const trainExpectedMediaPlaylist = `#EXTM3U
#EXT-X-VERSION:7
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-TARGETDURATION:2
#EXT-X-MEDIA-SEQUENCE:0
#EXT-X-PLAYLIST-TYPE:VOD
@@ -95,6 +98,7 @@ const sessionId = '00000000-0000-0000-0000-000000000000';
const eiffelExpectedMasterDisabled = `#EXTM3U
#EXT-X-VERSION:7
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-STREAM-INF:BANDWIDTH=1000000,RESOLUTION=480x852,CODECS="av01.0.04M.08,mp4a.40.2",VIDEO-RANGE=SDR,FRAME-RATE=24.910
${sessionId}/0/playlist.m3u8
#EXT-X-STREAM-INF:BANDWIDTH=1200000,RESOLUTION=480x852,CODECS="hvc1.1.6.L90.B0,mp4a.40.2",VIDEO-RANGE=SDR,FRAME-RATE=24.910
@@ -117,6 +121,7 @@ ${sessionId}/8/playlist.m3u8
const eiffelExpectedMasterRkmpp = `#EXTM3U
#EXT-X-VERSION:7
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-STREAM-INF:BANDWIDTH=1200000,RESOLUTION=480x852,CODECS="hvc1.1.6.L90.B0,mp4a.40.2",VIDEO-RANGE=SDR,FRAME-RATE=24.910
${sessionId}/1/playlist.m3u8
#EXT-X-STREAM-INF:BANDWIDTH=2500000,RESOLUTION=480x852,CODECS="avc1.64001e,mp4a.40.2",VIDEO-RANGE=SDR,FRAME-RATE=24.910
@@ -133,6 +138,7 @@ ${sessionId}/8/playlist.m3u8
const waterfallExpectedMasterDisabled = `#EXTM3U
#EXT-X-VERSION:7
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-STREAM-INF:BANDWIDTH=1000000,RESOLUTION=480x852,CODECS="av01.0.04M.08,mp4a.40.2",VIDEO-RANGE=SDR,FRAME-RATE=29.830
${sessionId}/0/playlist.m3u8
#EXT-X-STREAM-INF:BANDWIDTH=1200000,RESOLUTION=480x852,CODECS="hvc1.1.6.L90.B0,mp4a.40.2",VIDEO-RANGE=SDR,FRAME-RATE=29.830
@@ -218,12 +224,58 @@ describe(HlsService.name, () => {
it.each(fixtures)('matches FFmpeg for $data.originalPath', async ({ data, playlist }) => {
mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set([assetId]));
mocks.videoStream.getForMediaPlaylist.mockResolvedValue(data);
await expect(sut.getMediaPlaylist(auth, assetId, sessionId)).resolves.toBe(playlist);
await expect(sut.getMediaPlaylist(auth, assetId, sessionId, 0)).resolves.toBe(playlist);
});
it('throws NotFoundException when the session/asset cannot be loaded', async () => {
mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set([assetId]));
await expect(sut.getMediaPlaylist(auth, assetId, sessionId)).rejects.toBeInstanceOf(NotFoundException);
await expect(sut.getMediaPlaylist(auth, assetId, sessionId, 0)).rejects.toBeInstanceOf(NotFoundException);
});
// A position hint passed to getMediaPlaylist is expected to be turned into a
// segment index and sent as an 'HlsSegmentRequest' for the requested variant.
it('prewarms transcoding at the segment containing the hinted position', async () => {
mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set([assetId]));
mocks.videoStream.getForMediaPlaylist.mockResolvedValue(eiffelTower);
// Variant 1 with a hinted position of 10.5.
await sut.getMediaPlaylist(auth, assetId, sessionId, 1, 10.5);
// 10.5 is expected to fall inside segment 5 (presumably ~2s segments for this fixture).
expect(mocks.websocket.serverSend).toHaveBeenCalledWith('HlsSegmentRequest', {
sessionId,
assetId,
variantIndex: 1,
segmentIndex: 5,
});
});
// Without a hint, the prewarm target falls back to the segment after the last
// one requested in this session.
it('prewarms from the last requested segment when no hint is given', async () => {
mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set([assetId]));
mocks.videoStream.getSession.mockResolvedValue({ id: sessionId, assetId } as never);
mocks.storage.checkFileExists.mockResolvedValue(true);
// Request seg_5 on variant 0 first so the session has prior segment traffic.
await sut.getSegment(auth, assetId, sessionId, 0, 'seg_5.m4s');
mocks.videoStream.getForMediaPlaylist.mockResolvedValue(eiffelTower);
// Playlist for a different variant (1), no position hint.
await sut.getMediaPlaylist(auth, assetId, sessionId, 1);
// Expected target is last requested segment + 1 = 6.
expect(mocks.websocket.serverSend).toHaveBeenCalledWith('HlsSegmentRequest', {
sessionId,
assetId,
variantIndex: 1,
segmentIndex: 6,
});
});
// With neither a hint nor a previously requested segment there is nothing to
// prewarm, so no websocket message at all is expected.
it('does not prewarm without a hint or prior segment traffic', async () => {
mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set([assetId]));
mocks.videoStream.getForMediaPlaylist.mockResolvedValue(eiffelTower);
await sut.getMediaPlaylist(auth, assetId, sessionId, 1);
expect(mocks.websocket.serverSend).not.toHaveBeenCalled();
});
// Requesting the playlist of the variant that segments were last fetched from
// must not trigger a prewarm, even when a position hint is supplied.
it('does not prewarm the variant the session is already playing', async () => {
mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set([assetId]));
mocks.videoStream.getSession.mockResolvedValue({ id: sessionId, assetId } as never);
mocks.storage.checkFileExists.mockResolvedValue(true);
// Segment traffic on variant 1 makes it the session's last variant.
await sut.getSegment(auth, assetId, sessionId, 1, 'seg_5.m4s');
mocks.videoStream.getForMediaPlaylist.mockResolvedValue(eiffelTower);
await sut.getMediaPlaylist(auth, assetId, sessionId, 1, 12.5);
// Only 'HlsSegmentRequest' is excluded: serverSend may already have been called
// by the getSegment call above (presumably with 'HlsHeartbeat').
expect(mocks.websocket.serverSend).not.toHaveBeenCalledWith('HlsSegmentRequest', expect.anything());
});
});
@@ -314,7 +366,7 @@ describe(HlsService.name, () => {
);
});
it('uses the target segment for init.mp4 when provided', async () => {
it('uses the initSegment hint for init.mp4', async () => {
await sut.getSegment(auth, assetId, sessionId, variantIndex, 'init.mp4', 7);
expect(mocks.websocket.serverSend).toHaveBeenCalledWith('HlsHeartbeat', {
sessionId,
@@ -323,18 +375,18 @@ describe(HlsService.name, () => {
});
});
it('prefers the target segment over the lastRequested + 1 fallback', async () => {
it('prefers the initSegment hint over the lastRequested + 1 fallback', async () => {
await sut.getSegment(auth, assetId, sessionId, variantIndex, 'seg_5.m4s'); // fallback would be 6
mocks.websocket.serverSend.mockClear();
await sut.getSegment(auth, assetId, sessionId, variantIndex, 'init.mp4', 12);
await sut.getSegment(auth, assetId, sessionId, variantIndex, 'init.mp4', 10);
expect(mocks.websocket.serverSend).toHaveBeenCalledWith('HlsHeartbeat', {
sessionId,
variantIndex,
segmentIndex: 12,
segmentIndex: 10,
});
});
it('ignores the target segment for media segment requests (the filename wins)', async () => {
it('ignores the initSegment hint for media segment requests (the filename wins)', async () => {
await sut.getSegment(auth, assetId, sessionId, variantIndex, 'seg_5.m4s', 99);
expect(mocks.websocket.serverSend).toHaveBeenCalledWith('HlsHeartbeat', {
sessionId,

View File

@@ -21,6 +21,7 @@ import { ImmichFileResponse } from 'src/utils/file';
import { getOutputSize } from 'src/utils/media';
type AssetWithStreamInfo = { videoStream: VideoStreamInfo & { timeBase: number }; packets: VideoPacketInfo };
type Segmentation = { fps: number; framesPerSegment: number; segmentCount: number; segmentDuration: number };
type ApiSession = { lastRequestedSegment: number | null; lastVariantIndex: number | null };
@Injectable()
@@ -71,7 +72,7 @@ export class HlsService extends BaseService {
return this.generateMainPlaylist(sessionId, ffmpeg, asset);
}
async getMediaPlaylist(auth: AuthDto, assetId: string, sessionId: string) {
async getMediaPlaylist(auth: AuthDto, assetId: string, sessionId: string, variantIndex: number, position?: number) {
await this.requireAccess({ auth, permission: Permission.AssetView, ids: [assetId] });
const asset = await this.videoStreamRepository.getForMediaPlaylist(assetId, sessionId);
@@ -79,7 +80,11 @@ export class HlsService extends BaseService {
throw new NotFoundException('Asset not found or metadata not yet ready for streaming');
}
return this.generateMediaPlaylist(asset);
const segmentation = this.getSegmentation(asset);
const hintedSegment = position === undefined ? undefined : this.positionToSegmentIndex(segmentation, position);
this.prewarmVariant(assetId, sessionId, variantIndex, hintedSegment);
return this.generateMediaPlaylist(asset, segmentation);
}
async getSegment(
@@ -129,7 +134,7 @@ export class HlsService extends BaseService {
const fps = ((asset.packets.packetCount * asset.videoStream.timeBase) / asset.packets.totalDuration).toFixed(3);
const sourceResolution = Math.min(asset.videoStream.height, asset.videoStream.width);
const targetResolution = Math.max(sourceResolution, HLS_VARIANTS[0].resolution);
const lines = ['#EXTM3U', `#EXT-X-VERSION:${HLS_VERSION}`];
const lines = ['#EXTM3U', `#EXT-X-VERSION:${HLS_VERSION}`, '#EXT-X-INDEPENDENT-SEGMENTS'];
for (let i = 0; i < HLS_VARIANTS.length; i++) {
const { resolution, bitrate, codec, codecString } = HLS_VARIANTS[i];
if (resolution > targetResolution || !SUPPORTED_HWA_CODECS[ffmpeg.accel].includes(codec)) {
@@ -143,24 +148,33 @@ export class HlsService extends BaseService {
}
lines.push('');
if (lines.length === 3) {
if (lines.length === 4) {
throw new NotFoundException('No supported variants for this video');
}
return lines.join('\n');
}
private generateMediaPlaylist({ videoStream, packets }: AssetWithStreamInfo) {
/**
 * Derives the segment layout for an asset: frame rate, frames per full segment,
 * number of segments, and the duration of a full segment.
 *
 * @param asset video stream info and packet statistics for the asset
 * @returns the segmentation consumed by playlist generation and by the
 *          position-to-segment mapping
 */
private getSegmentation({ videoStream, packets }: AssetWithStreamInfo): Segmentation {
  // Frame rate derived from packet count over total duration
  // (presumably totalDuration is expressed in timeBase ticks — confirm against the repository query).
  const fps = (packets.packetCount * videoStream.timeBase) / packets.totalDuration;
  // A full segment holds a whole number of frames, rounded up from the target duration.
  const framesPerSegment = Math.ceil(HLS_SEGMENT_DURATION * fps);
  // Computed once and returned, instead of being recomputed inline in the result.
  const segmentDuration = framesPerSegment / fps;
  // Rounded up, so the last segment may contain fewer frames than a full one.
  const segmentCount = Math.ceil(packets.outputFrames / framesPerSegment);
  return { fps, framesPerSegment, segmentCount, segmentDuration };
}
/**
 * Maps a playback position to the index of the segment that contains it.
 *
 * The position is expected in the same unit as `segmentDuration`. The result is
 * clamped to be at least 0 and then at most `segmentCount - 1`; because the upper
 * clamp is applied last, a `segmentCount` of 0 yields -1.
 */
private positionToSegmentIndex({ segmentDuration, segmentCount }: Segmentation, position: number) {
  const unclampedIndex = Math.floor(position / segmentDuration);
  const lastIndex = segmentCount - 1;
  const nonNegativeIndex = Math.max(unclampedIndex, 0);
  return Math.min(nonNegativeIndex, lastIndex);
}
private generateMediaPlaylist({ packets }: AssetWithStreamInfo, segmentation: Segmentation) {
const { fps, framesPerSegment, segmentCount, segmentDuration: fullSegmentDuration } = segmentation;
const lastSegmentFrames = packets.outputFrames - framesPerSegment * (segmentCount - 1);
const lastSegmentDuration = lastSegmentFrames / fps;
const lines = [
'#EXTM3U',
`#EXT-X-VERSION:${HLS_VERSION}`,
'#EXT-X-INDEPENDENT-SEGMENTS',
`#EXT-X-TARGETDURATION:${HLS_SEGMENT_DURATION}`,
'#EXT-X-MEDIA-SEQUENCE:0',
'#EXT-X-PLAYLIST-TYPE:VOD',
@@ -175,6 +189,19 @@ export class HlsService extends BaseService {
return lines.join('\n');
}
/**
 * Sends an 'HlsSegmentRequest' for a variant whose playlist was just requested,
 * so work on the segment the client is expected to load next can begin early.
 *
 * Nothing is sent when the session's last variant is already `variantIndex`, or
 * when there is neither a hinted segment nor a previously requested segment.
 *
 * @param hintedSegment segment index derived from the client's position hint, if any;
 *                      takes precedence over the "last requested + 1" fallback
 */
private prewarmVariant(assetId: string, sessionId: string, variantIndex: number, hintedSegment?: number) {
  const session = this.sessions.get(sessionId);
  const isCurrentVariant = session?.lastVariantIndex === variantIndex;
  if (isCurrentVariant) {
    return;
  }

  // Fallback target: the segment right after the last one requested in this session.
  let fallbackSegment: number | undefined;
  if (session && session.lastRequestedSegment !== null) {
    fallbackSegment = session.lastRequestedSegment + 1;
  }

  const segmentIndex = hintedSegment ?? fallbackSegment;
  if (segmentIndex === undefined) {
    return;
  }

  this.websocketRepository.serverSend('HlsSegmentRequest', { sessionId, assetId, variantIndex, segmentIndex });
}
/** Builds the `sessionId:variantIndex:segmentIndex` string key for a segment result. */
private getSegmentKey(result: ArgOf<'HlsSegmentResult'>) {
  const { sessionId, variantIndex, segmentIndex } = result;
  return `${sessionId}:${variantIndex}:${segmentIndex}`;
}

View File

@@ -381,8 +381,6 @@ describe(TranscodingService.name, () => {
'50',
'-keyint_min',
'50',
'-crf',
'23',
'-copyts',
'-r',
'50130000/2012441',
@@ -417,6 +415,8 @@ describe(TranscodingService.name, () => {
'aac',
'-preset',
'12',
'-crf',
'35',
'-svtav1-params',
'hierarchical-levels=3:lookahead=0:enable-tf=0:mbr=4000k',
'-hls_segment_filename',
@@ -436,6 +436,8 @@ describe(TranscodingService.name, () => {
'hvc1',
'-preset',
'ultrafast',
'-crf',
'28',
'-maxrate',
'2500k',
'-bufsize',
@@ -459,6 +461,8 @@ describe(TranscodingService.name, () => {
'aac',
'-preset',
'ultrafast',
'-crf',
'23',
'-maxrate',
'2500k',
'-bufsize',

View File

@@ -5,6 +5,7 @@ import {
HLS_BACKPRESSURE_PAUSE_SEGMENTS,
HLS_BACKPRESSURE_RESUME_SEGMENTS,
HLS_CLEANUP_INTERVAL_MS,
HLS_CRF,
HLS_INACTIVITY_TIMEOUT_MS,
HLS_LEASE_DURATION_MS,
HLS_SEGMENT_DURATION,
@@ -221,6 +222,7 @@ export class TranscodingService extends BaseService {
targetResolution: String(variant.resolution),
maxBitrate: `${Math.round(variant.bitrate / 1000)}k`,
gopSize: gop,
crf: HLS_CRF[variant.codec],
},
this.videoInterfaces,
{ strictGop: true, lowLatency: true },