Skip to content

Commit

Permalink
Remux audio-only stream contiguously without dropping samples or inserting silence
Browse files Browse the repository at this point in the history

Strongly types demuxer and remuxer track input and output
Fixes support for audio-only playlist with AAC segments containing ID3 "com.apple.streaming.transportStreamTimestamp" that should be ignored:
https://playertest.longtailvideo.com/adaptive/id3/playlist.m3u8
  • Loading branch information
Rob Walch committed Jan 1, 2021
1 parent 1037f0a commit e478757
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 66 deletions.
17 changes: 9 additions & 8 deletions src/demux/base-audio-demuxer.ts
Expand Up @@ -2,17 +2,19 @@ import * as ID3 from '../demux/id3';
import type {
DemuxerResult,
Demuxer,
DemuxedTrack,
DemuxedAudioTrack,
AppendedAudioFrame,
DemuxedMetadataTrack,
DemuxedAvcTrack,
DemuxedUserdataTrack,
} from '../types/demuxer';
import { dummyTrack } from './dummy-demuxed-track';
import { appendUint8Array } from '../utils/mp4-tools';
import { sliceUint8 } from '../utils/typed-array';

class BaseAudioDemuxer implements Demuxer {
protected _audioTrack!: DemuxedAudioTrack;
protected _id3Track!: DemuxedTrack;
protected _id3Track!: DemuxedMetadataTrack;
protected frameIndex: number = 0;
protected cachedData: Uint8Array | null = null;
protected initPTS: number | null = null;
Expand Down Expand Up @@ -59,7 +61,7 @@ class BaseAudioDemuxer implements Demuxer {
const timestamp = id3Data ? ID3.getTimeStamp(id3Data) : undefined;
const length = data.length;

if (this.initPTS === null) {
if (this.frameIndex === 0 || this.initPTS === null) {
this.initPTS = initPTSFn(timestamp, timeOffset);
}

Expand Down Expand Up @@ -106,9 +108,9 @@ class BaseAudioDemuxer implements Demuxer {

return {
audioTrack: track,
avcTrack: dummyTrack(),
avcTrack: dummyTrack() as DemuxedAvcTrack,
id3Track,
textTrack: dummyTrack(),
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
}

Expand All @@ -129,14 +131,13 @@ class BaseAudioDemuxer implements Demuxer {
}

this.frameIndex = 0;
this.initPTS = null;
this.cachedData = null;

return {
audioTrack: this._audioTrack,
avcTrack: dummyTrack(),
avcTrack: dummyTrack() as DemuxedAvcTrack,
id3Track: this._id3Track,
textTrack: dummyTrack(),
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
}

Expand Down
30 changes: 19 additions & 11 deletions src/demux/mp4demuxer.ts
@@ -1,7 +1,15 @@
/**
* MP4 demuxer
*/
import { Demuxer, DemuxerResult, DemuxedTrack } from '../types/demuxer';
import {
Demuxer,
DemuxerResult,
DemuxedTrack,
PassthroughVideoTrack,
DemuxedAudioTrack,
DemuxedUserdataTrack,
DemuxedMetadataTrack,
} from '../types/demuxer';
import {
findBox,
segmentValidRange,
Expand Down Expand Up @@ -38,7 +46,7 @@ class MP4Demuxer implements Demuxer {
demux(data): DemuxerResult {
// Load all data into the avc track. The CMAF remuxer will look for the data in the samples object; the rest of the fields do not matter
let avcSamples = data;
const avcTrack = dummyTrack();
const avcTrack = dummyTrack() as PassthroughVideoTrack;
if (this.config.progressive) {
// Split the bytestream into two ranges: one encompassing all data up until the start of the last moof, and everything else.
// This is done to guarantee that we're sending valid data to MSE - when demuxing progressively, we have no guarantee
Expand All @@ -48,29 +56,29 @@ class MP4Demuxer implements Demuxer {
}
const segmentedData = segmentValidRange(avcSamples);
this.remainderData = segmentedData.remainder;
avcTrack.samples = segmentedData.valid;
avcTrack.samples = segmentedData.valid || new Uint8Array();
} else {
avcTrack.samples = avcSamples;
}

return {
audioTrack: dummyTrack(),
audioTrack: dummyTrack() as DemuxedAudioTrack,
avcTrack,
id3Track: dummyTrack(),
textTrack: dummyTrack(),
id3Track: dummyTrack() as DemuxedMetadataTrack,
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
}

flush() {
const avcTrack: DemuxedTrack = dummyTrack();
avcTrack.samples = this.remainderData;
const avcTrack = dummyTrack() as PassthroughVideoTrack;
avcTrack.samples = this.remainderData || new Uint8Array();
this.remainderData = null;

return {
audioTrack: dummyTrack(),
audioTrack: dummyTrack() as DemuxedAudioTrack,
avcTrack,
id3Track: dummyTrack(),
textTrack: dummyTrack(),
id3Track: dummyTrack() as DemuxedMetadataTrack,
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
}

Expand Down
25 changes: 19 additions & 6 deletions src/demux/tsdemuxer.ts
Expand Up @@ -25,6 +25,7 @@ import {
DemuxerResult,
AvcSample,
DemuxedMetadataTrack,
DemuxedUserdataTrack,
} from '../types/demuxer';
import { appendUint8Array } from '../utils/mp4-tools';
import { utf8ArrayToStr } from '../demux/id3';
Expand Down Expand Up @@ -66,8 +67,8 @@ class TSDemuxer implements Demuxer {
private _avcTrack!: DemuxedAvcTrack;
private _audioTrack!: DemuxedAudioTrack;
private _id3Track!: DemuxedMetadataTrack;
private _txtTrack!: DemuxedTrack;
private aacOverFlow: any;
private _txtTrack!: DemuxedUserdataTrack;
private aacOverFlow: Uint8Array | null = null;
private avcSample: AvcSample | null = null;
private remainderData: Uint8Array | null = null;

Expand Down Expand Up @@ -141,10 +142,22 @@ class TSDemuxer implements Demuxer {
this.pmtParsed = false;
this._pmtId = -1;

this._avcTrack = TSDemuxer.createTrack('video', duration);
this._audioTrack = TSDemuxer.createTrack('audio', duration);
this._id3Track = TSDemuxer.createTrack('id3', duration);
this._txtTrack = TSDemuxer.createTrack('text', duration);
this._avcTrack = TSDemuxer.createTrack(
'video',
duration
) as DemuxedAvcTrack;
this._audioTrack = TSDemuxer.createTrack(
'audio',
duration
) as DemuxedAudioTrack;
this._id3Track = TSDemuxer.createTrack(
'id3',
duration
) as DemuxedMetadataTrack;
this._txtTrack = TSDemuxer.createTrack(
'text',
duration
) as DemuxedUserdataTrack;
this._audioTrack.isAAC = true;

// flush any partial content
Expand Down
59 changes: 35 additions & 24 deletions src/remux/mp4-remuxer.ts
Expand Up @@ -13,10 +13,12 @@ import {
RemuxedUserdata,
} from '../types/remuxer';
import type {
AudioSample,
AvcSample,
DemuxedAudioTrack,
DemuxedAvcTrack,
DemuxedTrack,
DemuxedMetadataTrack,
DemuxedUserdataTrack,
} from '../types/demuxer';
import type { TrackSet } from '../types/track';
import type { SourceBufferName } from '../types/buffer';
Expand Down Expand Up @@ -110,8 +112,8 @@ export default class MP4Remuxer implements Remuxer {
remux(
audioTrack: DemuxedAudioTrack,
videoTrack: DemuxedAvcTrack,
id3Track: DemuxedTrack,
textTrack: DemuxedTrack,
id3Track: DemuxedMetadataTrack,
textTrack: DemuxedUserdataTrack,
timeOffset: number,
accurateTimeOffset: boolean,
flush: boolean
Expand Down Expand Up @@ -201,7 +203,7 @@ export default class MP4Remuxer implements Remuxer {
audioTimeOffset,
this.isAudioContiguous,
accurateTimeOffset,
videoTimeOffset
enoughVideoSamples ? videoTimeOffset : undefined
);
if (enoughVideoSamples) {
const audioTrackLength = audio ? audio.endPTS - audio.startPTS : 0;
Expand Down Expand Up @@ -677,7 +679,7 @@ export default class MP4Remuxer implements Remuxer {
const rawMPEG: boolean = !track.isAAC && this.typeSupported.mpeg;
const outputSamples: Array<Mp4Sample> = [];

let inputSamples: Array<any> = track.samples;
let inputSamples: Array<AudioSample> = track.samples;
let offset: number = rawMPEG ? 0 : 8;
let fillFrame: any;
let nextAudioPts: number = this.nextAudioPts || -1;
Expand All @@ -692,40 +694,44 @@ export default class MP4Remuxer implements Remuxer {
// contiguous fragments are consecutive fragments from same quality level (same level, new SN = old SN + 1)
// this helps ensuring audio continuity
// and this also avoids audio glitches/cut when switching quality, or reporting wrong duration on first audio frame
const timeOffsetMpegTS = timeOffset * inputTimeScale;
this.isAudioContiguous = contiguous =
contiguous ||
((inputSamples.length &&
nextAudioPts > 0 &&
((accurateTimeOffset &&
Math.abs(timeOffset - nextAudioPts / inputTimeScale) < 0.1) ||
Math.abs(inputSamples[0].pts - nextAudioPts - initPTS) <
Math.abs(timeOffsetMpegTS - nextAudioPts) < 9000) ||
Math.abs(
PTSNormalize(inputSamples[0].pts - initPTS, timeOffsetMpegTS) -
nextAudioPts
) <
20 * inputSampleDuration)) as boolean);

// compute normalized PTS
inputSamples.forEach(function (sample) {
sample.pts = sample.dts = PTSNormalize(
sample.pts - initPTS,
timeOffset * inputTimeScale
timeOffsetMpegTS
);
});

// filter out sample with negative PTS that are not playable anyway
// if we don't remove these negative samples, they will shift all audio samples forward.
// leading to audio overlap between current / next fragment
inputSamples = inputSamples.filter((sample) => sample.pts >= 0);

// in case all samples have negative PTS, and have been filtered out, return now
if (!inputSamples.length) {
return;
}

if (!contiguous || nextAudioPts < 0) {
// filter out sample with negative PTS that are not playable anyway
// if we don't remove these negative samples, they will shift all audio samples forward.
// leading to audio overlap between current / next fragment
inputSamples = inputSamples.filter((sample) => sample.pts >= 0);

// in case all samples have negative PTS, and have been filtered out, return now
if (!inputSamples.length) {
return;
}

if (videoTimeOffset === 0) {
// Set the start to 0 to match video so that start gaps larger than inputSampleDuration are filled with silence
nextAudioPts = 0;
} else if (accurateTimeOffset) {
// When not seeking, not live, and LevelDetails.PTSKnown, use fragment start as predicted next audio PTS
nextAudioPts = Math.max(0, timeOffset * inputTimeScale);
nextAudioPts = Math.max(0, timeOffsetMpegTS);
} else {
// if frags are not contiguous and if we can't trust time offset, let's use first sample PTS as next audio PTS
nextAudioPts = inputSamples[0].pts;
Expand All @@ -747,8 +753,11 @@ export default class MP4Remuxer implements Remuxer {
const delta = pts - nextPts;
const duration = Math.abs((1000 * delta) / inputTimeScale);

// If we're overlapping by more than a duration, drop this sample
if (delta <= -maxAudioFramesDrift * inputSampleDuration) {
// When remuxing with video, if we're overlapping by more than a duration, drop this sample to stay in sync
if (
delta <= -maxAudioFramesDrift * inputSampleDuration &&
videoTimeOffset !== undefined
) {
if (contiguous || i > 0) {
logger.warn(
`[mp4-remuxer]: Dropping 1 audio frame @ ${(
Expand Down Expand Up @@ -776,9 +785,11 @@ export default class MP4Remuxer implements Remuxer {
// 1: We're more than maxAudioFramesDrift frame away
// 2: Not more than MAX_SILENT_FRAME_DURATION away
// 3: currentTime (aka nextPtsNorm) is not 0
// 4: remuxing with video (videoTimeOffset !== undefined)
else if (
delta >= maxAudioFramesDrift * inputSampleDuration &&
duration < MAX_SILENT_FRAME_DURATION
duration < MAX_SILENT_FRAME_DURATION &&
videoTimeOffset !== undefined
) {
const missing = Math.floor(delta / inputSampleDuration);
// Adjust nextPts so that silent samples are aligned with media pts. This will prevent media samples from
Expand Down Expand Up @@ -1026,7 +1037,7 @@ export default class MP4Remuxer implements Remuxer {
}

remuxID3(
track: DemuxedTrack,
track: DemuxedMetadataTrack,
timeOffset: number
): RemuxedMetadata | undefined {
const length = track.samples.length;
Expand Down Expand Up @@ -1055,7 +1066,7 @@ export default class MP4Remuxer implements Remuxer {
}

remuxText(
track: DemuxedTrack,
track: DemuxedUserdataTrack,
timeOffset: number
): RemuxedUserdata | undefined {
const length = track.samples.length;
Expand Down
7 changes: 4 additions & 3 deletions src/remux/passthrough-remuxer.ts
Expand Up @@ -15,7 +15,8 @@ import type {
} from '../types/remuxer';
import type {
DemuxedAudioTrack,
DemuxedTrack,
DemuxedMetadataTrack,
DemuxedUserdataTrack,
PassthroughVideoTrack,
} from '../types/demuxer';

Expand Down Expand Up @@ -102,8 +103,8 @@ class PassThroughRemuxer implements Remuxer {
remux(
audioTrack: DemuxedAudioTrack,
videoTrack: PassthroughVideoTrack,
id3Track: DemuxedTrack,
textTrack: DemuxedTrack,
id3Track: DemuxedMetadataTrack,
textTrack: DemuxedUserdataTrack,
timeOffset: number
): RemuxerResult {
let { initPTS, lastEndDTS } = this;
Expand Down

0 comments on commit e478757

Please sign in to comment.