Do not append 608/WebVTT/IMSC cues that have already been appended

Merges changes from #3321 into master, while maintaining that `config.cueHandler` must append cues to their supplied TextTrack
video-dev · Jan 28, 2021 · 7d02994 · 7d02994
1 parent cc499ec
commit 7d02994
Show file tree

Hide file tree

Showing 5 changed files with 87 additions and 65 deletions.
diff --git a/src/controller/timeline-controller.ts b/src/controller/timeline-controller.ts
@@ -2,9 +2,13 @@ import { Events } from '../events';
 import Cea608Parser, { CaptionScreen } from '../utils/cea-608-parser';
 import OutputFilter from '../utils/output-filter';
 import { parseWebVTT } from '../utils/webvtt-parser';
-import { logger } from '../utils/logger';
-import { sendAddTrackEvent, clearCurrentCues } from '../utils/texttrack-utils';
+import {
+  sendAddTrackEvent,
+  clearCurrentCues,
+  addCueToTrack,
+} from '../utils/texttrack-utils';
 import { parseIMSC1, IMSC1_CODEC } from '../utils/imsc1-ttml-parser';
+import { PlaylistLevelType } from '../types/loader';
 import Fragment from '../loader/fragment';
 import {
   FragParsingUserdataData,
@@ -15,7 +19,7 @@ import {
   InitPTSFoundData,
   SubtitleTracksUpdatedData,
 } from '../types/events';
-import { PlaylistLevelType } from '../types/loader';
+import { logger } from '../utils/logger';
 import type Hls from '../hls';
 import type { ComponentAPI } from '../types/component-api';
 import type { HlsConfig } from '../config';
@@ -127,7 +131,7 @@ export class TimelineController implements ComponentAPI {
     hls.off(Events.SUBTITLE_TRACKS_CLEARED, this.onSubtitleTracksCleared, this);
   }
 
-  addCues(
+  public addCues(
     trackName: string,
     startTime: number,
     endTime: number,
@@ -158,12 +162,8 @@ export class TimelineController implements ComponentAPI {
     }
 
     if (this.config.renderTextTracksNatively) {
-      this.Cues.newCue(
-        this.captionsTracks[trackName],
-        startTime,
-        endTime,
-        screen
-      );
+      const track = this.captionsTracks[trackName];
+      this.Cues.newCue(track, startTime, endTime, screen);
     } else {
       const cues = this.Cues.newCue(null, startTime, endTime, screen);
       this.hls.trigger(Events.CUES_PARSED, {
@@ -175,7 +175,7 @@ export class TimelineController implements ComponentAPI {
   }
 
   // Triggered when an initial PTS is found; used for synchronisation of WebVTT.
-  onInitPtsFound(
+  private onInitPtsFound(
     event: Events.INIT_PTS_FOUND,
     { frag, id, initPTS, timescale }: InitPTSFoundData
   ) {
@@ -195,7 +195,7 @@ export class TimelineController implements ComponentAPI {
     }
   }
 
-  getExistingTrack(trackName: string): TextTrack | null {
+  private getExistingTrack(trackName: string): TextTrack | null {
     const { media } = this;
     if (media) {
       for (let i = 0; i < media.textTracks.length; i++) {
@@ -208,15 +208,15 @@ export class TimelineController implements ComponentAPI {
     return null;
   }
 
-  createCaptionsTrack(trackName: string) {
+  public createCaptionsTrack(trackName: string) {
     if (this.config.renderTextTracksNatively) {
       this.createNativeTrack(trackName);
     } else {
       this.createNonNativeTrack(trackName);
     }
   }
 
-  createNativeTrack(trackName: string) {
+  private createNativeTrack(trackName: string) {
     if (this.captionsTracks[trackName]) {
       return;
     }
@@ -238,7 +238,7 @@ export class TimelineController implements ComponentAPI {
     }
   }
 
-  createNonNativeTrack(trackName: string) {
+  private createNonNativeTrack(trackName: string) {
     if (this.nonNativeCaptionsTracks[trackName]) {
       return;
     }
@@ -259,7 +259,7 @@ export class TimelineController implements ComponentAPI {
     this.hls.trigger(Events.NON_NATIVE_TEXT_TRACKS_FOUND, { tracks: [track] });
   }
 
-  createTextTrack(
+  private createTextTrack(
     kind: TextTrackKind,
     label: string,
     lang?: string
@@ -271,16 +271,19 @@ export class TimelineController implements ComponentAPI {
     return media.addTextTrack(kind, label, lang);
   }
 
-  destroy() {
+  public destroy() {
     this._unregisterListeners();
   }
 
-  onMediaAttaching(event: Events.MEDIA_ATTACHING, data: MediaAttachingData) {
+  private onMediaAttaching(
+    event: Events.MEDIA_ATTACHING,
+    data: MediaAttachingData
+  ) {
     this.media = data.media;
     this._cleanTracks();
   }
 
-  onMediaDetaching() {
+  private onMediaDetaching() {
     const { captionsTracks } = this;
     Object.keys(captionsTracks).forEach((trackName) => {
       clearCurrentCues(captionsTracks[trackName]);
@@ -289,7 +292,7 @@ export class TimelineController implements ComponentAPI {
     this.nonNativeCaptionsTracks = {};
   }
 
-  onManifestLoading() {
+  private onManifestLoading() {
     this.lastSn = -1; // Detect discontinuity in fragment parsing
     this.prevCC = -1;
     this.vttCCs = newVTTCCs(); // Detect discontinuity in subtitle manifests
@@ -307,7 +310,7 @@ export class TimelineController implements ComponentAPI {
     }
   }
 
-  _cleanTracks() {
+  private _cleanTracks() {
     // clear outdated subtitles
     const { media } = this;
     if (!media) {
@@ -321,7 +324,7 @@ export class TimelineController implements ComponentAPI {
     }
   }
 
-  onSubtitleTracksUpdated(
+  private onSubtitleTracksUpdated(
     event: Events.SUBTITLE_TRACKS_UPDATED,
     data: SubtitleTracksUpdatedData
   ) {
@@ -385,7 +388,10 @@ export class TimelineController implements ComponentAPI {
     }
   }
 
-  onManifestLoaded(event: Events.MANIFEST_LOADED, data: ManifestLoadedData) {
+  private onManifestLoaded(
+    event: Events.MANIFEST_LOADED,
+    data: ManifestLoadedData
+  ) {
     if (this.config.enableCEA708Captions && data.captions) {
       data.captions.forEach((captionsTrack) => {
         const instreamIdMatch = /(?:CC|SERVICE)([1-4])/.exec(
@@ -411,7 +417,7 @@ export class TimelineController implements ComponentAPI {
     }
   }
 
-  onFragLoaded(event: Events.FRAG_LOADED, data: FragLoadedData) {
+  private onFragLoaded(event: Events.FRAG_LOADED, data: FragLoadedData) {
     const { frag, payload } = data;
     const {
       cea608Parser1,
@@ -557,47 +563,29 @@ export class TimelineController implements ComponentAPI {
     }
   }
 
-  private _appendCues(cues, fragLevel) {
+  private _appendCues(cues: VTTCue[], fragLevel: number) {
     const hls = this.hls;
     if (this.config.renderTextTracksNatively) {
       const textTrack = this.textTracks[fragLevel];
       // WebVTTParser.parse is an async method and if the currently selected text track mode is set to "disabled"
       // before parsing is done then don't try to access currentTrack.cues.getCueById as cues will be null
       // and trying to access getCueById method of cues will throw an exception
-      // Because we check if the mode is diabled, we can force check `cues` below. They can't be null.
+      // Because we check if the mode is disabled, we can force check `cues` below. They can't be null.
       if (textTrack.mode === 'disabled') {
         return;
       }
-      // Sometimes there are cue overlaps on segmented vtts so the same
-      // cue can appear more than once in different vtt files.
-      // This avoid showing duplicated cues with same timecode and text.
-      cues
-        .filter((cue) => !textTrack.cues!.getCueById(cue.id))
-        .forEach((cue) => {
-          try {
-            textTrack.addCue(cue);
-            if (!textTrack.cues!.getCueById(cue.id)) {
-              throw new Error(`addCue is failed for: ${cue}`);
-            }
-          } catch (err) {
-            logger.debug(`Failed occurred on adding cues: ${err}`);
-            const textTrackCue = new (self.TextTrackCue as any)(
-              cue.startTime,
-              cue.endTime,
-              cue.text
-            );
-            textTrackCue.id = cue.id;
-            textTrack.addCue(textTrackCue);
-          }
-        });
+      cues.forEach((cue) => addCueToTrack(textTrack, cue));
     } else {
       const currentTrack = this.tracks[fragLevel];
       const track = currentTrack.default ? 'default' : 'subtitles' + fragLevel;
       hls.trigger(Events.CUES_PARSED, { type: 'subtitles', cues, track });
     }
   }
 
-  onFragDecrypted(event: Events.FRAG_DECRYPTED, data: FragDecryptedData) {
+  private onFragDecrypted(
+    event: Events.FRAG_DECRYPTED,
+    data: FragDecryptedData
+  ) {
     const { frag } = data;
     if (frag.type === PlaylistLevelType.SUBTITLE) {
       if (!Number.isFinite(this.initPTS[frag.cc])) {
@@ -611,12 +599,12 @@ export class TimelineController implements ComponentAPI {
     }
   }
 
-  onSubtitleTracksCleared() {
+  private onSubtitleTracksCleared() {
     this.tracks = [];
     this.captionsTracks = {};
   }
 
-  onFragParsingUserdata(
+  private onFragParsingUserdata(
     event: Events.FRAG_PARSING_USERDATA,
     data: FragParsingUserdataData
   ) {
@@ -637,7 +625,7 @@ export class TimelineController implements ComponentAPI {
     }
   }
 
-  extractCea608Data(byteArray: Uint8Array): number[][] {
+  private extractCea608Data(byteArray: Uint8Array): number[][] {
     const count = byteArray[0] & 31;
     let position = 2;
     const actualCCBytes: number[][] = [[], []];

diff --git a/src/utils/cues.ts b/src/utils/cues.ts
@@ -1,5 +1,7 @@
 import { fixLineBreaks } from './vttparser';
 import type { CaptionScreen, Row } from './cea-608-parser';
+import { generateCueId } from './webvtt-parser';
+import { addCueToTrack } from './texttrack-utils';
 
 const WHITESPACE_CHAR = /\s/;
 
@@ -51,6 +53,7 @@ export function newCue(
       }
 
       cue = new Cue(startTime, endTime, fixLineBreaks(text.trim()));
+      cue.id = generateCueId(cue.startTime, cue.endTime, cue.text);
 
       if (indent >= 16) {
         indent--;
@@ -69,7 +72,7 @@ export function newCue(
   }
   if (track && result.length) {
     // Sort bottom cues in reverse order so that they render in line order when overlapping in Chrome
-    const sortedCues = result.sort((cueA, cueB) => {
+    result.sort((cueA, cueB) => {
       if (cueA.line === 'auto' || cueB.line === 'auto') {
         return 0;
       }
@@ -78,9 +81,7 @@ export function newCue(
       }
       return cueA.line - cueB.line;
     });
-    for (let i = 0; i < sortedCues.length; i++) {
-      track.addCue(sortedCues[i]);
-    }
+    result.forEach((cue) => addCueToTrack(track, cue));
   }
   return result;
 }
diff --git a/src/utils/imsc1-ttml-parser.ts b/src/utils/imsc1-ttml-parser.ts
@@ -3,6 +3,7 @@ import { parseTimeStamp } from './vttparser';
 import VTTCue from './vttcue';
 import { utf8ArrayToStr } from '../demux/id3';
 import { toTimescaleFromScale } from './timescale-conversion';
+import { generateCueId } from './webvtt-parser';
 
 export const IMSC1_CODEC = 'stpp.ttml.im1t';
 
@@ -91,6 +92,7 @@ function parseTTML(ttml: string, syncTime: number): Array<VTTCue> {
         endTime = startTime + duration;
       }
       const cue = new VTTCue(startTime - syncTime, endTime - syncTime, cueText);
+      cue.id = generateCueId(cue.startTime, cue.endTime, cue.text);
 
       const region = regionElements[cueElement.getAttribute('region')];
       const style = styleElements[cueElement.getAttribute('style')];

diff --git a/src/utils/texttrack-utils.ts b/src/utils/texttrack-utils.ts
@@ -1,3 +1,5 @@
+import { logger } from './logger';
+
 export function sendAddTrackEvent(track: TextTrack, videoEl: HTMLMediaElement) {
   let event: Event;
   try {
@@ -11,6 +13,29 @@ export function sendAddTrackEvent(track: TextTrack, videoEl: HTMLMediaElement) {
   videoEl.dispatchEvent(event);
 }
 
+export function addCueToTrack(track: TextTrack, cue: VTTCue) {
+  // Sometimes there are cue overlaps on segmented vtts so the same
+  // cue can appear more than once in different vtt files.
+  // This avoids showing duplicated cues with the same timecode and text.
+  if (!track.cues!.getCueById(cue.id)) {
+    try {
+      track.addCue(cue);
+      if (!track.cues!.getCueById(cue.id)) {
+        throw new Error(`addCue is failed for: ${cue}`);
+      }
+    } catch (err) {
+      logger.debug(`[texttrack-utils]: ${err}`);
+      const textTrackCue = new (self.TextTrackCue as any)(
+        cue.startTime,
+        cue.endTime,
+        cue.text
+      );
+      textTrackCue.id = cue.id;
+      track.addCue(textTrackCue);
+    }
+  }
+}
+
 export function clearCurrentCues(track: TextTrack) {
   if (track?.cues) {
     // When track.mode is disabled, track.cues will be null.

diff --git a/src/utils/webvtt-parser.ts b/src/utils/webvtt-parser.ts
@@ -51,6 +51,16 @@ const hash = function (text: string) {
   return (hash >>> 0).toString();
 };
 
+// Create a unique hash id for a cue based on start/end times and text.
+// This helps timeline-controller to avoid showing repeated captions.
+export function generateCueId(
+  startTime: number,
+  endTime: number,
+  text: string
+) {
+  return hash(startTime.toString()) + hash(endTime.toString()) + hash(text);
+}
+
 const calculateOffset = function (vttCCs: VTTCCs, cc, presentationTime) {
   let currCC = vttCCs[cc];
   let prevCC = vttCCs[currCC.prevCC];
@@ -135,18 +145,14 @@ export function parseWebVTT(
       cue.endTime = startTime + duration;
     }
 
-    // If the cue was not assigned an id from the VTT file (line above the content),
-    // then create a unique hash id for a cue based on start/end times.
-    // This helps timeline-controller to avoid showing repeated captions.
+    // Fix encoding of special characters
+    cue.text = decodeURIComponent(encodeURIComponent(cue.text));
+
+    // If the cue was not assigned an id from the VTT file (line above the content), create one.
     if (!cue.id) {
-      cue.id =
-        hash(cue.startTime.toString()) +
-        hash(cue.endTime.toString()) +
-        hash(cue.text);
+      cue.id = generateCueId(cue.startTime, cue.endTime, cue.text);
     }
 
-    // Fix encoding of special characters
-    cue.text = decodeURIComponent(encodeURIComponent(cue.text));
     if (cue.endTime > 0) {
       cues.push(cue);
     }