yt-dlp · pukkandan · Feb 19, 2024 · Mar 27, 2023 · Mar 28, 2023 · Mar 27, 2023
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
@@ -3483,7 +3483,8 @@ def ffmpeg_fixup(cndn, msg, cls):
                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                      FFmpegFixupM3u8PP)
-                        ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
+                        ffmpeg_fixup(downloader == 'dashsegments'
+                                     and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
 
                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
@@ -247,6 +247,9 @@ class InfoExtractor:
                                  (For internal use only)
                                  * http_chunk_size Chunk size for HTTP downloads
                                  * ffmpeg_args     Extra arguments for ffmpeg downloader
+                    MPD formats can have the additional field:
+                    * is_dash_periods  True if the format is the result of merging
+                                 multiple DASH periods.
                     RTMP formats can also have the additional fields: page_url,
                     app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
                     rtmp_protocol, rtmp_real_time
@@ -2530,7 +2533,11 @@ def _extract_mpd_formats(self, *args, **kwargs):
             self._report_ignoring_subs('DASH')
         return fmts
 
-    def _extract_mpd_formats_and_subtitles(
+    def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._extract_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _extract_mpd_periods(
             self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
             fatal=True, data=None, headers={}, query={}):
 
@@ -2543,26 +2550,55 @@ def _extract_mpd_formats_and_subtitles(
             errnote='Failed to download MPD manifest' if errnote is None else errnote,
             fatal=fatal, data=data, headers=headers, query=query)
         if res is False:
-            return [], {}
+            return []
         mpd_doc, urlh = res
         if mpd_doc is None:
-            return [], {}
+            return []
 
         # We could have been redirected to a new url when we retrieved our mpd file.
         mpd_url = urlh.url
         mpd_base_url = base_url(mpd_url)
 
-        return self._parse_mpd_formats_and_subtitles(
-            mpd_doc, mpd_id, mpd_base_url, mpd_url)
+        return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
 
     def _parse_mpd_formats(self, *args, **kwargs):
         fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
         if subs:
             self._report_ignoring_subs('DASH')
         return fmts
 
-    def _parse_mpd_formats_and_subtitles(
-            self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+    def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._parse_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _merge_mpd_periods(self, periods):
+        """
+        Combine all formats and subtitles from an MPD manifest into a single list,
+        by concatenate streams with similar formats.
+        """
+        # create merged formats
+        formats = {}
+        subtitles = {}
+        for period in periods:
+            for f in period['formats']:
+                # ignore period-specific parameters when grouping periods into formats
+                format_key = tuple(v for k, v in f.items() if k not in (
+                    ('format_id', 'fragments', 'manifest_stream_number')))
+                assert 'is_dash_periods' not in f, 'format already processed'
+                f['is_dash_periods'] = True
+                if format_key not in formats:
+                    # first period of this format
+                    formats[format_key] = f
+                elif 'fragments' in f:
+                    # follow-up period of a mergeable format
+                    formats[format_key].setdefault('fragments', []).extend(f['fragments'])
+
+            for sub_lang, sub_info in period['subtitles'].items():
+                subtitles.setdefault(sub_lang, []).extend(sub_info)
+
+        return list(formats.values()), subtitles
+
+    def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
         """
         Parse formats from MPD manifest.
         References:
@@ -2641,9 +2677,13 @@ def extract_Initialization(source):
             return ms_info
 
         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
-        formats, subtitles = [], {}
         stream_numbers = collections.defaultdict(int)
-        for period in mpd_doc.findall(_add_ns('Period')):
+        for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
+            period_entry = {
+                'id': period.get('id', f'period-{period_idx}'),
+                'formats': [],
+                'subtitles': collections.defaultdict(list),
+            }
             period_duration = parse_duration(period.get('duration')) or mpd_duration
             period_ms_info = extract_multisegment_info(period, {
                 'start_number': 1,
@@ -2893,11 +2933,11 @@ def add_segment_url():
                     if content_type in ('video', 'audio', 'image/jpeg'):
                         f['manifest_stream_number'] = stream_numbers[f['url']]
                         stream_numbers[f['url']] += 1
-                        formats.append(f)
+                        period_entry['formats'].append(f)
                     elif content_type == 'text':
-                        subtitles.setdefault(lang or 'und', []).append(f)
-
-        return formats, subtitles
+                        period_entry['subtitles'][lang or 'und'].append(f)
+            if period_entry['formats'] or period_entry['subtitles']:
+                yield period_entry
 
     def _extract_ism_formats(self, *args, **kwargs):
         fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)

diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py
@@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
             'title': 'A Family for the Holidays',
         },
         'skip': 'This video is only available for registered users'
+    }, {
+        'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
+        'info_dict': {
+            'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
+            'ext': 'mp4',
+            'title': 'S11 - Aflevering 1',
+            'episode': 'Episode 1',
+            'series': 'De Mol',
+            'season_number': 11,
+            'episode_number': 1,
+            'season': 'Season 11'
+        },
+        'params': {
+            'skip_download': True
+        },
+        'skip': 'This video is only available for registered users'
     }]
 
     _id_token = None
@@ -77,16 +93,39 @@ def _real_extract(self, url):
 
         api = self._download_json(
             f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
-            video_id, headers={'Authorization': 'Bearer %s' % self._id_token})
+            video_id, headers={
+                'Authorization': 'Bearer %s' % self._id_token,
+                **self.geo_verification_headers(),
+            })
+
+        if 'manifestUrls' in api:
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+                api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
 
-        formats, subs = self._extract_m3u8_formats_and_subtitles(
-            api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
+        else:
+            if 'ssai' not in api:
+                raise ExtractorError('expecting Google SSAI stream')
+
+            ssai_content_source_id = api['ssai']['contentSourceID']
+            ssai_video_id = api['ssai']['videoID']
+
+            dai = self._download_json(
+                f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
+                video_id, data=b'{"api-key":"null"}',
+                headers={'content-type': 'application/json'})
+
+            periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
+
+            # skip pre-roll and mid-roll ads
+            periods = [p for p in periods if '-ad-' not in p['id']]
+
+            formats, subtitles = self._merge_mpd_periods(periods)
 
         info_dict.update({
             'id': video_id,
             'formats': formats,
+            'subtitles': subtitles,
         })
-
         return info_dict