From 2089ea50c4b13a50595183dec8bf4e3b4b1197a8 Mon Sep 17 00:00:00 2001 From: felix Date: Sun, 23 May 2021 18:34:49 +0200 Subject: [PATCH 01/11] [downloader/mhtml] New downloader This downloader is intended to be used for streams that consist of a timed sequence of stand-alone images, such as slideshows or thumbnail streams. --- yt_dlp/downloader/__init__.py | 2 + yt_dlp/downloader/mhtml.py | 220 ++++++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+) create mode 100644 yt_dlp/downloader/mhtml.py diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index c7ba918627a..82d7623f626 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -22,6 +22,7 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs): from .rtmp import RtmpFD from .rtsp import RtspFD from .ism import IsmFD +from .mhtml import MhtmlFD from .niconico import NiconicoDmcFD from .youtube_live_chat import YoutubeLiveChatReplayFD from .external import ( @@ -39,6 +40,7 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs): 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, 'ism': IsmFD, + 'mhtml': MhtmlFD, 'niconico_dmc': NiconicoDmcFD, 'youtube_live_chat_replay': YoutubeLiveChatReplayFD, } diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py new file mode 100644 index 00000000000..365e7ddb77a --- /dev/null +++ b/yt_dlp/downloader/mhtml.py @@ -0,0 +1,220 @@ +# coding: utf-8 +from __future__ import unicode_literals +from .fragment import FragmentFD +import quopri +import uuid +import io +import re +from ..version import __version__ as YT_DLP_VERSION +from ..utils import ( + urljoin, + srt_subtitles_timecode, + formatSeconds, +) + + +class MhtmlFD(FragmentFD): + FD_NAME = 'mhtml' + _EXTENSION = 'mhtml' + + _STYLESHEET = """\ +html, body { + margin: 0; + padding: 0; + height: 100vh; +} + +html { + overflow-y: scroll; + scroll-snap-type: y mandatory; +} + +body { + scroll-snap-type: y mandatory; + 
display: flex; + flex-flow: column; +} + +body > figure { + max-width: 100vw; + max-height: 100vh; + scroll-snap-align: center; +} + +body > figure > figcaption { + text-align: center; + height: 2.5em; +} + +body > figure > img { + display: block; + margin: auto; + max-width: 100%; + max-height: calc(100vh - 5em); +} +""" + _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) + _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) + + @staticmethod + def _escape_html(text): + return ( + text + .replace('&', '&') + .replace('<', '<') + .replace('>', '>') + .replace('"', '"') + .replace("'", ''') + ) + + @staticmethod + def _escape_mime(s): + return '=?utf-8?Q?' + (b''.join( + bytes((b,)) if b >= 0x20 else b'=%02X' % b + for b in quopri.encodestring(s.encode('utf-8'), header=True) + )).decode('us-ascii') + '?=' + + def _gen_cid(self, i, fragment, frag_boundary): + return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary) + + def _gen_stub(self, *, fragments, frag_boundary, title): + output = io.StringIO() + + output.write(( + '' + '' + '' + '' '' + '' '{title}' + '' '' + '' + ).format( + version=self._escape_html(YT_DLP_VERSION), + styles=self._STYLESHEET, + title=self._escape_html(title) + )) + + t0 = 0 + for i, frag in enumerate(fragments): + output.write('
') + try: + t1 = t0 + frag['duration'] + output.write(( + '
Slide #{num}: {t0} – {t1} (duration: {duration})
' + ).format( + num=i + 1, + t0=srt_subtitles_timecode(t0), + t1=srt_subtitles_timecode(t1), + duration=formatSeconds(frag['duration']) + )) + except (KeyError, ValueError, TypeError): + t1 = None + output.write(( + '
Slide #{num}
' + ).format(num=i + 1)) + output.write(''.format( + cid=self._gen_cid(i, frag, frag_boundary))) + output.write('
') + t0 = t1 + + return output.getvalue() + + def real_download(self, filename, info_dict): + fragment_base_url = info_dict.get('fragment_base_url') + fragments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] + title = info_dict['_download_params'].get('title', fragment_base_url) + origin = info_dict['_download_params'].get('origin') + + ctx = { + 'filename': filename, + 'total_frags': len(fragments), + } + + self._prepare_and_start_frag_download(ctx) + + extra_state = ctx.setdefault('extra_state', { + 'header_written': False, + 'mime_boundary': str(uuid.uuid4()).replace('-', ''), + }) + + frag_boundary = extra_state['mime_boundary'] + + if not extra_state['header_written']: + stub = self._gen_stub( + fragments=fragments, + frag_boundary=frag_boundary, + title=title + ) + + ctx['dest_stream'].write(( + 'MIME-Version: 1.0\r\n' + 'From: \r\n' + 'To: \r\n' + 'Subject: {title}\r\n' + 'Content-type: multipart/related; ' + '' 'boundary="{boundary}"; ' + '' 'type="text/html"\r\n' + 'X.yt-dlp.Origin: {origin}\r\n' + '\r\n' + '--{boundary}\r\n' + 'Content-Type: text/html; charset=utf-8\r\n' + 'Content-Length: {length}\r\n' + '\r\n' + '{stub}\r\n' + ).format( + origin=origin, + boundary=frag_boundary, + length=len(stub), + title=self._escape_mime(title), + stub=stub + ).encode('utf-8')) + extra_state['header_written'] = True + + for i, fragment in enumerate(fragments): + if (i + 1) <= ctx['fragment_index']: + continue + + fragment_url = urljoin(fragment_base_url, fragment['path']) + success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) + if not success: + continue + + mime_type = b'image/jpeg' + if frag_content.startswith(b'\x89PNG\r\n\x1a\n'): + mime_type = b'image/png' + if frag_content.startswith((b'GIF87a', b'GIF89a')): + mime_type = b'image/gif' + if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP': + mime_type = b'image/webp' + + frag_header = io.BytesIO() + frag_header.write( + 
b'--%b\r\n' + % (frag_boundary.encode('us-ascii'),)) + frag_header.write( + b'Content-ID: <%b>\r\n' + % (self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'),)) + frag_header.write( + b'Content-type: %b\r\n' + % (mime_type,)) + frag_header.write( + b'Content-length: %u\r\n' + % (len(frag_content),)) + frag_header.write( + b'Content-location: %b\r\n' + % (fragment_url.encode('us-ascii'),)) + try: + frag_header.write( + b'X.yt-dlp.Duration: %f s\r\n' + % (fragment['duration'],)) + except KeyError: + pass + frag_header.write(b'\r\n') + self._append_fragment( + ctx, frag_header.getvalue() + frag_content + b'\r\n') + + ctx['dest_stream'].write( + b'--%b--\r\n\r\n' + % (frag_boundary.encode('us-ascii'),)) + self._finish_frag_download(ctx) From c5df915b3ce0ccbcba30b192775b64ba0a670d17 Mon Sep 17 00:00:00 2001 From: felix Date: Sun, 23 May 2021 18:34:53 +0200 Subject: [PATCH 02/11] [mediasite] Extract slides --- yt_dlp/extractor/mediasite.py | 67 ++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index 5d083a1cd1c..f1956c65e0d 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -122,6 +122,58 @@ def _extract_urls(webpage): r'(?xi)]+\bsrc=(["\'])(?P(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE, webpage)] + def __extract_slides(self, *, stream_id, snum, Stream, duration, images, title, origin): + slide_base_url = Stream['SlideBaseUrl'] + + fname_template = Stream['SlideImageFileNameTemplate'] + if fname_template != 'slide_{0:D4}.jpg': + self.report_warning('Unusual slide file name template; report a bug if slide downloading fails') + fname_template = re.sub(r'\{0:D([0-9]+)\}', r'{0:0\1}', fname_template) + + fragments = [] + for i, slide in enumerate(Stream['Slides']): + if i == 0: + if slide['Time'] > 0: + default_slide = images.get('DefaultSlide') + if default_slide is None: + default_slide = 
images.get('DefaultStreamImage') + if default_slide is not None: + default_slide = default_slide['ImageFilename'] + if default_slide is not None: + fragments.append({ + 'path': default_slide, + 'duration': slide['Time'] / 1000, + }) + + next_time = try_get(None, [ + lambda _: Stream['Slides'][i + 1]['Time'], + lambda _: duration, + lambda _: slide['Time'], + ], expected_type=(int, float)) + + fragments.append({ + 'path': fname_template.format(slide.get('Number', i + 1)), + 'duration': (next_time - slide['Time']) / 1000 + }) + + return { + 'format_id': '%s-%u.slides' % (stream_id, snum), + 'ext': 'mhtml', + 'url': slide_base_url, + 'protocol': 'mhtml', + 'acodec': 'none', + 'vcodec': 'jpeg', + 'quality': -12, + 'format_note': 'Slides', + 'fragments': fragments, + 'fragment_base_url': slide_base_url, + '_download_params': { + 'duration': duration, + 'title': title, + 'origin': origin, + }, + } + def _real_extract(self, url): url, data = unsmuggle_url(url, {}) mobj = re.match(self._VALID_URL, url) @@ -198,10 +250,17 @@ def _real_extract(self, url): 'ext': mimetype2ext(VideoUrl.get('MimeType')), }) - # TODO: if Stream['HasSlideContent']: - # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum) - # from Stream['Slides'] - # this will require writing a custom downloader... 
+ if Stream.get('HasSlideContent', False): + images = player_options['PlayerLayoutOptions']['Images'] + stream_formats.append(self.__extract_slides( + stream_id=stream_id, + snum=snum, + Stream=Stream, + duration=presentation.get('Duration'), + images=images, + title=title, + origin=url + )) # disprefer 'secondary' streams if stream_type != 0: From 89124e63a906b06ef08a19980832ec2493d4d545 Mon Sep 17 00:00:00 2001 From: felix Date: Sun, 23 May 2021 20:54:00 +0200 Subject: [PATCH 03/11] [common] Extract thumbnail stream from DASH manifests --- yt_dlp/extractor/common.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 64ab8f7062f..74c63fb23a3 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2122,6 +2122,7 @@ def extract_media(x_media_line): format_id.append(str(format_index)) f = { 'format_id': '-'.join(format_id), + 'format_note': name, 'format_index': format_index, 'url': manifest_url, 'manifest_url': m3u8_url, @@ -2633,7 +2634,7 @@ def extract_Initialization(source): mime_type = representation_attrib['mimeType'] content_type = representation_attrib.get('contentType', mime_type.split('/')[0]) - if content_type in ('video', 'audio', 'text'): + if content_type in ('video', 'audio', 'text') or mime_type == 'image/jpeg': base_url = '' for element in (representation, adaptation_set, period, mpd_doc): base_url_e = element.find(_add_ns('BaseURL')) @@ -2650,9 +2651,15 @@ def extract_Initialization(source): url_el = representation.find(_add_ns('BaseURL')) filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) bandwidth = int_or_none(representation_attrib.get('bandwidth')) + if representation_id is not None: + format_id = representation_id + else: + format_id = content_type + if mpd_id: + format_id = mpd_id + '-' + format_id if content_type in ('video', 'audio'): f = { - 
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, + 'format_id': format_id, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), @@ -2672,6 +2679,16 @@ def extract_Initialization(source): 'manifest_url': mpd_url, 'filesize': filesize, } + elif mime_type == 'image/jpeg': + f = { + 'format_id': format_id, + 'ext': 'mhtml', + 'manifest_url': mpd_url, + 'format_note': 'DASH thumbnail', + 'preference': -10, + 'acodec': 'none', + 'vcodec': 'jpeg', + } representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) def prepare_template(template_name, identifiers): @@ -2690,7 +2707,8 @@ def prepare_template(template_name, identifiers): t += c # Next, $...$ templates are translated to their # %(...) counterparts to be used with % operator - t = t.replace('$RepresentationID$', representation_id) + if representation_id is not None: + t = t.replace('$RepresentationID$', representation_id) t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t) t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t) t.replace('$$', '$') @@ -2807,7 +2825,7 @@ def add_segment_url(): 'url': mpd_url or base_url, 'fragment_base_url': base_url, 'fragments': [], - 'protocol': 'http_dash_segments', + 'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml', }) if 'initialization_url' in representation_ms_info: initialization_url = representation_ms_info['initialization_url'] @@ -2818,7 +2836,7 @@ def add_segment_url(): else: # Assuming direct URL to unfragmented media. 
f['url'] = base_url - if content_type in ('video', 'audio'): + if content_type in ('video', 'audio') or mime_type == 'image/jpeg': formats.append(f) elif content_type == 'text': subtitles.setdefault(lang or 'und', []).append(f) From 407878b327ec53c386d406ad696166d122f34a28 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 7 Jun 2021 13:07:02 +0530 Subject: [PATCH 04/11] Remove "Unknown MIME" warnings from tests --- yt_dlp/extractor/canvas.py | 6 +++--- yt_dlp/extractor/common.py | 2 ++ yt_dlp/extractor/viki.py | 5 +---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index 1b7c1d2ff7f..575f3d25cbb 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -24,7 +24,7 @@ class CanvasIE(InfoExtractor): _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', - 'md5': '68993eda72ef62386a15ea2cf3c93107', + 'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9', 'info_dict': { 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', @@ -32,9 +32,9 @@ class CanvasIE(InfoExtractor): 'title': 'Nachtwacht: De Greystook', 'description': 'Nachtwacht: De Greystook', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1468.04, + 'duration': 1468.02, }, - 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], + 'expected_warnings': ['is not a supported codec'], }, { 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', 'only_matching': True, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 74c63fb23a3..f2ddb186aa5 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2680,6 +2680,8 @@ def extract_Initialization(source): 'filesize': filesize, } elif mime_type == 
'image/jpeg': + # See test case in VikiIE + # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1 f = { 'format_id': format_id, 'ext': 'mhtml', diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py index 98d16f4d134..19bcf1d7be7 100644 --- a/yt_dlp/extractor/viki.py +++ b/yt_dlp/extractor/viki.py @@ -142,6 +142,7 @@ class VikiIE(VikiBaseIE): IE_NAME = 'viki' _VALID_URL = r'%s(?:videos|player)/(?P[0-9]+v)' % VikiBaseIE._VALID_URL_BASE _TESTS = [{ + 'note': 'Free non-DRM video with storyboards in MPD', 'url': 'https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1', 'info_dict': { 'id': '1175236v', @@ -155,7 +156,6 @@ class VikiIE(VikiBaseIE): 'params': { 'format': 'bestvideo', }, - 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'], }, { 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', 'info_dict': { @@ -173,7 +173,6 @@ class VikiIE(VikiBaseIE): 'format': 'bestvideo', }, 'skip': 'Blocked in the US', - 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'], }, { # clip 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', @@ -225,7 +224,6 @@ class VikiIE(VikiBaseIE): 'params': { 'format': 'bestvideo', }, - 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'], }, { # youtube external 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1', @@ -264,7 +262,6 @@ class VikiIE(VikiBaseIE): 'params': { 'format': 'bestvideo', }, - 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'], }] def _real_extract(self, url): From 5f323f1f512f497fb4ca4fcd2d063f76a57dd933 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 7 Jun 2021 14:12:57 +0530 Subject: [PATCH 05/11] Remove `_download_params` --- yt_dlp/downloader/mhtml.py | 4 ++-- yt_dlp/extractor/mediasite.py | 9 +-------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py 
index 365e7ddb77a..e141ac52f43 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -123,8 +123,8 @@ def real_download(self, filename, info_dict): fragment_base_url = info_dict.get('fragment_base_url') fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] - title = info_dict['_download_params'].get('title', fragment_base_url) - origin = info_dict['_download_params'].get('origin') + title = info_dict['title'] + origin = info_dict['webpage_url'] ctx = { 'filename': filename, diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index f1956c65e0d..6ec2e414f3e 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -122,7 +122,7 @@ def _extract_urls(webpage): r'(?xi)]+\bsrc=(["\'])(?P(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE, webpage)] - def __extract_slides(self, *, stream_id, snum, Stream, duration, images, title, origin): + def __extract_slides(self, *, stream_id, snum, Stream, duration, images): slide_base_url = Stream['SlideBaseUrl'] fname_template = Stream['SlideImageFileNameTemplate'] @@ -167,11 +167,6 @@ def __extract_slides(self, *, stream_id, snum, Stream, duration, images, title, 'format_note': 'Slides', 'fragments': fragments, 'fragment_base_url': slide_base_url, - '_download_params': { - 'duration': duration, - 'title': title, - 'origin': origin, - }, } def _real_extract(self, url): @@ -258,8 +253,6 @@ def _real_extract(self, url): Stream=Stream, duration=presentation.get('Duration'), images=images, - title=title, - origin=url )) # disprefer 'secondary' streams From bda743f52ebd11874e131a5ffb207176eab16fc9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 7 Jun 2021 13:16:25 +0530 Subject: [PATCH 06/11] Add `msec` to duration --- yt_dlp/downloader/mhtml.py | 2 +- yt_dlp/utils.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index 
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as '[H<delim>]M<delim>SS' text.

    @param secs   duration in seconds (int or float)
    @param delim  separator between the time components (default ':')
    @param msec   when True, append the fractional part as '.mmm'
    """
    if secs > 3600:
        ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs > 60:
        ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        ret = '%d' % secs
    # FIX: '%03d' truncates its argument, so the fractional part must be
    # scaled to whole milliseconds first — 'secs % 1' alone is always < 1
    # and would render as '.000' for every input.
    return '%s.%03d' % (ret, secs % 1 * 1000) if msec else ret
def escapeHTML(text):
    """Escape *text* for safe embedding in HTML text or attribute values.

    FIX: the pasted patch was corrupted by entity decoding, leaving every
    replacement a no-op (e.g. ``.replace('&', '&')``); restore the intended
    entities.  '&' is replaced first so the entities introduced by the later
    replacements are not double-escaped.
    """
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )
frag_boundary.encode('us-ascii')) frag_header.write( - b'Content-ID: <%b>\r\n' - % (self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'),)) + b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii')) frag_header.write( - b'Content-type: %b\r\n' - % (mime_type,)) + b'Content-type: %b\r\n' % mime_type) frag_header.write( - b'Content-length: %u\r\n' - % (len(frag_content),)) + b'Content-length: %u\r\n' % len(frag_content)) frag_header.write( - b'Content-location: %b\r\n' - % (fragment_url.encode('us-ascii'),)) - try: - frag_header.write( - b'X.yt-dlp.Duration: %f s\r\n' - % (fragment['duration'],)) - except KeyError: - pass + b'Content-location: %b\r\n' % fragment_url.encode('us-ascii')) + frag_header.write( + b'X.yt-dlp.Duration: %f\r\n' % fragment['duration']) frag_header.write(b'\r\n') self._append_fragment( ctx, frag_header.getvalue() + frag_content + b'\r\n') ctx['dest_stream'].write( - b'--%b--\r\n\r\n' - % (frag_boundary.encode('us-ascii'),)) + b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii')) self._finish_frag_download(ctx) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 0e6c8151443..8bc4c27e5de 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2241,6 +2241,7 @@ def unescapeHTML(s): return re.sub( r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) + def escapeHTML(text): return ( text @@ -2251,6 +2252,7 @@ def escapeHTML(text): .replace("'", ''') ) + def process_communicate_or_kill(p, *args, **kwargs): try: return p.communicate(*args, **kwargs) From 58ca4c9d256afee128b1b4b6ab716eaea2cbe71e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 13 Jun 2021 02:33:00 +0530 Subject: [PATCH 09/11] [mhtml] `real_download` should return `True` on success --- yt_dlp/downloader/mhtml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index 9ebf3654986..81d95c7cbef 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -199,3 +199,4 
@@ def real_download(self, filename, info_dict): ctx['dest_stream'].write( b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii')) self._finish_frag_download(ctx) + return True From a662714561db1800ceea0d2bfcc165238234fcb3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 13 Jun 2021 02:34:14 +0530 Subject: [PATCH 10/11] Remove `vcodec` --- yt_dlp/extractor/common.py | 5 ++--- yt_dlp/extractor/mediasite.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index f2ddb186aa5..8f916e1b0ba 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2686,10 +2686,9 @@ def extract_Initialization(source): 'format_id': format_id, 'ext': 'mhtml', 'manifest_url': mpd_url, - 'format_note': 'DASH thumbnail', - 'preference': -10, + 'format_note': 'DASH storyboards (jpeg)', 'acodec': 'none', - 'vcodec': 'jpeg', + 'vcodec': 'none', } representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index 6ec2e414f3e..a18cfc689f0 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -162,9 +162,8 @@ def __extract_slides(self, *, stream_id, snum, Stream, duration, images): 'url': slide_base_url, 'protocol': 'mhtml', 'acodec': 'none', - 'vcodec': 'jpeg', - 'quality': -12, - 'format_note': 'Slides', + 'vcodec': 'none', + 'format_note': 'Slides (jpeg)', 'fragments': fragments, 'fragment_base_url': slide_base_url, } From b4fa676a591bb4cf65323c3e8161a12ab48c1191 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 13 Jun 2021 17:37:37 +0530 Subject: [PATCH 11/11] The images are not necessarily jpeg --- yt_dlp/extractor/mediasite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index a18cfc689f0..c62233ab7c7 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ 
-163,7 +163,7 @@ def __extract_slides(self, *, stream_id, snum, Stream, duration, images): 'protocol': 'mhtml', 'acodec': 'none', 'vcodec': 'none', - 'format_note': 'Slides (jpeg)', + 'format_note': 'Slides', 'fragments': fragments, 'fragment_base_url': slide_base_url, }