Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/yt-dlp/yt-dlp into ytdlp
Browse files Browse the repository at this point in the history
* 'master' of https://github.com/yt-dlp/yt-dlp:
  [youtube] Add YoutubeStoriesIE (#3362)
  [cleanup] Misc fixes (see desc)
  [XAttrMetadata] Refactor and document dependencies
  [EmbedThumbnail] Do not obey `-k`
  • Loading branch information
Lesmiscore committed May 1, 2022
2 parents ca5c7e7 + 6e634cb commit 77931f3
Show file tree
Hide file tree
Showing 14 changed files with 209 additions and 171 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ You'll get `DEBUG` column with tokens in arguments, and list of unmatched tokens
* `255kbps` audio is extracted (if available) from youtube music when premium cookies are given
* Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))
* Download livestreams from the start using `--live-from-start` (experimental)
* Support for downloading stories (`ytstories:<channel UCID>`)

* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`

Expand Down Expand Up @@ -346,6 +347,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
* [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata on Linux. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively
* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen/ffmpeg cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
Expand Down Expand Up @@ -390,10 +392,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi
<!-- Auto generated -->
## General Options:
-h, --help Print this help text and exit
-V, --version Print program version and exit
-U, --update Update this program to latest version. Make
sure that you have sufficient permissions
(run with sudo if needed)
--version Print program version and exit
-U, --update Update this program to latest version
-i, --ignore-errors Ignore download and postprocessing errors.
The download will be considered successful
even if the postprocessing fails
Expand Down
27 changes: 16 additions & 11 deletions yt_dlp/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -3298,16 +3298,16 @@ def fixup():
if fixup_policy in ('ignore', 'never'):
return
elif fixup_policy == 'warn':
do_fixup = False
do_fixup = 'warn'
elif fixup_policy != 'force':
assert fixup_policy in ('detect_or_warn', None)
if not info_dict.get('__real_download'):
do_fixup = False

def ffmpeg_fixup(cndn, msg, cls):
if not cndn:
if not (do_fixup and cndn):
return
if not do_fixup:
elif do_fixup == 'warn':
self.report_warning(f'{vid}: {msg}')
return
pp = cls(self)
Expand Down Expand Up @@ -3455,6 +3455,17 @@ def filter_requested_info(info_dict, actually_filter=True):
''' Alias of sanitize_info for backward compatibility '''
return YoutubeDL.sanitize_info(info_dict, actually_filter)

def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
    """Delete the given files and keep info['__files_to_move'] in sync.

    @param files_to_delete  Filenames to remove; falsy entries are skipped
    @param info             info dict whose '__files_to_move' mapping (if any)
                            is pruned of deleted files. The mutable default is
                            only ever read via .get(), never mutated
    @param msg              Optional screen-message template with a single
                            %s placeholder for the filename
    """
    import os  # NOTE(review): redundant if `os` is already imported at module level
    for filename in set(filter(None, files_to_delete)):
        if msg:
            self.to_screen(msg % filename)
        try:
            # Was `self.remove(filename)`: no such method exists on this class
            os.remove(filename)
        except OSError:
            # Include the offending filename instead of the placeholder-less
            # f'Unable to delete file (unknown)'
            self.report_warning(f'Unable to delete file {filename}')
        if filename in info.get('__files_to_move', []):  # NB: Delete even if None
            del info['__files_to_move'][filename]

@staticmethod
def post_extract(info_dict):
def actual_post_extract(info_dict):
Expand Down Expand Up @@ -3487,14 +3498,8 @@ def run_pp(self, pp, infodict):
for f in files_to_delete:
infodict['__files_to_move'].setdefault(f, '')
else:
for old_filename in set(files_to_delete):
self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
try:
self.remove(encodeFilename(old_filename))
except (IOError, OSError):
self.report_warning('Unable to remove downloaded original file')
if old_filename in infodict['__files_to_move']:
del infodict['__files_to_move'][old_filename]
self._delete_downloaded_files(
*files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
return infodict

def run_all_pps(self, key, info, *, additional_pps=None):
Expand Down
9 changes: 9 additions & 0 deletions yt_dlp/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,15 @@
from .websocket import WebSocket


# Optional xattr support: both the `xattr` and `pyxattr` projects install a
# module importable as `xattr`, so a single import covers either backend.
try:
    import xattr  # xattr or pyxattr
except ImportError:
    # Neither backend installed; callers must check for None before use.
    xattr = None
else:
    if hasattr(xattr, 'set'):  # pyxattr
        # Tag the module so later code can tell which backend was imported
        # (detection keys off the module-level `set` attribute — presumably
        # unique to pyxattr; verify against upstream APIs if this breaks).
        xattr._yt_dlp__identifier = 'pyxattr'


# Registry of every non-private name imported above (available deps map to
# their module objects, unavailable ones to None).
all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}


Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/downloader/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_dat
'ctx_id': ctx.get('ctx_id'),
'unrecoverable_http_error': info_dict.get('unrecoverable_http_error'),
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False
if fragment_info_dict.get('filetime'):
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2883,7 +2883,7 @@ def extract_Initialization(source):
content_type = 'video'
elif codecs['acodec'] != 'none':
content_type = 'audio'
elif codecs.get('tcodec', 'none') != 'none':
elif codecs.get('scodec', 'none') != 'none':
content_type = 'text'
elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
content_type = 'text'
Expand Down
1 change: 1 addition & 0 deletions yt_dlp/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2162,6 +2162,7 @@
YoutubeSearchURLIE,
YoutubeMusicSearchURLIE,
YoutubeSubscriptionsIE,
YoutubeStoriesIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeYtBeIE,
Expand Down
95 changes: 80 additions & 15 deletions yt_dlp/extractor/youtube.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import calendar
import copy
import datetime
Expand Down Expand Up @@ -2210,7 +2211,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:2ef1d002cad520f65825346e2084e49d',
},
'params': {'skip_download': True}
},
}, {
# Story. Requires specific player params to work.
# Note: stories get removed after some period of time
'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',
'info_dict': {
'id': 'yN3x1t3sieA',
'ext': 'mp4',
'uploader': 'Linus Tech Tips',
'duration': 13,
'channel': 'Linus Tech Tips',
'playable_in_embed': True,
'tags': [],
'age_limit': 0,
'uploader_url': 'http://www.youtube.com/user/LinusTechTips',
'upload_date': '20220402',
'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',
'title': 'Story',
'live_status': 'not_live',
'uploader_id': 'LinusTechTips',
'view_count': int,
'description': '',
'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',
'categories': ['Science & Technology'],
'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',
'availability': 'unlisted',
}
}
]

@classmethod
Expand Down Expand Up @@ -2861,12 +2888,17 @@ def extract_thread(contents):
lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

continuation = self._extract_continuation(root_continuation_data)
message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
if message and not parent:
self.report_warning(message, video_id=video_id)

response = None
is_forced_continuation = False
is_first_continuation = parent is None
if is_first_continuation and not continuation:
# Sometimes you can get comments by generating the continuation yourself,
# even if YouTube initially reports them being disabled - e.g. stories comments.
# Note: if the comment section is actually disabled, YouTube may return a response with
# required check_get_keys missing. So we will disable that check initially in this case.
continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
is_forced_continuation = True

for page_num in itertools.count(0):
if not continuation:
Expand All @@ -2887,8 +2919,8 @@ def extract_thread(contents):
response = self._extract_response(
item_id=None, query=continuation,
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
check_get_keys='onResponseReceivedEndpoints')

check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
is_forced_continuation = False
continuation_contents = traverse_obj(
response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

Expand All @@ -2913,6 +2945,18 @@ def extract_thread(contents):
if continuation:
break

message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
if message and not parent and tracker['running_total'] == 0:
self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id
    """
    # Binary (protobuf-like) payload embedding the video id twice, followed
    # by the literal section name; YouTube accepts it base64-encoded.
    raw = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    encoded = base64.b64encode(raw.encode())
    return encoded.decode()

def _get_comments(self, ytcfg, video_id, contents, webpage):
"""Entry for comment extraction"""
def _real_comment_extract(contents):
Expand Down Expand Up @@ -2966,7 +3010,10 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
headers = self.generate_api_headers(
ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client, hl=hl)

yt_query = {'videoId': video_id}
yt_query = {
'videoId': video_id,
'params': '8AEB' # enable stories
}
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
Expand Down Expand Up @@ -3327,7 +3374,7 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url,
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
webpage = self._download_webpage(
webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

Expand Down Expand Up @@ -3788,7 +3835,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
unified_strdate(get_first(microformats, 'uploadDate'))
or unified_strdate(search_meta('uploadDate')))
if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
info['upload_date'] = upload_date

for to, frm in fallbacks.items():
Expand Down Expand Up @@ -4303,7 +4350,7 @@ def _get_uncropped(url):
self._extract_visitor_data(data, ytcfg)),
**metadata)

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
first_id = last_id = response = None
for page_num in itertools.count(1):
videos = list(self._playlist_entries(playlist))
Expand All @@ -4313,9 +4360,6 @@ def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
if start >= len(videos):
return
for video in videos[start:]:
if video['id'] == first_id:
self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
return
yield video
first_id = first_id or videos[0]['id']
last_id = videos[-1]['id']
Expand Down Expand Up @@ -4347,13 +4391,18 @@ def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
playlist_url = urljoin(url, try_get(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
compat_str))
if playlist_url and playlist_url != url:

# Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
# [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

if playlist_url and playlist_url != url and not is_known_unviewable:
return self.url_result(
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)

return self.playlist_result(
self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
playlist_id=playlist_id, playlist_title=title)

def _extract_availability(self, data):
Expand Down Expand Up @@ -5890,6 +5939,22 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
}]


class YoutubeStoriesIE(InfoExtractor):
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect a "ytstories:<UCID>" pseudo-URL to the channel's stories playlist."""
        # Stories are served from an auto-generated playlist whose id is
        # 'RLTD' prepended to the channel UCID.
        playlist_id = 'RLTD' + self._match_id(url)
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(playlist_url, ie=YoutubeTabIE, video_id=playlist_id)


class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list
Expand Down
4 changes: 2 additions & 2 deletions yt_dlp/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def _dict_from_options_callback(
general.add_option(
'-U', '--update',
action='store_true', dest='update_self',
help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
help='Update this program to latest version')
general.add_option(
'-i', '--ignore-errors',
action='store_true', dest='ignoreerrors',
Expand Down Expand Up @@ -1517,7 +1517,7 @@ def _dict_from_options_callback(
dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3,
help='Replace text in a metadata field using the given regex. This option can be used multiple times')
postproc.add_option(
'--xattrs',
'--xattrs', '--xattr',
action='store_true', dest='xattrs', default=False,
help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
postproc.add_option(
Expand Down
8 changes: 8 additions & 0 deletions yt_dlp/postprocessor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ def write_debug(self, text, *args, **kwargs):
if self._downloader:
return self._downloader.write_debug(text, *args, **kwargs)

def _delete_downloaded_files(self, *files_to_delete, **kwargs):
    """Delete files, delegating to the attached downloader when one exists."""
    downloader = self._downloader
    if downloader:
        # Delegation lets the downloader apply its own handling of **kwargs
        # (e.g. messages / info bookkeeping).
        return downloader._delete_downloaded_files(*files_to_delete, **kwargs)
    # No downloader attached: best-effort local removal, skipping falsy names.
    import os
    for path in {f for f in files_to_delete if f}:
        os.remove(path)

def get_param(self, name, default=None, *args, **kwargs):
if self._downloader:
return self._downloader.params.get(name, default, *args, **kwargs)
Expand Down
14 changes: 6 additions & 8 deletions yt_dlp/postprocessor/embedthumbnail.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,11 +220,9 @@ def run(self, info):
os.replace(temp_filename, filename)

self.try_utime(filename, mtime, mtime)

files_to_delete = [thumbnail_filename]
if self._already_have_thumbnail:
if original_thumbnail == thumbnail_filename:
files_to_delete = []
elif original_thumbnail != thumbnail_filename:
files_to_delete.append(original_thumbnail)
return files_to_delete, info
converted = original_thumbnail != thumbnail_filename
self._delete_downloaded_files(
thumbnail_filename if converted or not self._already_have_thumbnail else None,
original_thumbnail if converted and not self._already_have_thumbnail else None,
info=info)
return [], info
7 changes: 3 additions & 4 deletions yt_dlp/postprocessor/ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def concat_files(self, in_files, out_file, concat_opts=None):
self.real_run_ffmpeg(
[(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
[(out_file, out_flags)])
os.remove(concat_file)
self._delete_downloaded_files(concat_file)

@classmethod
def _concat_spec(cls, in_files, concat_opts=None):
Expand Down Expand Up @@ -770,8 +770,7 @@ def run(self, info):
self.run_ffmpeg_multiple_files(
(filename, metadata_filename), temp_filename,
itertools.chain(self._options(info['ext']), *options))
for file in filter(None, files_to_delete):
os.remove(file) # Don't obey --keep-files
self._delete_downloaded_files(*files_to_delete)
os.replace(temp_filename, filename)
return [], info

Expand Down Expand Up @@ -1122,7 +1121,7 @@ def run(self, info):
destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())])
if in_file != info['filepath']:
os.remove(in_file)
self._delete_downloaded_files(in_file, msg=None)
return [], info


Expand Down

0 comments on commit 77931f3

Please sign in to comment.