Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/yt-dlp/yt-dlp into ytdlp
Browse files Browse the repository at this point in the history
* 'master' of https://github.com/yt-dlp/yt-dlp:
  [youtube] Add YoutubeStoriesIE (#3362)
  [cleanup] Misc fixes (see desc)
  [XAttrMetadata] Refactor and document dependencies
  [EmbedThumbnail] Do not obey `-k`
  • Loading branch information
Lesmiscore committed May 1, 2022
2 parents ca5c7e7 + 6e634cb commit 77931f3
Show file tree
Hide file tree
Showing 14 changed files with 209 additions and 171 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ You'll get `DEBUG` column with tokens in arguments, and list of unmatched tokens
* `255kbps` audio is extracted (if available) from youtube music when premium cookies are given
* Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))
* Download livestreams from the start using `--live-from-start` (experimental)
* Support for downloading stories (`ytstories:<channel UCID>`)

* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`

Expand Down Expand Up @@ -346,6 +347,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
* [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata on Linux. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively
* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen/ffmpeg cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
Expand Down Expand Up @@ -390,10 +392,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi
<!-- Auto generated -->
## General Options:
-h, --help Print this help text and exit
-V, --version Print program version and exit
-U, --update Update this program to latest version. Make
sure that you have sufficient permissions
(run with sudo if needed)
--version Print program version and exit
-U, --update Update this program to latest version
-i, --ignore-errors Ignore download and postprocessing errors.
The download will be considered successful
even if the postprocessing fails
Expand Down
27 changes: 16 additions & 11 deletions yt_dlp/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -3298,16 +3298,16 @@ def fixup():
if fixup_policy in ('ignore', 'never'):
return
elif fixup_policy == 'warn':
do_fixup = False
do_fixup = 'warn'
elif fixup_policy != 'force':
assert fixup_policy in ('detect_or_warn', None)
if not info_dict.get('__real_download'):
do_fixup = False

def ffmpeg_fixup(cndn, msg, cls):
if not cndn:
if not (do_fixup and cndn):
return
if not do_fixup:
elif do_fixup == 'warn':
self.report_warning(f'{vid}: {msg}')
return
pp = cls(self)
Expand Down Expand Up @@ -3455,6 +3455,17 @@ def filter_requested_info(info_dict, actually_filter=True):
''' Alias of sanitize_info for backward compatibility '''
return YoutubeDL.sanitize_info(info_dict, actually_filter)

def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
    """Delete the given files and keep info['__files_to_move'] in sync.

    @param files_to_delete  Filenames to remove; falsy entries are skipped
    @param info             info dict whose '__files_to_move' mapping (if any)
                            is pruned of deleted files. The mutable default is
                            only ever read via .get(), never mutated
    @param msg              Optional screen-message template with a single
                            %s placeholder for the filename
    """
    import os  # NOTE(review): redundant if `os` is already imported at module level
    for filename in set(filter(None, files_to_delete)):
        if msg:
            self.to_screen(msg % filename)
        try:
            # Was `self.remove(filename)`: no such method exists on this class
            os.remove(filename)
        except OSError:
            # Include the offending filename instead of the placeholder-less
            # f'Unable to delete file (unknown)'
            self.report_warning(f'Unable to delete file {filename}')
        if filename in info.get('__files_to_move', []):  # NB: Delete even if None
            del info['__files_to_move'][filename]

@staticmethod
def post_extract(info_dict):
def actual_post_extract(info_dict):
Expand Down Expand Up @@ -3487,14 +3498,8 @@ def run_pp(self, pp, infodict):
for f in files_to_delete:
infodict['__files_to_move'].setdefault(f, '')
else:
for old_filename in set(files_to_delete):
self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
try:
self.remove(encodeFilename(old_filename))
except (IOError, OSError):
self.report_warning('Unable to remove downloaded original file')
if old_filename in infodict['__files_to_move']:
del infodict['__files_to_move'][old_filename]
self._delete_downloaded_files(
*files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
return infodict

def run_all_pps(self, key, info, *, additional_pps=None):
Expand Down
9 changes: 9 additions & 0 deletions yt_dlp/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,15 @@
from .websocket import WebSocket


# Optional xattr support: both the `xattr` and `pyxattr` projects install a
# module importable as `xattr`, so a single import covers either backend.
try:
    import xattr  # xattr or pyxattr
except ImportError:
    # Neither backend installed; callers must check for None before use.
    xattr = None
else:
    if hasattr(xattr, 'set'):  # pyxattr
        # Tag the module so later code can tell which backend was imported
        # (detection keys off the module-level `set` attribute — presumably
        # unique to pyxattr; verify against upstream APIs if this breaks).
        xattr._yt_dlp__identifier = 'pyxattr'


# Registry of every non-private name imported above (available deps map to
# their module objects, unavailable ones to None).
all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}


Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/downloader/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_dat
'ctx_id': ctx.get('ctx_id'),
'unrecoverable_http_error': info_dict.get('unrecoverable_http_error'),
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False
if fragment_info_dict.get('filetime'):
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2883,7 +2883,7 @@ def extract_Initialization(source):
content_type = 'video'
elif codecs['acodec'] != 'none':
content_type = 'audio'
elif codecs.get('tcodec', 'none') != 'none':
elif codecs.get('scodec', 'none') != 'none':
content_type = 'text'
elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
content_type = 'text'
Expand Down
1 change: 1 addition & 0 deletions yt_dlp/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2162,6 +2162,7 @@
YoutubeSearchURLIE,
YoutubeMusicSearchURLIE,
YoutubeSubscriptionsIE,
YoutubeStoriesIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeYtBeIE,
Expand Down
95 changes: 80 additions & 15 deletions yt_dlp/extractor/youtube.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import calendar
import copy
import datetime
Expand Down Expand Up @@ -2210,7 +2211,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:2ef1d002cad520f65825346e2084e49d',
},
'params': {'skip_download': True}
},
}, {
# Story. Requires specific player params to work.
# Note: stories get removed after some period of time
'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',
'info_dict': {
'id': 'yN3x1t3sieA',
'ext': 'mp4',
'uploader': 'Linus Tech Tips',
'duration': 13,
'channel': 'Linus Tech Tips',
'playable_in_embed': True,
'tags': [],
'age_limit': 0,
'uploader_url': 'http://www.youtube.com/user/LinusTechTips',
'upload_date': '20220402',
'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',
'title': 'Story',
'live_status': 'not_live',
'uploader_id': 'LinusTechTips',
'view_count': int,
'description': '',
'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',
'categories': ['Science & Technology'],
'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',
'availability': 'unlisted',
}
}
]

@classmethod
Expand Down Expand Up @@ -2861,12 +2888,17 @@ def extract_thread(contents):
lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

continuation = self._extract_continuation(root_continuation_data)
message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
if message and not parent:
self.report_warning(message, video_id=video_id)

response = None
is_forced_continuation = False
is_first_continuation = parent is None
if is_first_continuation and not continuation:
# Sometimes you can get comments by generating the continuation yourself,
# even if YouTube initially reports them being disabled - e.g. stories comments.
# Note: if the comment section is actually disabled, YouTube may return a response with
# required check_get_keys missing. So we will disable that check initially in this case.
continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
is_forced_continuation = True

for page_num in itertools.count(0):
if not continuation:
Expand All @@ -2887,8 +2919,8 @@ def extract_thread(contents):
response = self._extract_response(
item_id=None, query=continuation,
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
check_get_keys='onResponseReceivedEndpoints')

check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
is_forced_continuation = False
continuation_contents = traverse_obj(
response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

Expand All @@ -2913,6 +2945,18 @@ def extract_thread(contents):
if continuation:
break

message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
if message and not parent and tracker['running_total'] == 0:
self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id
    """
    # Binary (protobuf-like) payload embedding the video id twice, followed
    # by the literal section name; YouTube accepts it base64-encoded.
    raw = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    encoded = base64.b64encode(raw.encode())
    return encoded.decode()

def _get_comments(self, ytcfg, video_id, contents, webpage):
"""Entry for comment extraction"""
def _real_comment_extract(contents):
Expand Down Expand Up @@ -2966,7 +3010,10 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
headers = self.generate_api_headers(
ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client, hl=hl)

yt_query = {'videoId': video_id}
yt_query = {
'videoId': video_id,
'params': '8AEB' # enable stories
}
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
Expand Down Expand Up @@ -3327,7 +3374,7 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url,
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
webpage = self._download_webpage(
webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

Expand Down Expand Up @@ -3788,7 +3835,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
unified_strdate(get_first(microformats, 'uploadDate'))
or unified_strdate(search_meta('uploadDate')))
if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
info['upload_date'] = upload_date

for to, frm in fallbacks.items():
Expand Down Expand Up @@ -4303,7 +4350,7 @@ def _get_uncropped(url):
self._extract_visitor_data(data, ytcfg)),
**metadata)

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
first_id = last_id = response = None
for page_num in itertools.count(1):
videos = list(self._playlist_entries(playlist))
Expand All @@ -4313,9 +4360,6 @@ def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
if start >= len(videos):
return
for video in videos[start:]:
if video['id'] == first_id:
self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
return
yield video
first_id = first_id or videos[0]['id']
last_id = videos[-1]['id']
Expand Down Expand Up @@ -4347,13 +4391,18 @@ def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
playlist_url = urljoin(url, try_get(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
compat_str))
if playlist_url and playlist_url != url:

# Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
# [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

if playlist_url and playlist_url != url and not is_known_unviewable:
return self.url_result(
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)

return self.playlist_result(
self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
playlist_id=playlist_id, playlist_title=title)

def _extract_availability(self, data):
Expand Down Expand Up @@ -5890,6 +5939,22 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
}]


class YoutubeStoriesIE(InfoExtractor):
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect a "ytstories:<UCID>" pseudo-URL to the channel's stories playlist."""
        # Stories are served from an auto-generated playlist whose id is
        # 'RLTD' prepended to the channel UCID.
        playlist_id = 'RLTD' + self._match_id(url)
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(playlist_url, ie=YoutubeTabIE, video_id=playlist_id)


class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list
Expand Down
4 changes: 2 additions & 2 deletions yt_dlp/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def _dict_from_options_callback(
general.add_option(
'-U', '--update',
action='store_true', dest='update_self',
help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
help='Update this program to latest version')
general.add_option(
'-i', '--ignore-errors',
action='store_true', dest='ignoreerrors',
Expand Down Expand Up @@ -1517,7 +1517,7 @@ def _dict_from_options_callback(
dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3,
help='Replace text in a metadata field using the given regex. This option can be used multiple times')
postproc.add_option(
'--xattrs',
'--xattrs', '--xattr',
action='store_true', dest='xattrs', default=False,
help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
postproc.add_option(
Expand Down
8 changes: 8 additions & 0 deletions yt_dlp/postprocessor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ def write_debug(self, text, *args, **kwargs):
if self._downloader:
return self._downloader.write_debug(text, *args, **kwargs)

def _delete_downloaded_files(self, *files_to_delete, **kwargs):
    """Delete files, delegating to the attached downloader when one exists."""
    downloader = self._downloader
    if downloader:
        # Delegation lets the downloader apply its own handling of **kwargs
        # (e.g. messages / info bookkeeping).
        return downloader._delete_downloaded_files(*files_to_delete, **kwargs)
    # No downloader attached: best-effort local removal, skipping falsy names.
    import os
    for path in {f for f in files_to_delete if f}:
        os.remove(path)

def get_param(self, name, default=None, *args, **kwargs):
if self._downloader:
return self._downloader.params.get(name, default, *args, **kwargs)
Expand Down
14 changes: 6 additions & 8 deletions yt_dlp/postprocessor/embedthumbnail.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,11 +220,9 @@ def run(self, info):
os.replace(temp_filename, filename)

self.try_utime(filename, mtime, mtime)

files_to_delete = [thumbnail_filename]
if self._already_have_thumbnail:
if original_thumbnail == thumbnail_filename:
files_to_delete = []
elif original_thumbnail != thumbnail_filename:
files_to_delete.append(original_thumbnail)
return files_to_delete, info
converted = original_thumbnail != thumbnail_filename
self._delete_downloaded_files(
thumbnail_filename if converted or not self._already_have_thumbnail else None,
original_thumbnail if converted and not self._already_have_thumbnail else None,
info=info)
return [], info
7 changes: 3 additions & 4 deletions yt_dlp/postprocessor/ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def concat_files(self, in_files, out_file, concat_opts=None):
self.real_run_ffmpeg(
[(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
[(out_file, out_flags)])
os.remove(concat_file)
self._delete_downloaded_files(concat_file)

@classmethod
def _concat_spec(cls, in_files, concat_opts=None):
Expand Down Expand Up @@ -770,8 +770,7 @@ def run(self, info):
self.run_ffmpeg_multiple_files(
(filename, metadata_filename), temp_filename,
itertools.chain(self._options(info['ext']), *options))
for file in filter(None, files_to_delete):
os.remove(file) # Don't obey --keep-files
self._delete_downloaded_files(*files_to_delete)
os.replace(temp_filename, filename)
return [], info

Expand Down Expand Up @@ -1122,7 +1121,7 @@ def run(self, info):
destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())])
if in_file != info['filepath']:
os.remove(in_file)
self._delete_downloaded_files(in_file, msg=None)
return [], info


Expand Down

0 comments on commit 77931f3

Please sign in to comment.