From 5caf30dbc34f10b0be60676fece635b5c59f0d72 Mon Sep 17 00:00:00 2001 From: Audrey <45548254+tntmod54321@users.noreply.github.com> Date: Fri, 26 May 2023 08:24:39 -0400 Subject: [PATCH] [extractor/youtube] Extract `heatmap` data (#7100) Closes #3888 Authored by: tntmod54321 --- yt_dlp/extractor/common.py | 4 ++++ yt_dlp/extractor/youtube.py | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 78288f8091c..1b1dd560fd8 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -350,6 +350,10 @@ class InfoExtractor: * "start_time" - The start time of the chapter in seconds * "end_time" - The end time of the chapter in seconds * "title" (optional, string) + heatmap: A list of dictionaries, with the following entries: + * "start_time" - The start time of the data point in seconds + * "end_time" - The end time of the data point in seconds + * "value" - The normalized value of the data point (float between 0 and 1) playable_in_embed: Whether this video is allowed to play in embedded players on other sites. Can be True (=always allowed), False (=never allowed), None (=unknown), or a string diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 654bf5e6b63..80edcd77dac 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1273,6 +1273,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Philipp Hagemeister', 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', + 'heatmap': 'count:100', } }, { @@ -1426,6 +1427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'FlyingKitty', 'uploader_url': 'https://www.youtube.com/@FlyingKitty900', 'uploader_id': '@FlyingKitty900', + 'comment_count': int, }, }, { @@ -3244,6 +3246,17 @@ def _extract_chapters_from_engagement_panel(self, data, duration): chapter_time, chapter_title, duration) for contents in content_list)), []) + def _extract_heatmap_from_player_overlay(self, data): + content_list = traverse_obj(data, ( + 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar', + 'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list})) + return next(filter(None, ( + traverse_obj(contents, (..., 'heatMarkerRenderer', { + 'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}), + 'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000}, + 'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}), + })) for contents in content_list)), None) + def _extract_comment(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId') if not comment_id: @@ -4313,6 +4326,8 @@ def process_language(container, base_url, lang_code, sub_name, query): or self._extract_chapters_from_description(video_description, duration) or None) + info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data) + contents = traverse_obj( initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'), expected_type=list, default=[])