Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[extractor/medaltv] Fix medal.tv clips URL (#6489) #6502

Merged
merged 12 commits on Mar 12, 2023
23 changes: 6 additions & 17 deletions yt_dlp/extractor/medaltv.py
Expand Up @@ -8,12 +8,12 @@
float_or_none,
int_or_none,
str_or_none,
- traverse_obj,
+ traverse_obj
)


class MedalTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?medal\.tv/(?P<path>games/[^/?#&]+/clips)/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
Expand Down Expand Up @@ -80,25 +80,14 @@ class MedalTVIE(InfoExtractor):

def _real_extract(self, url):
video_id = self._match_id(url)
- path = self._match_valid_url(url).group('path')

webpage = self._download_webpage(url, video_id)

- next_data = self._search_json(
- '<script[^>]*__NEXT_DATA__[^>]*>', webpage,
+ hydration_data = self._search_json(
+ r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
'next data', video_id, end_pattern='</script>', fatal=False)

- build_id = next_data.get('buildId')
- if not build_id:
- raise ExtractorError(
- 'Could not find build ID.', video_id=video_id)
-
- locale = next_data.get('locale', 'en')
-
- api_response = self._download_json(
- f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json', video_id)
-
- clip = traverse_obj(api_response, ('pageProps', 'clip')) or {}
+ clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
if not clip:
raise ExtractorError(
'Could not find video information.', video_id=video_id)
Expand Down Expand Up @@ -152,7 +141,7 @@ def add_item(container, item_url, height, id_key='format_id', item_id=None):

# Necessary because the id of the author is not known in advance.
# Won't raise an issue if no profile can be found as this is optional.
- author = traverse_obj(api_response, ('pageProps', 'profile')) or {}
+ author = traverse_obj(hydration_data, ('profiles', ...), get_all=False) or {}
author_id = str_or_none(author.get('userId'))
author_url = format_field(author_id, None, 'https://medal.tv/users/%s')

Expand Down