Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ie/francetv] Fix extraction #9333

Merged
merged 7 commits into from
Mar 2, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 25 additions & 16 deletions yt_dlp/extractor/francetv.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
import urllib.parse

from .common import InfoExtractor
from .dailymotion import DailymotionIE
from ..utils import (
ExtractorError,
determine_ext,
filter_dict,
format_field,
int_or_none,
join_nonempty,
parse_iso8601,
parse_qs,
smuggle_url,
unsmuggle_url,
url_or_none,
)


class FranceTVBaseInfoExtractor(InfoExtractor):
def _make_url_result(self, video_or_full_id, catalog=None):
def _make_url_result(self, video_or_full_id, catalog=None, url=None):
full_id = 'francetv:%s' % video_or_full_id
if '@' not in video_or_full_id and catalog:
full_id += '@%s' % catalog
if url:
full_id = smuggle_url(full_id, {'origin': urllib.parse.urlparse(url).hostname})
return self.url_result(
full_id, ie=FranceTVIE.ie_key(),
video_id=video_or_full_id.split('@')[0])
Expand Down Expand Up @@ -76,7 +84,7 @@ class FranceTVIE(InfoExtractor):
'only_matching': True,
}]

def _extract_video(self, video_id, catalogue=None):
def _extract_video(self, video_id, catalogue=None, origin=None):
# Videos are identified by idDiffusion so catalogue part is optional.
# However when provided, some extra formats may be returned so we pass
# it if available.
Expand All @@ -94,10 +102,11 @@ def _extract_video(self, video_id, catalogue=None):
for device_type in ('desktop', 'mobile'):
dinfo = self._download_json(
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
video_id, 'Downloading %s video JSON' % device_type, query={
video_id, 'Downloading %s video JSON' % device_type, query=filter_dict({
'device_type': device_type,
'browser': 'chrome',
}, fatal=False)
'domain': origin,
}), fatal=False)

if not dinfo:
continue
Expand Down Expand Up @@ -130,18 +139,17 @@ def _extract_video(self, video_id, catalogue=None):
subtitles = {}
for video in videos:
format_id = video.get('format')
video_url = url_or_none(video.get('url'))
if not video_url:
continue

video_url = None
if video.get('workflow') == 'token-akamai':
token_url = video.get('token')
if token_url:
if token_url := url_or_none(video.get('token')):
token_json = self._download_json(
token_url, video_id,
'Downloading signed %s manifest URL' % format_id)
if token_json:
video_url = token_json.get('url')
if not video_url:
video_url = video.get('url')
token_url, video_id, f'Downloading signed {format_id} manifest URL',
fatal=False, query={'format': 'json', 'url': video_url}) or {}
if tokenized_url := url_or_none(token_json.get('url')):
video_url = tokenized_url

ext = determine_ext(video_url)
if ext == 'f4m':
Expand Down Expand Up @@ -213,6 +221,7 @@ def _extract_video(self, video_id, catalogue=None):
}

def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
catalog = mobj.group('catalog')
Expand All @@ -224,7 +233,7 @@ def _real_extract(self, url):
if not video_id:
raise ExtractorError('Invalid URL', expected=True)

return self._extract_video(video_id, catalog)
return self._extract_video(video_id, catalog, origin=smuggled_data.get('origin'))


class FranceTVSiteIE(FranceTVBaseInfoExtractor):
Expand Down Expand Up @@ -314,7 +323,7 @@ def _real_extract(self, url):
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
webpage, 'video ID').split('@')

return self._make_url_result(video_id, catalogue)
return self._make_url_result(video_id, catalogue, url=url)


class FranceTVInfoIE(FranceTVBaseInfoExtractor):
Expand Down Expand Up @@ -405,4 +414,4 @@ def _real_extract(self, url):
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
webpage, 'video id')

return self._make_url_result(video_id)
return self._make_url_result(video_id, url=url)
Loading