Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ie/ArteTV] Separate forced subtitle tracks #9945

Merged
merged 9 commits into from
May 22, 2024
64 changes: 21 additions & 43 deletions yt_dlp/extractor/arte.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,6 @@ class ArteTVIE(ArteTVBaseIE):
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'only_matching': True,
}, {
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
'info_dict': {
'id': '100103-000-A',
'title': 'USA: Dyskryminacja na porodówce',
'description': 'md5:242017b7cce59ffae340a54baefcafb1',
'alt_title': 'ARTE Reportage',
'upload_date': '20201103',
'duration': 554,
'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
'timestamp': 1604417980,
'ext': 'mp4',
},
'params': {'skip_download': 'm3u8'}
}, {
'note': 'No alt_title',
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
Expand All @@ -59,33 +45,21 @@ class ArteTVIE(ArteTVBaseIE):
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
'only_matching': True,
}, {
'note': 'age-restricted',
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
'info_dict': {
'id': '006785-000-A',
'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba',
'title': 'The Element of Crime',
'timestamp': 1696111200,
'duration': 5849,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
'upload_date': '20230930',
'ext': 'mp4',
},
}, {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't remove this test unless you can add another age-restricted test video

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you tell me why? This test fails because the video is no longer available on the website. Maybe I can comment it out if you really want to keep it as a reminder...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests are generally added to exercise specific code paths. If a test no longer functions, that path is no longer tested, and the test needs to be replaced. It's left in there as a reminder that it needs to be replaced.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So can I comment it out, as a reminder, so that the broken test doesn't fail before I am able to run my own test on captions?

Copy link
Member

@bashonly bashonly May 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restore the test and add `'skip': '404 Not Found'` to it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Works great! I put the skipped test at the end of the tests, otherwise my test was skipped.

'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
'info_dict': {
'id': '085374-003-A',
'id': '109067-000-A',
'ext': 'mp4',
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
'timestamp': 1702872000,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
'duration': 2594,
'title': 'Die kurze Zeit der Jugend',
'alt_title': 'Im hohen Norden geboren',
'upload_date': '20231218',
'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
'timestamp': 1713927600,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
'duration': 7599,
'title': 'La loi de Téhéran',
'alt_title': None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the correct expected value is None, it doesn't need to be in the test. If not, put the correct value here and let the test fail

Suggested change
'alt_title': None,

'upload_date': '20240424',
'subtitles': {
'fr': 'mincount:1',
'fr-acc': 'mincount:1',
'fr-partial': 'mincount:1',
},
},
}]
Expand Down Expand Up @@ -142,17 +116,21 @@ class ArteTVIE(ArteTVBaseIE):
def _fix_accessible_subs_locale(subs):
updated_subs = {}
for lang, sub_formats in subs.items():
for fmt in sub_formats:
if fmt.get('url', '').endswith('-MAL.m3u8'):
lang += '-acc'
updated_subs.setdefault(lang, []).append(fmt)
# print(lang, sub_formats)
for format in sub_formats:
_lang = lang
if format.get('url', '').endswith('-MAL.m3u8'):
_lang += '-acc'
elif not "_VO" in format.get('url', ''):
_lang += f"-partial"
updated_subs.setdefault(_lang, []).append(format)
vtexier marked this conversation as resolved.
Show resolved Hide resolved
return updated_subs

def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')
langauge_code = self._LANG_MAP.get(lang)
language_code = self._LANG_MAP.get(lang)

config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
'x-validated-age': '18'
Expand Down Expand Up @@ -180,10 +158,10 @@ def _real_extract(self, url):
m = self._VERSION_CODE_RE.match(stream_version_code)
if m:
lang_pref = int(''.join('01'[x] for x in (
m.group('vlang') == langauge_code, # we prefer voice in the requested language
m.group('vlang') == language_code, # we prefer voice in the requested language
not m.group('audio_desc'), # and not the audio description version
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
not m.group('has_sub'), # but we prefer no subtitles otherwise
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
)))
Expand Down
Loading