Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ie/Turbo] Fix turbo extractor #8927

Merged
merged 11 commits into from Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion yt_dlp/extractor/_extractors.py
Expand Up @@ -2019,7 +2019,6 @@
TuneInPodcastEpisodeIE,
TuneInShortenerIE,
)
from .turbo import TurboIE
from .tv2 import (
TV2IE,
TV2ArticleIE,
Expand Down Expand Up @@ -2223,6 +2222,7 @@
VikiIE,
VikiChannelIE,
)
from .viously import ViouslyIE
from .viqeo import ViqeoIE
from .viu import (
ViuIE,
Expand Down
64 changes: 0 additions & 64 deletions yt_dlp/extractor/turbo.py

This file was deleted.

60 changes: 60 additions & 0 deletions yt_dlp/extractor/viously.py
@@ -0,0 +1,60 @@
import base64
import re

from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
parse_iso8601,
)
from ..utils.traversal import traverse_obj


class ViouslyIE(InfoExtractor):
    """Extract videos from Viously player embeds found in a webpage.

    Player ``<div>`` elements are located by their ``vsly-player`` /
    ``viously-player`` class. The ``id`` attribute of each element is used as
    the video ID for both the HLS manifest and the metadata endpoint.
    """

    # No URL pattern of its own; videos are picked up via _extract_from_webpage.
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
        'md5': '37a6c3381599381ff53a7e1e0575c0bc',
        'info_dict': {
            'id': 'F_xQzS2jwb3',
            'ext': 'mp4',
            'title': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
            'description': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
            'age_limit': 0,
            'upload_date': '20230328',
            'timestamp': 1680037507,
            'duration': 3716,
            'categories': ['motors'],
        }
    }]

    def _extract_from_webpage(self, url, webpage):
        """Yield one info dict per playable Viously embed in ``webpage``.

        Embeds for which no HLS formats could be extracted are skipped.
        Metadata is best-effort: if the metadata request fails or cannot be
        decoded, the entry is still yielded with only ``id`` and ``formats``.
        """
        viously_players = re.findall(r'<div[^>]*class="(?:[^"]*\s)?v(?:iou)?sly-player(?:\s[^"]*)?"[^>]*>', webpage)
        if not viously_players:
            return

        def custom_decode(text):
            # The metadata response is base64 written with a substituted
            # alphabet: map it back to the standard alphabet, decode it, and
            # strip the NUL characters surrounding the decoded JSON text.
            STANDARD_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
            CUSTOM_ALPHABET = 'VIOUSLYABCDEFGHJKMNPQRTWXZviouslyabcdefghjkmnpqrtwxz9876543210+/='
            data = base64.b64decode(text.translate(str.maketrans(CUSTOM_ALPHABET, STANDARD_ALPHABET)))
            return data.decode('utf-8').strip('\x00')

        for video_id in traverse_obj(viously_players, (..., {extract_attributes}, 'id')):
            formats = self._extract_m3u8_formats(
                f'https://www.viously.com/video/hls/{video_id}/index.m3u8', video_id, fatal=False)
            if not formats:
                # Nothing playable for this embed; skip the metadata request too.
                continue

            # Metadata is optional: a failed download must not drop the video.
            data = self._download_json(
                f'https://www.viously.com/export/json/{video_id}', video_id,
                transform_source=custom_decode, fatal=False)
            metadata = traverse_obj(data, ('video', {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'duration': ('duration', {int_or_none}),
                'timestamp': ('iso_date', {parse_iso8601}),
                # Only emit a category list when a string name is present.
                'categories': ('category', 'name', {str}, {lambda x: [x] if x else None}),
            })) or {}

            yield {
                'id': video_id,
                'formats': formats,
                **metadata,
            }