Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ie/ORFTVthek] Remove extractor #10011

Merged
merged 3 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,7 +1387,6 @@
)
from .ora import OraTVIE
from .orf import (
ORFTVthekIE,
ORFFM4StoryIE,
ORFONIE,
ORFRadioIE,
Expand Down
183 changes: 3 additions & 180 deletions yt_dlp/extractor/orf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,204 +3,24 @@
import re

from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
InAdvancePagedList,
clean_html,
determine_ext,
float_or_none,
int_or_none,
join_nonempty,
make_archive_id,
mimetype2ext,
orderedSet,
parse_age_limit,
remove_end,
smuggle_url,
strip_jsonp,
try_call,
unescapeHTML,
unified_strdate,
unsmuggle_url,
url_or_none,
)
from ..utils.traversal import traverse_obj


class ORFTVthekIE(InfoExtractor):
IE_NAME = 'orf:tvthek'
IE_DESC = 'ORF TVthek'
_VALID_URL = r'(?P<url>https?://tvthek\.orf\.at/(?:(?:[^/]+/){2}){1,2}(?P<id>\d+))(/[^/]+/(?P<vid>\d+))?(?:$|[?#])'

_TESTS = [{
'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079',
'info_dict': {
'id': '14121079',
},
'playlist_count': 11,
'params': {'noplaylist': True}
}, {
'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
'info_dict': {
'id': '14121079',
},
'playlist_count': 1,
'params': {'playlist_items': '5'}
}, {
'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
'info_dict': {
'id': '14121079',
},
'playlist': [{
'info_dict': {
'id': '15083150',
'ext': 'mp4',
'description': 'md5:7be1c485425f5f255a5e4e4815e77d04',
'thumbnail': 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg',
'title': 'Umfrage: Welches Tier ist Sebastian Kurz?',
}
}],
'playlist_count': 1,
'params': {'noplaylist': True, 'skip_download': 'm3u8'}
}, {
'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
'playlist': [{
'md5': '2942210346ed779588f428a92db88712',
'info_dict': {
'id': '8896777',
'ext': 'mp4',
'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
'description': 'md5:c1272f0245537812d4e36419c207b67d',
'duration': 2668,
'upload_date': '20141208',
},
}],
'skip': 'Blocked outside of Austria / Germany',
}, {
'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
'info_dict': {
'id': '7982259',
'ext': 'mp4',
'title': 'Best of Ingrid Thurnher',
'upload_date': '20140527',
'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
},
'params': {
'skip_download': True, # rtsp downloads
},
'skip': 'Blocked outside of Austria / Germany',
}, {
'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
'only_matching': True,
}, {
'url': 'http://tvthek.orf.at/profile/Universum/35429',
'only_matching': True,
}]

def _pagefunc(self, url, data_jsb, n, *, image=None):
sd = data_jsb[n]
video_id, title = str(sd['id']), sd['title']
formats = []
for fd in sd['sources']:
src = url_or_none(fd.get('src'))
if not src:
continue
format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
ext = determine_ext(src)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
src, video_id, 'mp4', m3u8_id=format_id, fatal=False, note=f'Downloading {format_id} m3u8 manifest')
if any('/geoprotection' in f['url'] for f in m3u8_formats):
self.raise_geo_restricted()
formats.extend(m3u8_formats)
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id=format_id, fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src, video_id, mpd_id=format_id, fatal=False, note=f'Downloading {format_id} mpd manifest'))
else:
formats.append({
'format_id': format_id,
'url': src,
'protocol': fd.get('protocol'),
})

# Check for geoblocking.
# There is a property is_geoprotection, but that's always false
geo_str = sd.get('geoprotection_string')
http_url = next(
(f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])),
None) if geo_str else None
if http_url:
self._request_webpage(
HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking',
errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats')

subtitles = {}
for sub in sd.get('subtitles', []):
sub_src = sub.get('src')
if not sub_src:
continue
subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
'url': sub_src,
})

upload_date = unified_strdate(sd.get('created_date'))

thumbnails = []
preview = sd.get('preview_image_url')
if preview:
thumbnails.append({
'id': 'preview',
'url': preview,
'preference': 0,
})
image = sd.get('image_full_url') or image
if image:
thumbnails.append({
'id': 'full',
'url': image,
'preference': 1,
})

yield {
'id': video_id,
'title': title,
'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}),
'formats': formats,
'subtitles': subtitles,
'description': sd.get('description'),
'duration': int_or_none(sd.get('duration_in_seconds')),
'upload_date': upload_date,
'thumbnails': thumbnails,
}

def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url)
playlist_id, video_id, base_url = self._match_valid_url(url).group('id', 'vid', 'url')
webpage = self._download_webpage(url, playlist_id)

data_jsb = self._parse_json(
self._search_regex(
r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
webpage, 'playlist', group='json'),
playlist_id, transform_source=unescapeHTML)['playlist']['videos']

if not self._yes_playlist(playlist_id, video_id, smuggled_data):
data_jsb = [sd for sd in data_jsb if str(sd.get('id')) == video_id]

playlist_count = len(data_jsb)
image = self._og_search_thumbnail(webpage) if playlist_count == 1 else None

page_func = functools.partial(self._pagefunc, base_url, data_jsb, image=image)
return {
'_type': 'playlist',
'entries': InAdvancePagedList(page_func, playlist_count, 1),
'id': playlist_id,
}


class ORFRadioIE(InfoExtractor):
IE_NAME = 'orf:radio'

Expand Down Expand Up @@ -583,6 +403,7 @@ class ORFONIE(InfoExtractor):
'media_type': 'episode',
'timestamp': 1706472362,
'upload_date': '20240128',
'_old_archive_ids': ['orftvthek 14210000'],
}
}, {
'url': 'https://on.orf.at/video/3220355',
Expand All @@ -597,6 +418,7 @@ class ORFONIE(InfoExtractor):
'media_type': 'episode',
'timestamp': 52916400,
'upload_date': '19710905',
'_old_archive_ids': ['orftvthek 3220355'],
}
}]

Expand Down Expand Up @@ -631,6 +453,7 @@ def _extract_video(self, video_id):
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'_old_archive_ids': [make_archive_id('ORFTVthek', video_id)],
**traverse_obj(api_json, {
'age_limit': ('age_classification', {parse_age_limit}),
'duration': ('duration_second', {float_or_none}),
Expand Down
Loading