Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ie/ondemandkorea] Overhaul extractor (#8386)
Closes #8374. Authored by: seproDev
- Loading branch information
Showing 2 changed files with 149 additions and 66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,87 +1,167 @@ | ||
import functools | ||
import re | ||
import uuid | ||
|
||
from .common import InfoExtractor | ||
from ..networking import HEADRequest | ||
from ..utils import ( | ||
ExtractorError, | ||
js_to_json, | ||
OnDemandPagedList, | ||
float_or_none, | ||
int_or_none, | ||
join_nonempty, | ||
parse_age_limit, | ||
parse_qs, | ||
unified_strdate, | ||
url_or_none, | ||
) | ||
from ..utils.traversal import traverse_obj | ||
|
||
|
||
class OnDemandKoreaIE(InfoExtractor):
    """Extractor for a single OnDemandKorea VOD player page.

    Matches ``/player/vod/<slug>?contentId=<digits>`` URLs (optionally under
    ``/en/``) and uses the numeric ``contentId`` as the video id.  Formats,
    subtitles and metadata all come from the ``odkmedia.io`` playback API.
    """
    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/[a-z0-9-]+\?(?:[^#]+&)?contentId=(?P<id>\d+)'
    _GEO_COUNTRIES = ['US', 'CA']

    _TESTS = [{
        'url': 'https://www.ondemandkorea.com/player/vod/ask-us-anything?contentId=686471',
        'md5': 'e2ff77255d989e3135bde0c5889fbce8',
        'info_dict': {
            'id': '686471',
            'ext': 'mp4',
            'title': 'Ask Us Anything: Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'duration': 5486.955,
            'release_date': '20220924',
            'series': 'Ask Us Anything',
            'series_id': 11790,
            'episode_number': 351,
            'episode': 'Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
        },
    }, {
        'url': 'https://www.ondemandkorea.com/player/vod/breakup-probation-a-week?contentId=1595796',
        'md5': '57266c720006962be7ff415b24775caa',
        'info_dict': {
            'id': '1595796',
            'ext': 'mp4',
            'title': 'Breakup Probation, A Week: E08',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'duration': 1586.0,
            'release_date': '20231001',
            'series': 'Breakup Probation, A Week',
            'series_id': 22912,
            'episode_number': 8,
            'episode': 'E08',
        },
    }, {
        'url': 'https://www.ondemandkorea.com/player/vod/the-outlaws?contentId=369531',
        'md5': 'fa5523b87aa1f6d74fc622a97f2b47cd',
        'info_dict': {
            'id': '369531',
            'ext': 'mp4',
            'release_date': '20220519',
            'duration': 7267.0,
            'title': 'The Outlaws: Main Movie',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'age_limit': 18,
        },
    }, {
        'url': 'https://www.ondemandkorea.com/en/player/vod/capture-the-moment-how-is-that-possible?contentId=1605006',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one video from the playback API.

        Raises:
            ExtractorError: (expected) when the API response has no dict
                under ``result``; the API-supplied message is used if any.
        """
        video_id = self._match_id(url)

        # A fresh random device id is sent with every request.  403/404 are
        # tolerated so that the JSON error body can be inspected below.
        data = self._download_json(
            f'https://odkmedia.io/odx/api/v3/playback/{video_id}/', video_id, fatal=False,
            headers={'service-name': 'odk'}, query={'did': str(uuid.uuid4())}, expected_status=(403, 404))
        if not traverse_obj(data, ('result', {dict})):
            # Prefer the API's own explanation (messages.__default, then title)
            msg = traverse_obj(data, ('messages', '__default'), 'title', expected_type=str)
            raise ExtractorError(msg or 'Got empty response from playback API', expected=True)

        data = data['result']

        def try_geo_bypass(url):
            # If the URL carries a valid `stream_url` query parameter, use
            # that URL instead; otherwise keep the URL unchanged.
            return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url

        def try_upgrade_quality(url):
            # Rewrite `_720[p].m3u8` to `_1080[p].m3u8` and keep the rewritten
            # URL only if it differs and a HEAD request to it succeeds.
            mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
            return mod_url if mod_url != url and self._request_webpage(
                HEADRequest(mod_url), video_id, note='Checking for higher quality format',
                errnote='No higher quality format found', fatal=False) else url

        formats = []
        # Manifest URLs may be listed under either `sources` or `manifest`
        for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
            formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False))

        subtitles = {}
        # Only tracks with a valid URL are kept; tracks without a language
        # key are grouped under 'und'.
        for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):
            subtitles.setdefault(track.get('language', 'und'), []).append({
                'url': track['url'],
                'ext': track.get('codec'),
                'name': track.get('label'),
            })

        def if_series(key=None):
            # Yield obj[key] only when the episode object is of kind 'series',
            # so series/episode fields stay unset for other kinds.
            return lambda obj: obj[key] if key and obj['kind'] == 'series' else None

        return {
            'id': video_id,
            # "<program title>: <episode title>", skipping whichever is empty
            'title': join_nonempty(
                ('episode', 'program', 'title'),
                ('episode', 'title'), from_dict=data, delim=': '),
            **traverse_obj(data, {
                'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}),
                'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}),
                # The raw value is divided by 1000 (presumably milliseconds -> seconds)
                'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
                # Any 'R' in the rating name is removed before parsing it
                'age_limit': ('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}),
                'series': ('episode', {if_series(key='program')}, 'title'),
                'series_id': ('episode', {if_series(key='program')}, 'id'),
                'episode': ('episode', {if_series(key='title')}),
                'episode_number': ('episode', {if_series(key='number')}, {int_or_none}),
            }, get_all=False),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||
|
||
class OnDemandKoreaProgramIE(InfoExtractor):
    """Playlist extractor for an OnDemandKorea program page.

    Matches ``/player/vod/<slug>`` URLs that carry no query string and
    returns a lazily paged playlist of the program's episodes, each one
    delegated to ``OnDemandKoreaIE``.
    """
    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/(?P<id>[a-z0-9-]+)(?:$|#)'
    _GEO_COUNTRIES = ['US', 'CA']

    _TESTS = [{
        'url': 'https://www.ondemandkorea.com/player/vod/uskn-news',
        'info_dict': {
            'id': 'uskn-news',
        },
        'playlist_mincount': 755,
    }, {
        'url': 'https://www.ondemandkorea.com/en/player/vod/the-land',
        'info_dict': {
            'id': 'the-land',
        },
        'playlist_count': 52,
    }]

    # Number of episodes requested per API page
    _PAGE_SIZE = 100

    def _fetch_page(self, display_id, page):
        """Yield url results for one page of the program's episode list.

        ``display_id`` is the program slug from the URL.  ``page`` is the
        index handed over by the paged list; the API is queried with
        ``page + 1``.
        """
        page += 1
        # A 404 is tolerated; the response then simply yields no episodes
        page_data = self._download_json(
            f'https://odkmedia.io/odx/api/v3/program/{display_id}/episodes/', display_id,
            headers={'service-name': 'odk'}, query={
                'page': page,
                'page_size': self._PAGE_SIZE,
            }, note=f'Downloading page {page}', expected_status=404)
        # Keep only entries that expose a usable `id`, so that a single
        # malformed entry cannot abort the whole playlist with a KeyError.
        for episode in traverse_obj(page_data, ('result', 'results', lambda _, v: v['id'])):
            yield self.url_result(
                f'https://www.ondemandkorea.com/player/vod/{display_id}?contentId={episode["id"]}',
                ie=OnDemandKoreaIE, video_title=episode.get('title'))

    def _real_extract(self, url):
        """Return a playlist whose entries are fetched page by page on demand."""
        display_id = self._match_id(url)

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, display_id), self._PAGE_SIZE)

        return self.playlist_result(entries, display_id)