-
-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[extractor/odkmedia] Add
OnDemandChinaEpisodeIE
(#6116)
Authored by: HobbyistDev, pukkandan
- Loading branch information
1 parent
72671a2
commit 10fd9e6
Showing
2 changed files
with
106 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
import json | ||
import urllib.error | ||
|
||
from .common import InfoExtractor | ||
from ..utils import ( | ||
ExtractorError, | ||
GeoRestrictedError, | ||
float_or_none, | ||
traverse_obj, | ||
try_call | ||
) | ||
|
||
|
||
class OnDemandChinaEpisodeIE(InfoExtractor): | ||
_VALID_URL = r'https?://www\.ondemandchina\.com/\w+/watch/(?P<series>[\w-]+)/(?P<id>ep-(?P<ep>\d+))' | ||
_TESTS = [{ | ||
'url': 'https://www.ondemandchina.com/en/watch/together-against-covid-19/ep-1', | ||
'info_dict': { | ||
'id': '264394', | ||
'ext': 'mp4', | ||
'duration': 3256.88, | ||
'title': 'EP 1 The Calling', | ||
'alt_title': '第1集 令出如山', | ||
'thumbnail': 'https://d2y2efdi5wgkcl.cloudfront.net/fit-in/256x256/media-io/2020/9/11/image.d9816e81.jpg', | ||
'description': '疫情严峻,党政军民学、东西南北中协同应考', | ||
'tags': ['Social Humanities', 'Documentary', 'Medical', 'Social'], | ||
} | ||
}] | ||
|
||
_QUERY = ''' | ||
query Episode($programSlug: String!, $episodeNumber: Int!) { | ||
episode( | ||
programSlug: $programSlug | ||
episodeNumber: $episodeNumber | ||
kind: "series" | ||
part: null | ||
) { | ||
id | ||
title | ||
titleEn | ||
titleKo | ||
titleZhHans | ||
titleZhHant | ||
synopsis | ||
synopsisEn | ||
synopsisKo | ||
synopsisZhHans | ||
synopsisZhHant | ||
videoDuration | ||
images { | ||
thumbnail | ||
} | ||
} | ||
}''' | ||
|
||
def _real_extract(self, url): | ||
program_slug, display_id, ep_number = self._match_valid_url(url).group('series', 'id', 'ep') | ||
webpage = self._download_webpage(url, display_id) | ||
|
||
video_info = self._download_json( | ||
'https://odc-graphql.odkmedia.io/graphql', display_id, | ||
headers={'Content-type': 'application/json'}, | ||
data=json.dumps({ | ||
'operationName': 'Episode', | ||
'query': self._QUERY, | ||
'variables': { | ||
'programSlug': program_slug, | ||
'episodeNumber': int(ep_number), | ||
}, | ||
}).encode())['data']['episode'] | ||
|
||
try: | ||
source_json = self._download_json( | ||
f'https://odkmedia.io/odc/api/v2/playback/{video_info["id"]}/', display_id, | ||
headers={'Authorization': '', 'service-name': 'odc'}) | ||
except ExtractorError as e: | ||
if isinstance(e.cause, urllib.error.HTTPError): | ||
error_data = self._parse_json(e.cause.read(), display_id)['detail'] | ||
raise GeoRestrictedError(error_data) | ||
|
||
formats, subtitles = [], {} | ||
for source in traverse_obj(source_json, ('sources', ...)): | ||
if source.get('type') == 'hls': | ||
fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('url'), display_id) | ||
formats.extend(fmts) | ||
self._merge_subtitles(subs, target=subtitles) | ||
else: | ||
self.report_warning(f'Unsupported format {source.get("type")}', display_id) | ||
|
||
return { | ||
'id': str(video_info['id']), | ||
'duration': float_or_none(video_info.get('videoDuration'), 1000), | ||
'thumbnail': (traverse_obj(video_info, ('images', 'thumbnail')) | ||
or self._html_search_meta(['og:image', 'twitter:image'], webpage)), | ||
'title': (traverse_obj(video_info, 'title', 'titleEn') | ||
or self._html_search_meta(['og:title', 'twitter:title'], webpage) | ||
or self._html_extract_title(webpage)), | ||
'alt_title': traverse_obj(video_info, 'titleKo', 'titleZhHans', 'titleZhHant'), | ||
'description': (traverse_obj( | ||
video_info, 'synopsisEn', 'synopsisKo', 'synopsisZhHans', 'synopsisZhHant', 'synopisis') | ||
or self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)), | ||
'formats': formats, | ||
'subtitles': subtitles, | ||
'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')) | ||
} |