Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ondemandkorea] New extractor for ondemandkorea.com #10772

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions youtube_dl/extractor/extractors.py
Expand Up @@ -638,6 +638,7 @@
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .ondemandkorea import OnDemandKoreaIE
from .onet import (
OnetIE,
OnetChannelIE,
Expand Down
58 changes: 58 additions & 0 deletions youtube_dl/extractor/ondemandkorea.py
@@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import ExtractorError


class OnDemandKoreaIE(InfoExtractor):
    """Information extractor for ``<slug>.html`` pages on ondemandkorea.com.

    The page slug (the part of the path before ``.html``) is used as the
    video id. Formats come from the HLS manifest referenced in the page,
    and WebVTT subtitle tracks are collected from the same page.
    """

    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
    _TEST = {
        'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
        'info_dict': {
            'id': 'ask-us-anything-e43',
            'ext': 'mp4',
            'title': 'Ask Us Anything : E43',
            # Raw string: '\.' is not a valid escape sequence in a plain
            # string literal and triggers a warning on modern Pythons.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': 'm3u8 download'
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the page at *url*.

        Returns a dict with the keys ``id``, ``title``, ``thumbnail``,
        ``formats`` and ``subtitles``.

        Raises ExtractorError (marked as expected) when the page could not
        be downloaded, when the page contains the region-block image, or
        when the page states the video is restricted to ODK PLUS members.
        """
        video_id = self._match_id(url)
        # fatal=False so that a failed download can be reported with a
        # site-specific message below instead of a generic error.
        webpage = self._download_webpage(url, video_id, fatal=False)

        if not webpage:
            # Page sometimes returns captcha page with HTTP 403
            raise ExtractorError(
                'Unable to access page. You may have been blocked.',
                expected=True)

        # The presence of this image in the page is treated as the
        # geo-restriction notice.
        if 'msg_block_01.png' in webpage:
            raise ExtractorError(
                'This content is not available in your region.',
                expected=True)

        if 'This video is only available to ODK PLUS members.' in webpage:
            raise ExtractorError(
                'This video is only available to ODK PLUS members.',
                expected=True)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The player configuration embeds the HLS manifest as a
        # double-quoted `file:` entry.
        manifest_url = self._search_regex(
            r'file:\s"(https?://[\S].+?/manifest\.m3u8)',
            webpage, 'manifest')
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        # Subtitle tracks are single-quoted `file:` entries ending in .vtt,
        # each followed by a `label:` that is used as the language key.
        subtitle_tracks = re.findall(
            r'file:\s\'(?P<file>[^\']+\.vtt)\',\s+label:\s+\'(?P<lang>[^\']+)\'',
            webpage)
        subtitles = {}
        for sub_path, sub_label in subtitle_tracks:
            # The matched path is appended to the site origin as-is; a later
            # track with the same label replaces an earlier one.
            subtitles[sub_label] = [{
                'url': 'http://www.ondemandkorea.com' + sub_path,
                'ext': sub_path[-3:],
            }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
        }