Skip to content

Commit

Permalink
[teachable] Add support for teachable based platform sites (closes #5451
Browse files Browse the repository at this point in the history
, closes #18150, closes #18272)
  • Loading branch information
dstftw committed Dec 9, 2018
1 parent 3ad6dab commit c486aa9
Show file tree
Hide file tree
Showing 3 changed files with 268 additions and 4 deletions.
8 changes: 4 additions & 4 deletions youtube_dl/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,10 @@
from .tastytrade import TastyTradeIE
from .tbs import TBSIE
from .tdslifeway import TDSLifewayIE
from .teachable import (
TeachableIE,
TeachableCourseIE,
)
from .teachertube import (
TeacherTubeIE,
TeacherTubeUserIE,
Expand Down Expand Up @@ -1240,10 +1244,6 @@
UplynkIE,
UplynkPreplayIE,
)
from .upskill import (
UpskillIE,
UpskillCourseIE,
)
from .urort import UrortIE
from .urplay import URPlayIE
from .usanetwork import USANetworkIE
Expand Down
5 changes: 5 additions & 0 deletions youtube_dl/extractor/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
from .xfileshare import XFileShareIE
from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE
from .teachable import TeachableIE
from .indavideo import IndavideoEmbedIE
from .apa import APAIE
from .foxnews import FoxNewsIE
Expand Down Expand Up @@ -3112,6 +3113,10 @@ def _real_extract(self, url):
return self.playlist_from_matches(
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())

teachable_url = TeachableIE._extract_url(webpage, url)
if teachable_url:
return self.url_result(teachable_url)

indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
if indavideo_urls:
return self.playlist_from_matches(
Expand Down
259 changes: 259 additions & 0 deletions youtube_dl/extractor/teachable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from .wistia import WistiaIE
from ..compat import compat_str
from ..utils import (
clean_html,
ExtractorError,
get_element_by_class,
urlencode_postdata,
urljoin,
)


class TeachableBaseIE(InfoExtractor):
    """Common base for the Teachable lecture and course extractors.

    Provides the table of known sites, the two values interpolated into
    the subclasses' ``_VALID_URL`` templates, and the login routine.
    """

    _NETRC_MACHINE = 'teachable'
    # Literal prefix the subclasses' _VALID_URL accepts in front of a URL
    # on an arbitrary host (i.e. one that is not listed in _SITES).
    _URL_PREFIX = 'teachable:'

    # Host name -> name passed as ``netrc_machine`` when looking up
    # credentials for that host (see _login).
    _SITES = {
        # Only notable ones here
        'upskillcourses.com': 'upskill',
        'academy.gns3.com': 'gns3',
        'academyhacker.com': 'academyhacker',
        'stackskills.com': 'stackskills',
        'market.saleshacker.com': 'saleshacker',
        'learnability.org': 'learnability',
        'edurila.com': 'edurila',
    }

    # (prefix, alternation of regex-escaped known hosts); consumed with
    # ``%`` by the _VALID_URL templates of the subclasses.
    _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(map(re.escape, _SITES)))

    def _real_initialize(self):
        """Start every extractor instance in the logged-out state."""
        self._logged_in = False

    def _login(self, site):
        """Sign in to ``site`` using the configured credentials.

        Does nothing when a previous call already succeeded or when no
        username is available.  Credentials are looked up under the
        ``_SITES`` alias of ``site``, or under ``site`` itself when the
        host is not listed.

        Raises ExtractorError when the site asks to accept a new privacy
        policy, when it reports a login error in an ``alert`` element, or
        when no sign of a successful login is found in the response.
        """
        if self._logged_in:
            return

        machine = self._SITES.get(site, site)
        username, password = self._get_login_info(netrc_machine=machine)
        if username is None:
            return

        sign_in_page, handle = self._download_webpage_handle(
            'https://%s/sign_in' % site, None,
            'Downloading %s login page' % site)

        # URL reported by the response handle; used as Referer, as the
        # fallback form target and as the base for relative form actions.
        sign_in_url = compat_str(handle.geturl())

        # Start from the hidden inputs of the page and add the credentials.
        form_fields = self._hidden_inputs(sign_in_page)
        form_fields['user[email]'] = username
        form_fields['user[password]'] = password

        action_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', sign_in_page,
            'post url', default=sign_in_url, group='url')
        if not action_url.startswith('http'):
            action_url = urljoin(sign_in_url, action_url)

        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': sign_in_url,
        }
        result_page = self._download_webpage(
            action_url, None, 'Logging in to %s' % site,
            data=urlencode_postdata(form_fields),
            headers=request_headers)

        if '>I accept the new Privacy Policy<' in result_page:
            raise ExtractorError(
                'Unable to login: %s asks you to accept new Privacy Policy. '
                'Go to https://%s/ and accept.' % (site, site), expected=True)

        # Successful login
        signed_in_markers = (
            r'class=["\']user-signout',
            r'<a[^>]+\bhref=["\']/sign_out',
            r'>\s*Log out\s*<',
        )
        for marker in signed_in_markers:
            if re.search(marker, result_page):
                self._logged_in = True
                return

        alert = get_element_by_class('alert', result_page)
        if alert is not None:
            raise ExtractorError(
                'Unable to login: %s' % clean_html(alert), expected=True)

        raise ExtractorError('Unable to log in')


class TeachableIE(TeachableBaseIE):
    """Extractor for a single lecture page of a Teachable-hosted course.

    Matches either a lecture URL on one of the known hosts or any
    ``teachable:``-prefixed lecture URL, and hands the Wistia embed found
    on the page over to the Wistia extractor.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        %shttps?://(?P<site_t>[^/]+)|
                        https?://(?:www\.)?(?P<site>%s)
                    )
                    /courses/[^/]+/lectures/(?P<id>\d+)
                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE

    _TESTS = [{
        'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
        'info_dict': {
            'id': 'uzw6zw58or',
            'ext': 'mp4',
            'title': 'Welcome to the Course!',
            'description': 'md5:65edb0affa582974de4625b9cdea1107',
            'duration': 138.763,
            'timestamp': 1479846621,
            'upload_date': '20161122',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
        'only_matching': True,
    }, {
        'url': 'https://academy.gns3.com/courses/423415/lectures/6885939',
        'only_matching': True,
    }, {
        'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
        'only_matching': True,
    }]

    @staticmethod
    def _is_teachable(webpage):
        """Return a truthy value if ``webpage`` looks like a Teachable page.

        Both the tracker marker string and a ``<link>`` pointing at
        ``process.fs.teachablecdn.com`` must be present; otherwise the
        result is falsy (``False`` or ``None``).
        """
        return 'teachableTracker.linker:autoLink' in webpage and re.search(
            r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
            webpage)

    @staticmethod
    def _extract_url(webpage, source_url):
        """Return ``source_url`` with the ``teachable:`` prefix, or None.

        A prefixed URL is returned only when ``webpage`` passes
        ``_is_teachable`` and the path of ``source_url`` starts with
        ``/courses`` or ``/p``.
        """
        # This is called for arbitrary pages by the generic extractor, so
        # it must stay silent when the page is not a Teachable one.
        if not TeachableIE._is_teachable(webpage):
            return
        if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
            return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)

    def _real_extract(self, url):
        """Resolve a lecture URL to a ``url_transparent`` Wistia result.

        Raises a login-required error when the page indicates locked
        lecture contents, and ExtractorError when no Wistia embed is
        found for any other reason.
        """
        mobj = re.match(self._VALID_URL, url)
        site = mobj.group('site') or mobj.group('site_t')
        video_id = mobj.group('id')

        self._login(site)

        # The prefix only exists to route the URL to this extractor; strip
        # it before performing the actual request.
        if url.startswith(self._URL_PREFIX):
            url = url[len(self._URL_PREFIX):]

        webpage = self._download_webpage(url, video_id)

        wistia_url = WistiaIE._extract_url(webpage)
        if not wistia_url:
            if any(re.search(p, webpage) for p in (
                    r'class=["\']lecture-contents-locked',
                    r'>\s*Lecture contents locked',
                    r'id=["\']lecture-locked')):
                self.raise_login_required('Lecture contents locked')
            # Never hand a result without a URL to the caller.
            raise ExtractorError('Unable to find video URL')

        title = self._og_search_title(webpage, default=None)

        return {
            '_type': 'url_transparent',
            'url': wistia_url,
            'ie_key': WistiaIE.ie_key(),
            'title': title,
        }


class TeachableCourseIE(TeachableBaseIE):
    """Extractor turning a Teachable course page into a playlist of lectures.

    Matches course URLs (``/courses/...`` or ``/p/...``) on the known
    hosts as well as ``teachable:``-prefixed ones on arbitrary hosts.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        %shttps?://(?P<site_t>[^/]+)|
                        https?://(?:www\.)?(?P<site>%s)
                    )
                    /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
    _TESTS = [{
        'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
        'info_dict': {
            'id': 'essential-web-developer-course',
            'title': 'The Essential Web Developer Course (Free)',
        },
        'playlist_count': 192,
    }, {
        'url': 'http://upskillcourses.com/courses/119763/',
        'only_matching': True,
    }, {
        'url': 'http://upskillcourses.com/courses/enrolled/119763',
        'only_matching': True,
    }, {
        'url': 'https://academy.gns3.com/courses/enrolled/423415',
        'only_matching': True,
    }, {
        'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
        'only_matching': True,
    }, {
        'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the single-lecture extractor already accepts."""
        if TeachableIE.suitable(url):
            return False
        return super(TeachableCourseIE, cls).suitable(url)

    def _lecture_entry(self, item_html, base_url, keep_prefix):
        """Build the playlist entry for one section item, or return None.

        None is returned for items without the ``fa-youtube-play`` marker
        and for items in which no link can be found.
        """
        if 'fa-youtube-play' not in item_html:
            return None
        href = self._search_regex(
            r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', item_html,
            'lecture url', default=None, group='url')
        if not href:
            return None
        lecture_id = self._search_regex(
            r'/lectures/(\d+)', href, 'lecture id', default=None)
        lecture_title = self._html_search_regex(
            r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', item_html,
            'title', default=None)
        target = urljoin(base_url, href)
        if keep_prefix:
            # Carry the prefix over so the lecture URL is still matched by
            # the lecture extractor's prefixed alternative.
            target = self._URL_PREFIX + target
        return self.url_result(
            target,
            ie=TeachableIE.ie_key(), video_id=lecture_id,
            video_title=clean_html(lecture_title))

    def _real_extract(self, url):
        """Download the course page and return its lectures as a playlist."""
        match = re.match(self._VALID_URL, url)
        site = match.group('site') or match.group('site_t')
        course_id = match.group('id')

        self._login(site)

        has_prefix = url.startswith(self._URL_PREFIX)
        if has_prefix:
            url = url[len(self._URL_PREFIX):]

        webpage = self._download_webpage(url, course_id)

        base_url = 'https://%s/' % site

        entries = []
        section_items = re.finditer(
            r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
            webpage)
        for section_item in section_items:
            entry = self._lecture_entry(
                section_item.group('li'), base_url, has_prefix)
            if entry is not None:
                entries.append(entry)

        title_patterns = (
            r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
            r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h',
        )
        course_title = self._html_search_regex(
            title_patterns, webpage, 'course title', fatal=False)

        return self.playlist_result(entries, course_id, course_title)

0 comments on commit c486aa9

Please sign in to comment.