Skip to content

Commit

Permalink
[teachable] Add support for teachable based platform sites (closes #5451, closes #18150, closes #18272)
Browse files Browse the repository at this point in the history
  • Loading branch information
dstftw committed Dec 9, 2018
1 parent 3ad6dab commit 5ee7ae5
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 27 deletions.
8 changes: 4 additions & 4 deletions youtube_dl/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,10 @@
from .tastytrade import TastyTradeIE
from .tbs import TBSIE
from .tdslifeway import TDSLifewayIE
from .teachable import (
TeachableIE,
TeachableCourseIE,
)
from .teachertube import (
TeacherTubeIE,
TeacherTubeUserIE,
Expand Down Expand Up @@ -1240,10 +1244,6 @@
UplynkIE,
UplynkPreplayIE,
)
from .upskill import (
UpskillIE,
UpskillCourseIE,
)
from .urort import UrortIE
from .urplay import URPlayIE
from .usanetwork import USANetworkIE
Expand Down
5 changes: 5 additions & 0 deletions youtube_dl/extractor/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
from .xfileshare import XFileShareIE
from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE
from .teachable import TeachableIE
from .indavideo import IndavideoEmbedIE
from .apa import APAIE
from .foxnews import FoxNewsIE
Expand Down Expand Up @@ -3112,6 +3113,10 @@ def _real_extract(self, url):
return self.playlist_from_matches(
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())

teachable_url = TeachableIE._extract_url(webpage, url)
if teachable_url:
return self.url_result(teachable_url)

indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
if indavideo_urls:
return self.playlist_from_matches(
Expand Down
129 changes: 106 additions & 23 deletions youtube_dl/extractor/upskill.py → youtube_dl/extractor/teachable.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,38 @@
)


class UpskillBaseIE(InfoExtractor):
_LOGIN_URL = 'http://upskillcourses.com/sign_in'
_NETRC_MACHINE = 'upskill'
class TeachableBaseIE(InfoExtractor):
_NETRC_MACHINE = 'teachable'
_URL_PREFIX = 'teachable:'

_SITES = {
# Only notable ones here
'upskillcourses.com': 'upskill',
'academy.gns3.com': 'gns3',
'academyhacker.com': 'academyhacker',
'stackskills.com': 'stackskills',
'market.saleshacker.com': 'saleshacker',
'learnability.org': 'learnability',
'edurila.com': 'edurila',
}

_VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))

def _real_initialize(self):
self._login()
self._logged_in = False

def _login(self):
username, password = self._get_login_info()
def _login(self, site):
if self._logged_in:
return

username, password = self._get_login_info(
netrc_machine=self._SITES.get(site, site))
if username is None:
return

login_page, urlh = self._download_webpage_handle(
self._LOGIN_URL, None, 'Downloading login page')
'https://%s/sign_in' % site, None,
'Downloading %s login page' % site)

login_url = compat_str(urlh.geturl())

Expand All @@ -46,18 +64,24 @@ def _login(self):
post_url = urljoin(login_url, post_url)

response = self._download_webpage(
post_url, None, 'Logging in',
post_url, None, 'Logging in to %s' % site,
data=urlencode_postdata(login_form),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': login_url,
})

if '>I accept the new Privacy Policy<' in response:
raise ExtractorError(
'Unable to login: %s asks you to accept new Privacy Policy. '
'Go to https://%s/ and accept.' % (site, site), expected=True)

# Successful login
if any(re.search(p, response) for p in (
r'class=["\']user-signout',
r'<a[^>]+\bhref=["\']/sign_out',
r'>\s*Log out\s*<')):
self._logged_in = True
return

message = get_element_by_class('alert', response)
Expand All @@ -68,16 +92,22 @@ def _login(self):
raise ExtractorError('Unable to log in')


class UpskillIE(UpskillBaseIE):
_VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/[^/]+/lectures/(?P<id>\d+)'
class TeachableIE(TeachableBaseIE):
_VALID_URL = r'''(?x)
(?:
%shttps?://(?P<site_t>[^/]+)|
https?://(?:www\.)?(?P<site>%s)
)
/courses/[^/]+/lectures/(?P<id>\d+)
''' % TeachableBaseIE._VALID_URL_SUB_TUPLE

_TESTS = [{
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
'info_dict': {
'id': 'uzw6zw58or',
'ext': 'mp4',
'title': 'Welcome to the Course!',
'description': 'md5:8d66c13403783370af62ca97a7357bdd',
'description': 'md5:65edb0affa582974de4625b9cdea1107',
'duration': 138.763,
'timestamp': 1479846621,
'upload_date': '20161122',
Expand All @@ -88,10 +118,38 @@ class UpskillIE(UpskillBaseIE):
}, {
'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
'only_matching': True,
}, {
'url': 'https://academy.gns3.com/courses/423415/lectures/6885939',
'only_matching': True,
}, {
'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
'only_matching': True,
}]

@staticmethod
def _is_teachable(webpage):
return 'teachableTracker.linker:autoLink' in webpage and re.search(
r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
webpage)

@staticmethod
def _extract_url(webpage, source_url):
    """Build a ``teachable:``-prefixed URL for a Teachable-hosted page.

    webpage is the HTML of the page being inspected and source_url is
    the URL of that page.

    Returns source_url prefixed with ``TeachableBaseIE._URL_PREFIX`` when
    the page passes ``TeachableIE._is_teachable`` and the URL path begins
    with ``/courses`` or ``/p``; returns ``None`` otherwise.
    """
    # No diagnostic output here: anything printed would end up on stdout
    # and corrupt machine-readable output.
    if not TeachableIE._is_teachable(webpage):
        return None
    if not re.match(r'https?://[^/]+/(?:courses|p)', source_url):
        return None
    return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)

def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
site = mobj.group('site') or mobj.group('site_t')
video_id = mobj.group('id')

self._login(site)

prefixed = url.startswith(self._URL_PREFIX)
if prefixed:
url = url[len(self._URL_PREFIX):]

webpage = self._download_webpage(url, video_id)

Expand All @@ -113,12 +171,18 @@ def _real_extract(self, url):
}


class UpskillCourseIE(UpskillBaseIE):
_VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/(?:enrolled/)?(?P<id>[^/?#&]+)'
class TeachableCourseIE(TeachableBaseIE):
_VALID_URL = r'''(?x)
(?:
%shttps?://(?P<site_t>[^/]+)|
https?://(?:www\.)?(?P<site>%s)
)
/(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
_TESTS = [{
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
'info_dict': {
'id': '119763',
'id': 'essential-web-developer-course',
'title': 'The Essential Web Developer Course (Free)',
},
'playlist_count': 192,
Expand All @@ -128,21 +192,37 @@ class UpskillCourseIE(UpskillBaseIE):
}, {
'url': 'http://upskillcourses.com/courses/enrolled/119763',
'only_matching': True,
}, {
'url': 'https://academy.gns3.com/courses/enrolled/423415',
'only_matching': True,
}, {
'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
'only_matching': True,
}, {
'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
'only_matching': True,
}]

@classmethod
def suitable(cls, url):
return False if UpskillIE.suitable(url) else super(
UpskillCourseIE, cls).suitable(url)
return False if TeachableIE.suitable(url) else super(
TeachableCourseIE, cls).suitable(url)

def _real_extract(self, url):
course_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
site = mobj.group('site') or mobj.group('site_t')
course_id = mobj.group('id')

self._login(site)

prefixed = url.startswith(self._URL_PREFIX)
if prefixed:
prefix = self._URL_PREFIX
url = url[len(prefix):]

webpage = self._download_webpage(url, course_id)

course_id = self._search_regex(
r'data-course-id=["\'](\d+)', webpage, 'course id',
default=course_id)
url_base = 'https://%s/' % site

entries = []

Expand All @@ -162,10 +242,13 @@ def _real_extract(self, url):
title = self._html_search_regex(
r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
'title', default=None)
entry_url = urljoin(url_base, lecture_url)
if prefixed:
entry_url = self._URL_PREFIX + entry_url
entries.append(
self.url_result(
urljoin('http://upskillcourses.com/', lecture_url),
ie=UpskillIE.ie_key(), video_id=lecture_id,
entry_url,
ie=TeachableIE.ie_key(), video_id=lecture_id,
video_title=clean_html(title)))

course_title = self._html_search_regex(
Expand Down

0 comments on commit 5ee7ae5

Please sign in to comment.