Skip to content

Commit

Permalink
[extractor/camtasia] Separate into own extractor (#4307)
Browse files Browse the repository at this point in the history
Authored by: coletdjnz
  • Loading branch information
pukkandan committed Aug 1, 2022
1 parent f2e8dbc commit 5fff2e5
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 67 deletions.
1 change: 1 addition & 0 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@
CamdemyFolderIE
)
from .cammodels import CamModelsIE
from .camtasia import CamtasiaEmbedIE
from .camwithher import CamWithHerIE
from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE
Expand Down
71 changes: 71 additions & 0 deletions yt_dlp/extractor/camtasia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
import urllib.parse

from .common import InfoExtractor
from ..utils import float_or_none


class CamtasiaEmbedIE(InfoExtractor):
    """Extractor for Camtasia Studio projects embedded in a webpage.

    The page is recognised by a ``fo.addVariable("csConfigFile", "...")``
    call; the referenced XML configuration lists the media files, which are
    returned together as a playlist.
    """

    # False rather than a regex: this class only implements
    # _extract_from_webpage (presumably it is selected by page content,
    # not by URL -- confirm against InfoExtractor).
    _VALID_URL = False
    _WEBPAGE_TESTS = [
        {
            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
            'playlist': [{
                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
                    'ext': 'flv',
                    'duration': 2235.90,
                }
            }, {
                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
                    'ext': 'flv',
                    'duration': 2235.93,
                }
            }],
            'info_dict': {
                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
            },
            'skip': 'webpage dead'
        },
    ]

    def _extract_from_webpage(self, url, webpage):
        """Build a playlist from the Camtasia configuration a page points to.

        :param url: URL of the webpage; relative config and media URIs are
            resolved against it.
        :param webpage: HTML source of the page.
        :return: ``None`` when the page has no ``csConfigFile`` variable,
            otherwise a playlist dict with one entry per fileset child that
            carries a non-empty ``<uri>``.
        """
        # NOTE(review): this returns a single playlist dict (or None); confirm
        # that the webpage-extraction caller accepts that rather than
        # expecting an iterable of results.
        camtasia_cfg = self._search_regex(
            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
            webpage, 'camtasia configuration file', default=None)
        if camtasia_cfg is None:
            return None

        title = self._html_search_meta('DC.title', webpage, fatal=True)

        camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
        camtasia_cfg = self._download_xml(
            camtasia_url, self._generic_id(url),
            note='Downloading camtasia configuration',
            errnote='Failed to download camtasia configuration')
        fileset_node = camtasia_cfg.find('./playlist/array/fileset')

        entries = []
        # Element.getchildren() was removed in Python 3.9; iterating the
        # element yields the same children. Compare against None explicitly
        # because a childless Element is falsy.
        for n in (fileset_node if fileset_node is not None else ()):
            url_n = n.find('./uri')
            # Skip children with no <uri> element or an empty one.
            if url_n is None or not url_n.text:
                continue

            # <duration> is treated as optional so a missing element does
            # not abort the whole playlist.
            duration_n = n.find('./duration')
            entries.append({
                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
                'title': f'{title} - {n.tag}',
                'url': urllib.parse.urljoin(url, url_n.text),
                'duration': (
                    float_or_none(duration_n.text)
                    if duration_n is not None else None),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
        }
67 changes: 0 additions & 67 deletions yt_dlp/extractor/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,30 +933,6 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
# Camtasia studio
{
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
'playlist': [{
'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
'info_dict': {
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
'ext': 'flv',
'duration': 2235.90,
}
}, {
'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
'info_dict': {
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
'ext': 'flv',
'duration': 2235.93,
}
}],
'info_dict': {
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
}
},
# Flowplayer
{
'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
Expand Down Expand Up @@ -2680,43 +2656,6 @@ def itunes(key):
'entries': entries,
}

def _extract_camtasia(self, url, video_id, webpage):
    """Build a playlist from the Camtasia configuration a page points to.

    :param url: URL of the webpage; relative config and media URIs are
        resolved against it.
    :param video_id: identifier passed to the configuration download.
    :param webpage: HTML source of the page.
    :return: ``None`` if no camtasia video can be found, otherwise a
        playlist dict with one entry per fileset child that carries a
        non-empty ``<uri>``.
    """
    camtasia_cfg = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if camtasia_cfg is None:
        return None

    title = self._html_search_meta('DC.title', webpage, fatal=True)

    camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')

    entries = []
    # Element.getchildren() was removed in Python 3.9; iterating the element
    # yields the same children. Compare against None explicitly because a
    # childless Element is falsy.
    for n in (fileset_node if fileset_node is not None else ()):
        url_n = n.find('./uri')
        # Skip children with no <uri> element or an empty one.
        if url_n is None or not url_n.text:
            continue

        # <duration> is treated as optional so a missing element does not
        # abort the whole playlist.
        duration_n = n.find('./duration')
        entries.append({
            'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
            'title': f'{title} - {n.tag}',
            'url': urllib.parse.urljoin(url, url_n.text),
            'duration': (
                float_or_none(duration_n.text)
                if duration_n is not None else None),
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }

def _kvs_getrealurl(self, video_url, license_code):
if not video_url.startswith('function/0/'):
return video_url # not obfuscated
Expand Down Expand Up @@ -2920,12 +2859,6 @@ def _real_extract(self, url):
except xml.etree.ElementTree.ParseError:
pass

# Is it a Camtasia project?
camtasia_res = self._extract_camtasia(url, video_id, webpage)
if camtasia_res is not None:
self.report_detected('Camtasia video')
return camtasia_res

info_dict.update({
# it's tempting to parse this further, but you would
# have to take into account all the variations like
Expand Down

0 comments on commit 5fff2e5

Please sign in to comment.