yt-dlp · bashonly · Jan 19, 2024 · Dec 29, 2023 · Jan 5, 2024 · Jan 5, 2024
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
@@ -42,6 +42,7 @@
     AbemaTVTitleIE,
 )
 from .academicearth import AcademicEarthCourseIE
+from .academymel import AcademyMelIE
 from .acast import (
     ACastIE,
     ACastChannelIE,
@@ -680,6 +681,9 @@
     GeniusIE,
     GeniusLyricsIE,
 )
+from .getcourseru import (
+    GetCourseRuIE
+)
 from .gettr import (
     GettrIE,
     GettrStreamingIE,

diff --git a/yt_dlp/extractor/academymel.py b/yt_dlp/extractor/academymel.py
@@ -0,0 +1,99 @@
+import re
+import time
+
+from datetime import datetime
+from .common import InfoExtractor
+from ..utils import urlencode_postdata, ExtractorError
+
+
+class AcademyMelIE(InfoExtractor):
+    """Extract the GetCourse-hosted videos embedded in an academymel.online page.
+
+    Every distinct player iframe found on the page is delegated to the
+    GetCourseRu extractor and the entries are returned as one playlist.
+    """
+
+    # Account credentials are deliberately not stored here; supply them at run
+    # time (--username/--password, --netrc or test/local_parameters.json).
+
+    _CACHE_KEY = 'academymel'
+    _CACHE_SUBKEY = 'login-cookie-header'
+
+    _NETRC_MACHINE = 'academymel'
+    _LOGIN_URL = 'https://academymel.online/cms/system/login'
+    _VALID_URL = r'^https?:\/\/academymel\.online\/(?P<url>.*)$'
+
+    _TESTS = [{
+        'url': 'http://academymel.online/3video_1',
+        'info_dict': {
+            'id': '4885302',
+            'title': 'Промоуроки Академии МЕЛ',
+            'ext': 'mp4',
+            'duration': 1693
+        }
+    }]
+
+    def _perform_login(self, username, password):
+        """Submit the site's login form with the given credentials.
+
+        The response body is not inspected; a failed request is fatal.
+        """
+        login_body = urlencode_postdata({
+            'action': 'processXdget',
+            'xdgetId': 'r6335_1_1',
+            'params[action]': 'login',
+            'params[url]': 'http://academymel.online/cms/system/login?required=true',
+            'params[object_type]': 'cms_page',
+            'params[object_id]': -1,
+            'params[email]': username,
+            'params[password]': password,
+            'requestTime': int(time.time())
+        })
+
+        self._request_webpage(self._LOGIN_URL,
+                              None,
+                              data=login_body,
+                              note='Logging into the academymel.online',
+                              errnote='Failed to log in into academymel.online',
+                              fatal=True)
+
+    def playlist_from_entries(self, entries, valid_url):
+        """Wrap entries in a playlist stamped with the current local time.
+
+        entries -- list of url_result() dicts, one per embedded player.
+        valid_url -- page URL, interpolated into the playlist description.
+        """
+        current_timestamp = int(time.time())
+        formatted_datetime = datetime.fromtimestamp(current_timestamp).strftime('%d.%m.%Y, %H:%M')
+
+        return self.playlist_result(entries,
+                                    'academymel-playlist-%d' % current_timestamp,
+                                    'AcademyMel playlist (%s)' % formatted_datetime,
+                                    'AcademyMel playlist for %s (at %s)' % (valid_url, formatted_datetime))
+
+    def _real_extract(self, url):
+        """Return a playlist of the GetCourse players embedded in the page at url."""
+        if not self._match_valid_url(url):
+            raise ExtractorError('Invalid URL found', expected=True)
+
+        webpage = self._download_webpage(url,
+                                         None,
+                                         fatal=True,
+                                         note='Downloading video website',
+                                         errnote='Failed to download video website')
+
+        title = self._search_regex(r'<title>(?P<title>.*)</title>', webpage, 'title')
+
+        # dict.fromkeys() drops repeated player URLs while keeping page order.
+        # The pattern has a single group, so findall() yields plain strings;
+        # [^"]* keeps each match inside its own attribute value.
+        player_urls = dict.fromkeys(re.findall(
+            r'data-iframe-src="(?P<url>https?://[^/"]+\.getcourse\.ru/sign-player/\?[^"]*)"',
+            webpage))
+        entries = [
+            self.url_result(video_url, 'GetCourseRu', url_transparent=True, title=title)
+            for video_url in player_urls]
+
+        # Pass the URL string, not the re.Match object, so the description
+        # reads "... for <url>" instead of the match object's repr.
+        return self.playlist_from_entries(entries, url)
diff --git a/yt_dlp/extractor/getcourseru.py b/yt_dlp/extractor/getcourseru.py
@@ -0,0 +1,75 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, int_or_none
+
+
+class GetCourseRuIE(InfoExtractor):
+    """Extract a single video from a signed GetCourse player page.
+
+    The player page stores its settings in a window.configs JSON object,
+    which provides the HLS master playlist and the video metadata.
+    """
+
+    _NETRC_MACHINE = 'getcourseru'
+    _VALID_URL = r'^https?:\/\/[^\/]+\.getcourse\.ru\/sign-player\/\?.*$'
+
+    _TESTS = [{
+        'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiZTJlZWE3MTI5ZDk3OWQzYzYzMDYzMDUzOGJkMzZlZjEiLCJ1c2VyX2lkIjozNTc3NjY5NjIsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4My44NSIsImdjX2hvc3QiOiJhY2FkZW15bWVsLm9ubGluZSIsInRpbWUiOjE3MDM4MDY1NzksInBheWxvYWQiOiJ1XzM1Nzc2Njk2MiIsInVpX2xhbmd1YWdlIjoicnUiLCJpc19oYXZlX2N1c3RvbV9zdHlsZSI6dHJ1ZX0=&s=a2ed5bd648a2ae7a4f7684abe815ec7a',
+        'info_dict': {
+            'id': 'master.m3u8?user-cdn=cdnvideo&acc-id=714517&user-id=357766962&loc-mode=ru&version=10:2:1:0:2:cdnvideo&consumer=vod&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyLWlkIjozNTc3NjY5NjJ9',
+            'title': 'master',
+            'ext': 'mp4',
+            'duration': 1871
+            # note: the original URL is necessary to obtain an up-to-date URL, because the URL is always changing
+        },
+        'skip': 'Requires authentication',
+        'note': 'This extractor is used by AcademyMel extractor, which has a login feature'
+    }]
+
+    def _real_extract(self, url):
+        """Download the player page and build the info dict from window.configs.
+
+        Raises ExtractorError when the URL does not match or the config has
+        no master playlist URL.
+        """
+        if not self._match_valid_url(url):
+            raise ExtractorError('Invalid URL found', expected=True)
+
+        webpage = self._download_webpage(url,
+                                         None,
+                                         fatal=True,
+                                         note='Retrieving metadata...',
+                                         errnote='Failed to retrieve metadata')
+
+        window_configs = self._search_json(
+            r'window\.configs\s*=\s*',
+            webpage,
+            'config',
+            video_id=None,
+            fatal=True)
+
+        video_id = window_configs.get('videoId')
+        video_hash = window_configs.get('videoHash')
+        master_playlist_url = window_configs.get('masterPlaylistUrl')
+        # Prefer 'thumbnailUrl', falling back to 'previewUrl', so the logged
+        # value and the returned thumbnail always agree.
+        thumbnail_url = window_configs.get('thumbnailUrl') or window_configs.get('previewUrl')
+
+        self.to_screen('videoId: %s, videoHash: %s, masterPlaylistUrl: %s, thumbnail_url: %s'
+                       % (video_id, video_hash, master_playlist_url, thumbnail_url))
+
+        # Fail with a readable message instead of handing None to the HLS parser.
+        if not master_playlist_url:
+            raise ExtractorError('No master playlist URL found in player config')
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            master_playlist_url, video_id)
+
+        return {
+            'id': str(video_id),
+            'title': video_hash,
+            'thumbnail': thumbnail_url,
+            # int_or_none() tolerates a missing duration, where int(None) raised
+            'duration': int_or_none(window_configs.get('videoDuration')),
+            'formats': formats,
+            'subtitles': subtitles
+        }