From a6741366e048f46b921cc98254a0b427f1d0ef9e Mon Sep 17 00:00:00 2001 From: amir16yp <96111605+amir16yp@users.noreply.github.com> Date: Thu, 12 Oct 2023 11:06:46 +0300 Subject: [PATCH] Add maariv.co.il extractor --- yt_dlp/extractor/maariv.py | 72 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 yt_dlp/extractor/maariv.py diff --git a/yt_dlp/extractor/maariv.py b/yt_dlp/extractor/maariv.py new file mode 100644 index 00000000000..bd070be0de9 --- /dev/null +++ b/yt_dlp/extractor/maariv.py @@ -0,0 +1,72 @@ +from yt_dlp.extractor.common import InfoExtractor +import re + +class MaarivIE(InfoExtractor): + IE_NAME = 'maariv.co.il' + _VALID_URL = r'(https?://)?(www\.)?(m\.)?maariv\.co\.il(?:/[^/]+)*/Article-(?P\d{7})' + _TESTS = [ + { + 'url': 'https://www.maariv.co.il/news/law/Article-1044008', + 'info_dict': { + 'id': '1044008', + 'title': '.*', + }, + } + ] + + @staticmethod + def extract_resolution(url): + pattern = r'(\d{2,4}x\d{2,4})\.mp4$' + match = re.search(pattern, url) + return match.group(1) if match else None + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # Find the correct iframes + video_urls = re.findall(r'