Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ie/joqrag] Add extractor #8384

Merged
merged 19 commits into from Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions yt_dlp/extractor/_extractors.py
Expand Up @@ -864,6 +864,7 @@
)
from .jove import JoveIE
from .joj import JojIE
from .joqrag import JoqrAgIE
from .jstream import JStreamIE
from .jtbc import (
JTBCIE,
Expand Down
113 changes: 113 additions & 0 deletions yt_dlp/extractor/joqrag.py
@@ -0,0 +1,113 @@
import datetime
import urllib.parse

from .common import InfoExtractor
from ..utils import (
clean_html,
datetime_from_str,
unified_timestamp,
urljoin,
)


class JoqrAgIE(InfoExtractor):
IE_DESC = '超!A&G+ 文化放送 Nippon Cultural Broadcasting, Inc. (JOQR, AGQR)'
_VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/player\.php',
r'https?://www\.uniqueradio\.jp/agplayer5/inc-player-hls\.php',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
_VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/player\.php',
r'https?://www\.uniqueradio\.jp/agplayer5/inc-player-hls\.php',
_VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',

r'https?://(?:www\.)?joqr\.co\.jp/ag/',
r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])']
_TESTS = [{
'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
'info_dict': {
'id': 'live',
'title': str,
'channel': '超!A&G+',
'description': str,
'live_status': 'is_live',
'release_timestamp': int,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
}, {
'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
'only_matching': True,
}, {
'url': 'https://www.joqr.co.jp/ag/article/103760/',
'only_matching': True,
}, {
'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
'only_matching': True,
}, {
'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
'only_matching': True,
}]
pzhlkj6612 marked this conversation as resolved.
Show resolved Hide resolved

def _extract_metadata(self, variable, html, name):
return clean_html(urllib.parse.unquote_plus(self._search_regex(
rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
html, name, group='value', default=''))) or None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's always non-fatal so the name param doesn't really matter

Suggested change
def _extract_metadata(self, variable, html, name):
return clean_html(urllib.parse.unquote_plus(self._search_regex(
rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
html, name, group='value', default=''))) or None
def _extract_metadata(self, variable, html):
return clean_html(urllib.parse.unquote_plus(self._search_regex(
rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
html, 'metadata', group='value', default=''))) or None


def _extract_start_timestamp(self, video_id, is_live):
def extract_start_time_from(date_str):
dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
date = dt.strftime('%Y%m%d')
start_time = self._search_regex(
r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(?P<time>\d{1,2}:\d{1,2})',
self._download_webpage(
f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
note=f'Downloading program list of {date}', fatal=False,
errnote=f'Failed to download program list of {date}') or '',
'start time of the first program', default=None, group='time')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could make this a little less busy

Suggested change
start_time = self._search_regex(
r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(?P<time>\d{1,2}:\d{1,2})',
self._download_webpage(
f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
note=f'Downloading program list of {date}', fatal=False,
errnote=f'Failed to download program list of {date}') or '',
'start time of the first program', default=None, group='time')
start_time = self._search_regex(
r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
self._download_webpage(
f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
note=f'Downloading program list of {date}', fatal=False,
errnote=f'Failed to download program list of {date}') or '',
'start time', default=None)

if start_time:
return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
return None

start_timestamp = extract_start_time_from('today')
if not start_timestamp:
return None

if not is_live or start_timestamp < datetime_from_str('now').timestamp():
return start_timestamp
else:
return extract_start_time_from('yesterday')

def _real_extract(self, url):
video_id = 'live'

metadata = self._download_webpage(
'https://www.uniqueradio.jp/aandg', video_id,
note='Downloading metadata', errnote='Failed to download metadata')
title = self._extract_metadata('Program_name', metadata, 'program title')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
title = self._extract_metadata('Program_name', metadata, 'program title')
title = self._extract_metadata('Program_name', metadata)


if title == '放送休止':
formats = []
live_status = 'is_upcoming'
release_timestamp = self._extract_start_timestamp(video_id, False)
msg = 'This stream is not currently live'
if release_timestamp:
msg += (' and will start at '
+ datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
self.raise_no_formats(msg, expected=True)
else:
m3u8_path = self._search_regex(
r'<source\s[^>]*\bsrc="([^"]+)"',
self._download_webpage(
'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
note='Downloading player data', errnote='Failed to download player data'),
'm3u8 url')
formats = self._extract_m3u8_formats(
urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id)
live_status = 'is_live'
release_timestamp = self._extract_start_timestamp(video_id, True)

return {
'id': video_id,
'title': title,
'channel': '超!A&G+',
'description': self._extract_metadata('Program_text', metadata, 'program description'),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
'description': self._extract_metadata('Program_text', metadata, 'program description'),
'description': self._extract_metadata('Program_text', metadata),

'formats': formats,
'live_status': live_status,
'release_timestamp': release_timestamp,
}