Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[extractor/zingmp3]: add live radio extractor #7189

Merged
merged 36 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
f39766d
[utils,cleanup] `traverse_obj`: Allow `[]` and minor cleanup
pukkandan Feb 9, 2023
5f421a5
Merge branch 'yt-dlp:master' into master
Feb 26, 2023
df155e5
[extractor/zingmp3]: update get secretKey and apiKey and paged_list
Feb 27, 2023
9332a83
Merge branch 'yt-dlp:master' into master
Feb 28, 2023
5513b44
Merge branch 'yt-dlp:master' into master
Mar 1, 2023
627e42c
Apply suggestions from code review
pukkandan Mar 4, 2023
954b713
Update yt_dlp/extractor/zingmp3.py
pukkandan Mar 4, 2023
dec4273
feat: update zingmp3
Mar 11, 2023
cac5f00
Merge branch 'yt-dlp:master' into master
Mar 11, 2023
32a22f8
[zingmp3]: Add live radio and postcast program extractor
Mar 11, 2023
4b94f8b
Merge remote-tracking branch 'origin/update_apikey_and_secret' into z…
Mar 11, 2023
f8ff4a7
[zingmp3]: Add live radio and postcast program extractor
Mar 11, 2023
c815283
[zingmp3]: Add live radio and postcast program extractor
Mar 11, 2023
a6121a8
Merge branch 'yt-dlp:master' into master
Mar 11, 2023
1bfe1aa
Merge remote-tracking branch 'origin/master' into zingmp3/add_live_ra…
Mar 11, 2023
45ee010
Merge branch 'yt-dlp:master' into master
Mar 12, 2023
b77feab
Merge remote-tracking branch 'origin/master' into zingmp3/add_live_ra…
Mar 12, 2023
433c05b
update comment
Mar 18, 2023
2d86a46
Merge remote-tracking branch 'origin/update_apikey_and_secret' into z…
Mar 24, 2023
726f808
feat: add new postcast extractor
Mar 24, 2023
9ab1334
Merge branch 'yt-dlp:master' into master
Jun 1, 2023
3404726
Merge remote-tracking branch 'origin/master' into zingmp3/add_live_ra…
Jun 1, 2023
3bfcf17
update test case
Jun 1, 2023
9cd49ba
Merge branch 'yt-dlp:master' into master
Jun 4, 2023
20aa291
Merge remote-tracking branch 'origin/master' into zingmp3/add_live_ra…
Jun 4, 2023
9087dd0
fix build
Jun 4, 2023
0da74ff
fix build
Jun 4, 2023
dec463b
Merge branch 'yt-dlp:master' into master
hatienloi Oct 28, 2023
7c732b0
Merge branch 'yt-dlp:master' into master
hatienloi Nov 13, 2023
3490433
Merge remote-tracking branch 'origin/master' into zingmp3/add_live_ra…
hatienloi Nov 13, 2023
b5f8cae
- fix comments
hatienloi Nov 13, 2023
22ed1a0
- typo issue
hatienloi Nov 14, 2023
b24fad4
- fix comments
hatienloi Nov 16, 2023
e961e99
Merge branch 'yt-dlp:master' into master
hatienloi Nov 16, 2023
986c80a
Merge remote-tracking branch 'origin/master' into zingmp3/add_live_ra…
hatienloi Nov 16, 2023
dc51556
Cleanup
bashonly Nov 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2588,6 +2588,9 @@
ZingMp3ChartMusicVideoIE,
ZingMp3UserIE,
ZingMp3HubIE,
ZingMp3LiveRadioIE,
ZingMp3PodcastEpisodeIE,
ZingMp3PodcastIE,
)
from .zoom import ZoomIE
from .zype import ZypeIE
254 changes: 229 additions & 25 deletions yt_dlp/extractor/zingmp3.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,15 @@
import urllib.parse

from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj, try_call, urljoin
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
try_call,
urljoin,
url_or_none
)
from ..utils.traversal import traverse_obj


class ZingMp3BaseIE(InfoExtractor):
Expand All @@ -20,9 +28,17 @@ class ZingMp3BaseIE(InfoExtractor):
'video-clip': '/api/v2/page/get/video',
'lyric': '/api/v2/lyric/get/lyric',
'song-streaming': '/api/v2/song/get/streaming',
'liveradio': '/api/v2/livestream/get/info',
'eps': '/api/v2/page/get/podcast-episode',
'episode-streaming': '/api/v2/podcast/episode/get/streaming',
# Playlist
'playlist': '/api/v2/page/get/playlist',
'album': '/api/v2/page/get/playlist',
'pgr': '/api/v2/page/get/podcast-program',
'pgr-list': '/api/v2/podcast/episode/get/list',
'cgr': '/api/v2/page/get/podcast-category',
'cgr-list': '/api/v2/podcast/program/get/list-by-cate',
'cgrs': '/api/v2/page/get/podcast-categories',
# Chart
'zing-chart': '/api/v2/page/get/chart-home',
'zing-chart-tuan': '/api/v2/page/get/week-chart',
Expand All @@ -33,6 +49,10 @@ class ZingMp3BaseIE(InfoExtractor):
'user-list-song': '/api/v2/song/get/list',
'user-list-video': '/api/v2/video/get/list',
'hub': '/api/v2/page/get/hub-detail',
'new-release': '/api/v2/chart/get/new-release',
'top100': '/api/v2/page/get/top-100',
'podcast-new': '/api/v2/podcast/program/get/list-by-type',
'top-podcast': '/api/v2/podcast/program/get/top-episode',
}

def _api_url(self, url_type, params):
Expand Down Expand Up @@ -78,7 +98,7 @@ def _paged_list(self, _id, url_type):


class ZingMp3IE(ZingMp3BaseIE):
_VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed'
_VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed|eps'
IE_NAME = 'zingmp3'
IE_DESC = 'zingmp3.vn'
_TESTS = [{
Expand All @@ -102,7 +122,7 @@ class ZingMp3IE(ZingMp3BaseIE):
},
}, {
'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
'md5': '3c2081e79471a2f4a3edd90b70b185ea',
'md5': '92c6e7a019f06b4682a6c35ae5785fab',
'info_dict': {
'id': 'ZO8ZF7C7',
'title': 'Sương Hoa Đưa Lối',
Expand All @@ -128,6 +148,20 @@ class ZingMp3IE(ZingMp3BaseIE):
'album': 'Người Yêu Tôi Lạnh Lùng Sắt Đá (Single)',
'album_artist': 'Mr. Siro',
},
}, {
'url': 'https://zingmp3.vn/eps/Cham-x-Ban-Noi-Goi-La-Nha/ZZD9ACWI.html',
'md5': 'd52f9f63e2631e004e4f15188eedcf80',
'info_dict': {
'id': 'ZZD9ACWI',
'title': 'Chạm x Bạn - Nơi Gọi Là Nhà',
'ext': 'mp3',
'duration': 3716,
'thumbnail': r're:^https?://.+\.jpg',
'track': 'Chạm x Bạn - Nơi Gọi Là Nhà',
'artist': 'On Air',
'album': 'Top Podcast',
'album_artist': 'On Air',
},
}, {
'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false',
'only_matching': True,
Expand All @@ -147,6 +181,8 @@ def _real_extract(self, url):
'http://api.mp3.zing.vn/api/mobile/video/getvideoinfo', item_id,
query={'requestdata': json.dumps({'id': item_id})},
note='Downloading mp4 JSON metadata').get('source')
elif url_type == 'eps':
source = self._call_api('episode-streaming', {'id': item_id})
else:
source = self._call_api('song-streaming', {'id': item_id})

Expand Down Expand Up @@ -189,9 +225,10 @@ def _real_extract(self, url):
'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'),
'duration': int_or_none(item.get('duration')),
'track': traverse_obj(item, 'title', 'alias'),
'artist': traverse_obj(item, 'artistsNames', 'artists_names'),
'album': traverse_obj(item, ('album', ('name', 'title')), get_all=False),
'album_artist': traverse_obj(item, ('album', ('artistsNames', 'artists_names')), get_all=False),
'artist': traverse_obj(item, 'artistsNames', 'artists_names', ('artists', 0, 'name')),
'album': traverse_obj(item, ('album', ('name', 'title')), ('genres', 0, 'name'), get_all=False),
'album_artist': traverse_obj(item, ('album', ('artistsNames', 'artists_names')),
('artists', 0, 'name'), get_all=False),
'formats': formats,
'subtitles': {'origin': [{'url': lyric}]} if lyric else None,
}
Expand All @@ -200,12 +237,12 @@ def _real_extract(self, url):
class ZingMp3AlbumIE(ZingMp3BaseIE):
_VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'album|playlist'
_TESTS = [{
'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
'url': 'https://zingmp3.vn/album/Ca-Phe-Quan-Quen-Hoang-Dung-My-Anh-Da-LAB-Thinh-Suy/ZOC7WUZC.html',
'info_dict': {
'id': 'ZWZBWDAF',
'title': 'Lâu Đài Tình Ái',
'id': 'ZOC7WUZC',
'title': 'Cà Phê Quán Quen',
},
'playlist_mincount': 9,
'playlist_mincount': 10,
}, {
'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html',
'info_dict': {
Expand All @@ -231,7 +268,7 @@ def _real_extract(self, url):


class ZingMp3ChartHomeIE(ZingMp3BaseIE):
_VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<id>(?:zing-chart|moi-phat-hanh))/?(?:[#?]|$)'
_VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<id>(?:zing-chart|moi-phat-hanh|top100|podcast-discover))/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://zingmp3.vn/zing-chart',
'info_dict': {
Expand All @@ -244,13 +281,34 @@ class ZingMp3ChartHomeIE(ZingMp3BaseIE):
'id': 'moi-phat-hanh',
},
'playlist_mincount': 100,
}, {
'url': 'https://zingmp3.vn/top100',
'info_dict': {
'id': 'top100',
},
'playlist_mincount': 50,
}, {
'url': 'https://zingmp3.vn/podcast-discover',
'info_dict': {
'id': 'podcast-discover',
},
'playlist_mincount': 4,
}]
IE_NAME = 'zingmp3:chart-home'

def _real_extract(self, url):
url_type = self._match_id(url)
data = self._call_api(url_type, {'id': url_type})
items = traverse_obj(data, ('RTChart', 'items') if url_type == 'zing-chart' else 'items')
params = {'id': url_type}
if url_type == 'podcast-discover':
params['type'] = 'discover'
data = self._call_api(url_type, params)
items = []
if url_type == 'top100':
items.extend(traverse_obj(data, (..., 'items', ..., {dict})))
elif url_type == 'zing-chart':
items.extend(traverse_obj(data, ('RTChart', 'items', ..., {dict})))
else:
items.extend(traverse_obj(data, ('items', ..., {dict})))
return self.playlist_result(self._parse_items(items), url_type)


Expand Down Expand Up @@ -334,7 +392,7 @@ def _real_extract(self, url):


class ZingMp3UserIE(ZingMp3BaseIE):
_VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<user>[^/]+)/(?P<type>bai-hat|single|album|video)/?(?:[?#]|$)'
_VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<user>[^/]+)/(?P<type>bai-hat|single|album|video|song)/?(?:[?#]|$)'
IE_NAME = 'zingmp3:user'
_TESTS = [{
'url': 'https://zingmp3.vn/Mr-Siro/bai-hat',
Expand Down Expand Up @@ -368,6 +426,18 @@ class ZingMp3UserIE(ZingMp3BaseIE):
'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
},
'playlist_mincount': 15,
}, {
'url': 'https://zingmp3.vn/new-release/song',
'info_dict': {
'id': 'new-release-song',
},
'playlist_mincount': 50,
}, {
'url': 'https://zingmp3.vn/new-release/album',
'info_dict': {
'id': 'new-release-album',
},
'playlist_mincount': 20,
}]

def _fetch_page(self, user_id, url_type, page):
Expand All @@ -380,20 +450,28 @@ def _fetch_page(self, user_id, url_type, page):
})

def _real_extract(self, url):
user_alias, url_type = self._match_valid_url(url).group('user', 'type')
alias, url_type = self._match_valid_url(url).group('user', 'type')
if not url_type:
url_type = 'bai-hat'

user_info = self._call_api('info-artist', {}, user_alias, query={'alias': user_alias})
if url_type in ('bai-hat', 'video'):
entries = self._paged_list(user_info['id'], url_type)
user_info = self._call_api('info-artist', {}, alias, query={'alias': alias})

# Handle for new-release
if alias == 'new-release' and url_type in ('song', 'album'):
_id = f'{alias}-{url_type}'
return self.playlist_result(self._parse_items(
self._call_api('new-release', params={'type': url_type}, display_id=_id)), _id)
else:
entries = self._parse_items(traverse_obj(user_info, (
'sections',
lambda _, v: v['sectionId'] == 'aAlbum' if url_type == 'album' else v['sectionId'] == 'aSingle',
'items', ...)))
return self.playlist_result(
entries, user_info['id'], f'{user_info.get("name")} - {url_type}', user_info.get('biography'))
# Handle for user/artist
if url_type in ('bai-hat', 'video'):
entries = self._paged_list(user_info['id'], url_type)
else:
section_id = 'aAlbum' if url_type == 'album' else 'aSingle'
entries = self._parse_items(traverse_obj(user_info, (
'sections', lambda _, v: v['sectionId'] == section_id, 'items', ...)))
return self.playlist_result(
entries, user_info['id'], join_nonempty(user_info.get('name'), url_type, delim=' - '),
user_info.get('biography'))


class ZingMp3HubIE(ZingMp3BaseIE):
Expand All @@ -403,7 +481,7 @@ class ZingMp3HubIE(ZingMp3BaseIE):
'url': 'https://zingmp3.vn/hub/Nhac-Moi/IWZ9Z0CA.html',
'info_dict': {
'id': 'IWZ9Z0CA',
'title': 'Nhạc Mới',
'title': 'BXH Nhạc Mới',
'description': 'md5:1cc31b68a6f746427b07b2756c22a558',
},
'playlist_mincount': 20,
Expand All @@ -424,3 +502,129 @@ def _real_extract(self, url):
'sections', lambda _, v: v['sectionId'] == 'hub', 'items', ...)))
return self.playlist_result(
entries, song_id, hub_detail.get('title'), hub_detail.get('description'))


class ZingMp3LiveRadioIE(ZingMp3BaseIE):
    # Handles live radio pages, i.e. URLs of the form
    # https://zingmp3.vn/liveradio/<id>.html (or the mp3.zing.vn host).
    IE_NAME = 'zingmp3:liveradio'
    # The 'type' group always captures the literal 'liveradio'; it is reused
    # below as the first argument to _call_api.
    _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:liveradio))/(?P<id>\w+)(?:\.html|\?)'
    _TESTS = [{
        'url': 'https://zingmp3.vn/liveradio/IWZ979UB.html',
        'info_dict': {
            'id': 'IWZ979UB',
            'title': r're:^V\-POP',
            'description': 'md5:aa857f8a91dc9ce69e862a809e4bdc10',
            'protocol': 'm3u8_native',
            'ext': 'mp4',
            'view_count': int,
            'thumbnail': r're:^https?://.*\.jpg',
            'like_count': int,
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://zingmp3.vn/liveradio/IWZ97CWB.html',
        'info_dict': {
            'id': 'IWZ97CWB',
            'title': r're:^Live\s247',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'protocol': 'm3u8_native',
            'ext': 'm4a',
            'view_count': int,
            'thumbnail': r're:^https?://.*\.jpg',
            'like_count': int,
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single live radio station.

        Raises ExtractorError (expected) when the API response carries no
        'streaming' manifest URL.
        """
        mobj = self._match_valid_url(url)
        api_slug = mobj.group('type')
        station_id = mobj.group('id')

        station = self._call_api(api_slug, {'id': station_id})

        stream_url = station.get('streaming')
        if not stream_url:
            raise ExtractorError('This radio is offline.', expected=True)

        # fatal=False: a manifest that fails to download should not abort
        # extraction here.
        formats, subs = self._extract_m3u8_formats_and_subtitles(
            stream_url, station_id, fatal=False)

        # get_all=False makes the branching thumbnail path resolve to a single
        # value instead of a list of every match.
        metadata = traverse_obj(station, {
            'title': 'title',
            'thumbnail': (('thumbnail', 'thumbnailM', 'thumbnailV', 'thumbnailH'), {url_or_none}),
            'view_count': ('activeUsers', {int_or_none}),
            'like_count': ('totalReaction', {int_or_none}),
            'description': 'description',
        }, get_all=False)

        return {
            'id': station_id,
            'is_live': True,
            'formats': formats,
            'subtitles': subs,
            **metadata,
        }


class ZingMp3PodcastEpisodeIE(ZingMp3BaseIE):
    # Handles 'pgr' and 'cgr' pages that carry an ID, e.g.
    # https://zingmp3.vn/pgr/<slug>/<id>.html; each is returned as a playlist.
    IE_NAME = 'zingmp3:podcast-episode'
    _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'pgr|cgr'
    _TESTS = [{
        'url': 'https://zingmp3.vn/pgr/Nhac-Moi-Moi-Ngay/68Z9W66B.html',
        'info_dict': {
            'id': '68Z9W66B',
            'title': 'Nhạc Mới Mỗi Ngày',
            'description': 'md5:2875dfa951f8e5356742f1610cf20691',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'https://zingmp3.vn/cgr/Am-nhac/IWZ980AO.html',
        'info_dict': {
            'id': 'IWZ980AO',
            'title': 'Âm nhạc',
        },
        'playlist_mincount': 2,
    }]

    def _fetch_page(self, eps_id, url_type, page):
        """Request one page of `url_type` results for `eps_id` from the API."""
        # NOTE(review): presumably invoked by the base class's _paged_list;
        # that helper is not visible here -- confirm.
        query = {
            'id': eps_id,
            'page': page,
            'count': self._PER_PAGE,
        }
        return self._call_api(url_type, query)

    def _real_extract(self, url):
        """Return a playlist for the matched 'pgr' or 'cgr' page."""
        mobj = self._match_valid_url(url)
        podcast_id = mobj.group('id')
        url_type = mobj.group('type')

        # The page-level call supplies the playlist title and description.
        podcast_info = self._call_api(url_type, {'id': podcast_id})

        # 'pgr' pages use the 'pgr-list' endpoint; anything else ('cgr')
        # uses 'cgr-list'.
        if url_type == 'pgr':
            list_type = 'pgr-list'
        else:
            list_type = 'cgr-list'
        entries = self._paged_list(podcast_id, list_type)

        title = podcast_info.get('title')
        description = podcast_info.get('description')
        return self.playlist_result(entries, podcast_id, title, description)


class ZingMp3PodcastIE(ZingMp3BaseIE):
    # Handles the ID-less listing pages /cgr, /top-podcast and /podcast-new;
    # the path segment itself becomes the playlist ID.
    IE_NAME = 'zingmp3:podcast'
    _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<id>(?:cgr|top-podcast|podcast-new))/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://zingmp3.vn/cgr',
        'info_dict': {
            'id': 'cgr',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://zingmp3.vn/top-podcast',
        'info_dict': {
            'id': 'top-podcast',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://zingmp3.vn/podcast-new',
        'info_dict': {
            'id': 'podcast-new',
        },
        'playlist_mincount': 4,
    }]

    def _real_extract(self, url):
        """Return a playlist built from the 'items' of the matched listing page."""
        page_name = self._match_id(url)

        # Only 'podcast-new' sends an extra 'type' parameter.
        if page_name == 'podcast-new':
            query = {'id': page_name, 'type': 'new'}
        else:
            query = {'id': page_name}

        # The bare /cgr page is requested through the 'cgrs' slug; the other
        # pages use their own name as the slug.
        api_slug = 'cgrs' if page_name == 'cgr' else page_name
        response = self._call_api(api_slug, query)

        entries = self._parse_items(response['items'])
        return self.playlist_result(entries, page_name)