Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[extractor/nekohacker] Add extractor (#7003)
Authored by: hasezoey
- Loading branch information
Showing
2 changed files
with
218 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
import re | ||
|
||
from .common import InfoExtractor | ||
from ..utils import ( | ||
ExtractorError, | ||
determine_ext, | ||
extract_attributes, | ||
get_element_by_class, | ||
get_element_text_and_html_by_tag, | ||
parse_duration, | ||
traverse_obj, | ||
try_call, | ||
url_or_none, | ||
) | ||
|
||
|
||
class NekoHackerIE(InfoExtractor): | ||
_VALID_URL = r'https?://(?:www\.)?nekohacker\.com/(?P<id>(?!free-dl)[\w-]+)' | ||
_TESTS = [{ | ||
'url': 'https://nekohacker.com/nekoverse/', | ||
'info_dict': { | ||
'id': 'nekoverse', | ||
'title': 'Nekoverse', | ||
}, | ||
'playlist': [ | ||
{ | ||
'url': 'https://nekohacker.com/wp-content/uploads/2022/11/01-Spaceship.mp3', | ||
'md5': '44223701ebedba0467ebda4cc07fb3aa', | ||
'info_dict': { | ||
'id': '1712', | ||
'ext': 'mp3', | ||
'title': 'Spaceship', | ||
'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg', | ||
'vcodec': 'none', | ||
'acodec': 'mp3', | ||
'release_date': '20221101', | ||
'album': 'Nekoverse', | ||
'artist': 'Neko Hacker', | ||
'track': 'Spaceship', | ||
'track_number': 1, | ||
'duration': 195.0 | ||
} | ||
}, | ||
{ | ||
'url': 'https://nekohacker.com/wp-content/uploads/2022/11/02-City-Runner.mp3', | ||
'md5': '8f853c71719389d32bbbd3f1a87b3f08', | ||
'info_dict': { | ||
'id': '1713', | ||
'ext': 'mp3', | ||
'title': 'City Runner', | ||
'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg', | ||
'vcodec': 'none', | ||
'acodec': 'mp3', | ||
'release_date': '20221101', | ||
'album': 'Nekoverse', | ||
'artist': 'Neko Hacker', | ||
'track': 'City Runner', | ||
'track_number': 2, | ||
'duration': 148.0 | ||
} | ||
}, | ||
{ | ||
'url': 'https://nekohacker.com/wp-content/uploads/2022/11/03-Nature-Talk.mp3', | ||
'md5': '5a8a8ae852720cee4c0ac95c7d1a7450', | ||
'info_dict': { | ||
'id': '1714', | ||
'ext': 'mp3', | ||
'title': 'Nature Talk', | ||
'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg', | ||
'vcodec': 'none', | ||
'acodec': 'mp3', | ||
'release_date': '20221101', | ||
'album': 'Nekoverse', | ||
'artist': 'Neko Hacker', | ||
'track': 'Nature Talk', | ||
'track_number': 3, | ||
'duration': 174.0 | ||
} | ||
}, | ||
{ | ||
'url': 'https://nekohacker.com/wp-content/uploads/2022/11/04-Crystal-World.mp3', | ||
'md5': 'd8e59a48061764e50d92386a294abd50', | ||
'info_dict': { | ||
'id': '1715', | ||
'ext': 'mp3', | ||
'title': 'Crystal World', | ||
'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg', | ||
'vcodec': 'none', | ||
'acodec': 'mp3', | ||
'release_date': '20221101', | ||
'album': 'Nekoverse', | ||
'artist': 'Neko Hacker', | ||
'track': 'Crystal World', | ||
'track_number': 4, | ||
'duration': 199.0 | ||
} | ||
} | ||
] | ||
}, { | ||
'url': 'https://nekohacker.com/susume/', | ||
'info_dict': { | ||
'id': 'susume', | ||
'title': '進め!むじなカンパニー', | ||
}, | ||
'playlist': [ | ||
{ | ||
'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-feat.-六科なじむ-CV_-日高里菜-割戶真友-CV_-金元寿子-軽井沢ユキ-CV_-上坂すみれ-出稼ぎガルシア-CV_-金子彩花-.mp3', | ||
'md5': 'fb13f008aa81f26ba48f91fd2d6186ce', | ||
'info_dict': { | ||
'id': '711', | ||
'ext': 'mp3', | ||
'title': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0', | ||
'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png', | ||
'vcodec': 'none', | ||
'acodec': 'mp3', | ||
'release_date': '20210115', | ||
'album': '進め!むじなカンパニー', | ||
'artist': 'Neko Hacker', | ||
'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0', | ||
'track_number': 1, | ||
'duration': None | ||
} | ||
}, | ||
{ | ||
'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-feat.-六科なじむ-CV_-日高里菜-.mp3', | ||
'md5': '028803f70241df512b7764e73396fdd1', | ||
'info_dict': { | ||
'id': '709', | ||
'ext': 'mp3', | ||
'title': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )', | ||
'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png', | ||
'vcodec': 'none', | ||
'acodec': 'mp3', | ||
'release_date': '20210115', | ||
'album': '進め!むじなカンパニー', | ||
'artist': 'Neko Hacker', | ||
'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )', | ||
'track_number': 2, | ||
'duration': None | ||
} | ||
}, | ||
{ | ||
'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-instrumental.mp3', | ||
'md5': 'adde9e9a16e1da5e602b579c247d0fb9', | ||
'info_dict': { | ||
'id': '710', | ||
'ext': 'mp3', | ||
'title': '進め!むじなカンパニー (instrumental)', | ||
'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png', | ||
'vcodec': 'none', | ||
'acodec': 'mp3', | ||
'release_date': '20210115', | ||
'album': '進め!むじなカンパニー', | ||
'artist': 'Neko Hacker', | ||
'track': '進め!むじなカンパニー (instrumental)', | ||
'track_number': 3, | ||
'duration': None | ||
} | ||
}, | ||
{ | ||
'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-instrumental.mp3', | ||
'md5': 'ebb0443039cf5f9ff7fd557ed9b23599', | ||
'info_dict': { | ||
'id': '712', | ||
'ext': 'mp3', | ||
'title': 'むじな de なじむ (instrumental)', | ||
'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png', | ||
'vcodec': 'none', | ||
'acodec': 'mp3', | ||
'release_date': '20210115', | ||
'album': '進め!むじなカンパニー', | ||
'artist': 'Neko Hacker', | ||
'track': 'むじな de なじむ (instrumental)', | ||
'track_number': 4, | ||
'duration': None | ||
} | ||
} | ||
] | ||
}] | ||
|
||
def _real_extract(self, url): | ||
playlist_id = self._match_id(url) | ||
|
||
webpage = self._download_webpage(url, playlist_id) | ||
playlist = get_element_by_class('playlist', webpage) | ||
|
||
if not playlist: | ||
iframe = try_call(lambda: get_element_text_and_html_by_tag('iframe', webpage)[1]) or '' | ||
iframe_src = url_or_none(extract_attributes(iframe).get('src')) | ||
if not iframe_src: | ||
raise ExtractorError('No playlist or embed found in webpage') | ||
elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src): | ||
raise ExtractorError('Spotify embeds are not supported', expected=True) | ||
return self.url_result(url, 'Generic') | ||
|
||
entries = [] | ||
for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1): | ||
entry = traverse_obj(extract_attributes(track), { | ||
'url': ('data-audiopath', {url_or_none}), | ||
'ext': ('data-audiopath', {determine_ext}), | ||
'id': 'data-trackid', | ||
'title': 'data-tracktitle', | ||
'track': 'data-tracktitle', | ||
'album': 'data-albumtitle', | ||
'duration': ('data-tracktime', {parse_duration}), | ||
'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0), | ||
'thumbnail': ('data-albumart', {url_or_none}), | ||
}) | ||
entries.append({ | ||
**entry, | ||
'track_number': track_number, | ||
'artist': 'Neko Hacker', | ||
'vcodec': 'none', | ||
'acodec': 'mp3' if entry['ext'] == 'mp3' else None, | ||
}) | ||
|
||
return self.playlist_result(entries, playlist_id, traverse_obj(entries, (0, 'album'))) |