Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ie/Vbox7] Fix extractor #9100

Merged
7 commits merged on Jan 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
82 changes: 43 additions & 39 deletions yt_dlp/extractor/vbox7.py
@@ -1,5 +1,6 @@
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import ExtractorError, base_url, int_or_none, url_basename
from ..utils.traversal import traverse_obj


class Vbox7IE(InfoExtractor):
Expand All @@ -19,7 +20,7 @@ class Vbox7IE(InfoExtractor):
_GEO_COUNTRIES = ['BG']
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
'md5': '50ca1f78345a9c15391af47d8062d074',
'info_dict': {
'id': '0946fff23c',
'ext': 'mp4',
Expand All @@ -29,19 +30,25 @@ class Vbox7IE(InfoExtractor):
'timestamp': 1470982814,
'upload_date': '20160812',
'uploader': 'zdraveibulgaria',
},
'params': {
'proxy': '127.0.0.1:8118',
'view_count': int,
'duration': 2640,
},
}, {
'url': 'http://vbox7.com/play:249bb972c2',
'md5': '99f65c0c9ef9b682b97313e052734c3f',
'md5': 'da1dd2eb245200cb86e6d09d43232116',
'info_dict': {
'id': '249bb972c2',
'ext': 'mp4',
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
'uploader': 'svideteliat_ot_varshava',
'view_count': int,
'timestamp': 1360215023,
'thumbnail': 'https://i49.vbox7.com/design/iconci/png/noimg6.png',
Copy link
Contributor

@dirkf dirkf Feb 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really? noimg6.png isn't a likely name for a useful image (and the image itself isn't useful). Should it be filtered out?
I get:

            'thumbnail': 'https://i49.vbox7.com/o/249/249bb972c20.jpg',

and that is an image that looks like a frame of the video.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this to #8968. I am not sure why the API responded like this when I wrote the extractor. Maybe the image took some time to be generated?
We will have to see how many of the videos remain in ~4 days, when the site will supposedly delete most user-generated content.

'description': 'Смях! Чудо - чист за секунди - Скрита камера',
'upload_date': '20130207',
'duration': 83,
},
'skip': 'georestricted',
'expected_warnings': ['Failed to download m3u8 information'],
}, {
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
'only_matching': True,
Expand All @@ -53,41 +60,38 @@ class Vbox7IE(InfoExtractor):
def _real_extract(self, url):
# NOTE(review): this method is shown as a rendered diff whose +/- markers and
# indentation were stripped, so removed (pre-PR) and added (post-PR) lines are
# interleaved and the text is not runnable as-is. The "old"/"new" labels below
# are inferred from the diff layout — confirm against the merged file.
#
# Purpose: obtain video_id via self._match_id(url), query the Vbox7 player
# API for that ID, and return an info dict describing the video.
video_id = self._match_id(url)

# old: fetch JSON from nextvideo.php, raise an expected ExtractorError if the
# response carries an 'error' key, then read title/src from
# response['options']. A src containing '/na.mp4' is treated as geo-restricted.
response = self._download_json(
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
video_id)

if 'error' in response:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)

video = response['options']

title = video['title']
video_url = video['src']

if '/na.mp4' in video_url:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
# new: fetch the 'options' object from the aj/player/item/options endpoint,
# passing the ID as the 'vid' query parameter.
data = self._download_json(
'https://www.vbox7.com/aj/player/item/options', video_id,
query={'vid': video_id})['options']

# old: optional uploader taken from the old response.
uploader = video.get('uploader')
# new: a missing, empty or 'blank' src is reported as an expected
# 'Video is unavailable' error.
src_url = data.get('src')
if src_url in (None, '', 'blank'):
raise ExtractorError('Video is unavailable', expected=True)

# old: download the watch page (called with fatal=None).
webpage = self._download_webpage(
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
# new: take url_basename(src_url) (presumably the file name — confirm), drop
# the extension and the last '_'-separated suffix; a result of 'vn' is
# treated as geo-restricted.
fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
if fmt_base == 'vn':
self.raise_geo_restricted()

# old: start from an empty info dict.
info = {}
# new: prefix the base name with base_url(src_url) so that format URLs can be
# built from it below.
fmt_base = base_url(src_url) + fmt_base

# old: if the page was fetched, search it for JSON-LD after rewriting the
# '"/*@context"' key to '"@context"'.
if webpage:
info = self._search_json_ld(
webpage.replace('"/*@context"', '"@context"'), video_id,
fatal=False)
# new: HLS formats from '<fmt_base>.m3u8' (non-fatal), followed by one direct
# MP4 format per entry of data['resolutions'] selected by the traverse_obj
# path (entries equal to 0 are filtered out by the lambda).
formats = self._extract_m3u8_formats(
f'{fmt_base}.m3u8', video_id, m3u8_id='hls', fatal=False)
# TODO: Add MPD formats, when dash range support is added
for res in traverse_obj(data, ('resolutions', lambda _, v: v != 0, {int})):
formats.append({
'url': f'{fmt_base}_{res}.mp4',
'format_id': f'http-{res}',
'height': res,
})

# old: `info.update({...})` merged id/title/url/uploader/thumbnail into the
# JSON-LD info and returned it.
# new: `return {...}` builds the result directly; the 'id' line is shared by
# both versions.
info.update({
return {
'id': video_id,
'title': title,
'url': video_url,
'uploader': uploader,
'thumbnail': self._proto_relative_url(
info.get('thumbnail') or self._og_search_thumbnail(webpage),
'http:'),
})
return info
# new: formats, then JSON-LD metadata from the watch page (both the page
# download and the JSON-LD search are non-fatal; a failed download falls back
# to ''), then title/uploader/duration read from the API data. Later entries
# in the dict literal override earlier ones with the same key.
'formats': formats,
**self._search_json_ld(self._download_webpage(
f'https://www.vbox7.com/play:{video_id}', video_id, fatal=False) or '', video_id, fatal=False),
**traverse_obj(data, {
'title': ('title', {str}),
'uploader': ('uploader', {str}),
'duration': ('duration', {int_or_none}),
}),
}