Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/yt-dlp/yt-dlp into ytdlp
Browse files Browse the repository at this point in the history
* 'master' of https://github.com/yt-dlp/yt-dlp:
  [Piapro] Extract description with break lines
  Ignore `mhtml` formats from `-f mergeall`
  [AfreecaTV] Add `AfreecaTVUserIE` (#3286)
  [FfmpegMetadata] Write id3v1 tags
  [youtube] Fix uploader for collaborative playlists (#3332)
  [TVer] Fix extractor (#3268)
  [test] Add `test_locked_file`
  [utils] locked_file: Fix non-blocking non-exclusive lock
  • Loading branch information
Lesmiscore committed Apr 7, 2022
2 parents 45c2c39 + b52e788 commit 87279b8
Show file tree
Hide file tree
Showing 10 changed files with 148 additions and 32 deletions.
31 changes: 31 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
is_html,
js_to_json,
limit_length,
locked_file,
merge_dicts,
mimetype2ext,
month_by_name,
Expand Down Expand Up @@ -1795,6 +1796,36 @@ def test_hide_login_info(self):
self.assertEqual(Config.hide_login_info(['--username=foo']),
['--username=PRIVATE'])

def test_locked_file(self):
    """Check which open modes `locked_file` admits while the file is already held.

    For each mode in MODES the file is opened and kept open, and then a
    second open is attempted in every mode. The expectations encoded below:

    * a second open in a writing mode ('w' or 'a') must be refused with
      BlockingIOError or PermissionError, whatever mode holds the file;
    * a second open for reading should succeed; when it is refused, the
      case is only reported as a known issue instead of failing the test.

    The third positional argument passed to `locked_file` is False
    (presumably the `block` flag, i.e. non-blocking -- confirm against
    `locked_file`'s signature).
    """
    TEXT = 'test_locked_file\n'
    FILE = 'test_locked_file.ytdl'
    # Order is important: 'w' writes TEXT, 'a' writes it a second time,
    # and 'r' then verifies that the file holds exactly TEXT * 2
    MODES = 'war'

    try:
        for lock_mode in MODES:
            with locked_file(FILE, lock_mode, False) as f:
                if lock_mode == 'r':
                    self.assertEqual(f.read(), TEXT * 2, 'Wrong file content')
                else:
                    f.write(TEXT)
                # Probe every mode while the outer handle is still open
                for test_mode in MODES:
                    testing_write = test_mode != 'r'
                    try:
                        with locked_file(FILE, test_mode, False):
                            pass
                    except (BlockingIOError, PermissionError):
                        if not testing_write:  # FIXME
                            print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})')
                            continue
                        self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}')
                    else:
                        self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}')
    finally:
        # Best-effort cleanup. Only filesystem errors (missing file, file
        # still in use, ...) are ignored, so an unrelated failure here is
        # not silently swallowed
        try:
            os.remove(FILE)
        except OSError:
            pass


if __name__ == '__main__':
unittest.main()
3 changes: 2 additions & 1 deletion yt_dlp/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -2260,7 +2260,8 @@ def selector_function(ctx):
yield from _check_formats(ctx['formats'][::-1])
elif format_spec == 'mergeall':
def selector_function(ctx):
formats = list(_check_formats(ctx['formats']))
formats = list(_check_formats(
f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
if not formats:
return
merged_format = formats[-1]
Expand Down
58 changes: 57 additions & 1 deletion yt_dlp/extractor/afreecatv.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals

import functools
import re

from .common import InfoExtractor
from ..compat import compat_xpath
from ..utils import (
ExtractorError,
OnDemandPagedList,
date_from_str,
determine_ext,
ExtractorError,
int_or_none,
qualities,
traverse_obj,
Expand Down Expand Up @@ -482,3 +484,57 @@ def _real_extract(self, url):
'formats': formats,
'is_live': True,
}


class AfreecaTVUserIE(InfoExtractor):
    """Playlist extractor for the VOD listing pages of an AfreecaTV broadcaster.

    Matches `https://bj.afreecatv.com/<user>/vods[/<category>]` and yields
    one `AfreecaTVIE` URL result per listed video. When the URL carries no
    category, 'all' is used.
    """
    IE_NAME = 'afreecatv:user'
    # `slug_type` stops at `?` and `#` so that a query string or fragment is
    # never captured as the category (it is interpolated into the API path
    # and into the playlist title below)
    _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/?#]+)?'
    _TESTS = [{
        'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - review',
        },
        'playlist_count': 218,
    }, {
        'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
        'info_dict': {
            '_type': 'playlist',
            'id': 'parang1995',
            'title': 'parang1995 - highlight',
        },
        'playlist_count': 997,
    }, {
        'url': 'https://bj.afreecatv.com/ryuryu24/vods',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - all',
        },
        'playlist_count': 221,
    }, {
        'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ryuryu24',
            'title': 'ryuryu24 - balloonclip',
        },
        'playlist_count': 0,
    }]
    # Number of entries requested per API page; also passed to OnDemandPagedList
    _PER_PAGE = 60

    def _fetch_page(self, user_id, user_type, page):
        """Yield a URL result for every video on one page of the listing.

        `page` is incremented before being sent to the API (presumably
        because OnDemandPagedList supplies a 0-based index while the API
        counts from 1 -- confirm against OnDemandPagedList).
        """
        page += 1
        info = self._download_json(
            f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
            query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
            note=f'Downloading {user_type} video page {page}')
        for item in info['data']:
            yield self.url_result(
                f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])

    def _real_extract(self, url):
        """Build the playlist result for the user and category named in `url`."""
        user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
        # No category in the URL means the combined listing
        user_type = user_type or 'all'
        entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, user_type), self._PER_PAGE)
        return self.playlist_result(entries, user_id, f'{user_id} - {user_type}')
4 changes: 2 additions & 2 deletions yt_dlp/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
compat_getpass,
compat_http_client,
compat_os_name,
compat_Pattern,
compat_str,
compat_urllib_error,
compat_urllib_parse_unquote,
Expand All @@ -45,7 +46,6 @@
base_url,
bug_reports_message,
clean_html,
compiled_regex_type,
determine_ext,
determine_protocol,
dict_get,
Expand Down Expand Up @@ -1270,7 +1270,7 @@ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, f
"""
if string is None:
return None
if isinstance(pattern, (str, compat_str, compiled_regex_type)):
elif isinstance(pattern, (str, compat_Pattern)):
mobj = re.search(pattern, string, flags)
else:
for p in pattern:
Expand Down
1 change: 1 addition & 0 deletions yt_dlp/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from .afreecatv import (
AfreecaTVIE,
AfreecaTVLiveIE,
AfreecaTVUserIE,
)
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
Expand Down
14 changes: 13 additions & 1 deletion yt_dlp/extractor/piapro.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,18 @@ class PiaproIE(InfoExtractor):
'title': '裏表ラバーズ',
'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'note': 'There are break lines in description, mandating (?s) flag',
'url': 'https://piapro.jp/t/9cSd',
'md5': '952bb6d1e8de95050206408a87790676',
'info_dict': {
'id': '9cSd',
'ext': 'mp3',
'title': '青に溶けた風船 / 初音ミク',
'description': 'md5:d395a9bd151447631a5a1460bc7f9132',
'uploader': 'シアン・キノ',
'uploader_id': 'cyankino',
}
}]

_login_status = False
Expand Down Expand Up @@ -81,7 +93,7 @@ def _real_extract(self, url):
return {
'id': video_id,
'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False),
'description': self._html_search_regex(r'<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False),
'description': self._html_search_regex(r'(?s)<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False),
'uploader': uploader,
'uploader_id': uploader_id,
'timestamp': unified_timestamp(create_date, False),
Expand Down
21 changes: 8 additions & 13 deletions yt_dlp/extractor/tver.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
ExtractorError,
Expand All @@ -13,15 +11,13 @@


class TVerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?:lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)'
# NOTE: episode/ is an old URL
_NEW_URL_COMPONENT = '|'.join(re.escape(f'/{x}/') for x in ('series', 'episodes'))
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'skip': 'videos are only available for 7 days',
'url': 'https://tver.jp/episodes/ephss8yveb',
'info_dict': {
'title': '#44 料理と値段と店主にびっくり オモてなしすぎウマい店 2時間SP',
'description': '【宮城】極厚とんかつ定食500円 マル秘女性歌手大ファン店主\n【福岡】学生感動パワー店主!!名物パワー定食って!?\n【埼玉】暴れん坊そば名人!!弟子50人に!?師弟愛シーズン3',
'description': 'md5:66985373a66fed8ad3cd595a3cfebb13',
},
'add_ie': ['BrightcoveNew'],
}, {
Expand All @@ -30,7 +26,7 @@ class TVerIE(InfoExtractor):
'info_dict': {
# sorry but this is "correct"
'title': '4月11日(月)23時06分 ~ 放送予定',
'description': '吉祥寺の格安シェアハウスに引っ越して来た高校教師の安彦聡(増田貴久)や、元ファッション誌編集長の大庭桜(田中みな実)など6人。鍵が掛かった部屋に絶対入らないことが絶対ルール。奇妙な共同生活が今始まる! テレビ東京にて4月11日(月)夜11時6分放送スタート!',
'description': 'md5:4029cc5f4b1e8090dfc5b7bd2bc5cd0b',
},
'add_ie': ['BrightcoveNew'],
}, {
Expand All @@ -56,12 +52,11 @@ def _real_initialize(self):
self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token'))

def _real_extract(self, url):
video_id = self._match_id(url)
if not re.search(self._NEW_URL_COMPONENT, url):
webpage = self._download_webpage(
url, video_id, note='Resolving to new URL')
video_id, video_type = self._match_valid_url(url).group('id', 'type')
if video_type not in {'series', 'episodes'}:
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
video_id = self._match_id(self._search_regex(
(r'canonical"\s*href="(https?://tver\.jp/.+?)"', r'&link=(https?://tver\.jp/.+?)[?&]'),
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
webpage, 'url regex'))
video_info = self._download_json(
f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
Expand All @@ -78,7 +73,7 @@ def _real_extract(self, url):

additional_info = self._download_json(
f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
video_id,
video_id, fatal=False,
query={
'platform_uid': self._PLATFORM_UID,
'platform_token': self._PLATFORM_TOKEN,
Expand Down
27 changes: 23 additions & 4 deletions yt_dlp/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -4201,14 +4201,15 @@ def _extract_selected_tab(tabs, fatal=True):
if fatal:
raise ExtractorError('Unable to find selected tab')

@classmethod
def _extract_uploader(cls, data):
def _extract_uploader(self, data):
uploader = {}
renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
owner = try_get(
renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
if owner:
uploader['uploader'] = owner.get('text')
owner_text = owner.get('text')
uploader['uploader'] = self._search_regex(
r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
uploader['uploader_id'] = try_get(
owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
uploader['uploader_url'] = urljoin(
Expand Down Expand Up @@ -5228,6 +5229,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'note': 'non-standard redirect to regional channel',
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
'only_matching': True
}, {
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'info_dict': {
'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'modified_date': '20220407',
'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
'tags': [],
'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
'uploader': 'pukkandan',
'availability': 'unlisted',
'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
'channel': 'pukkandan',
'description': 'Test for collaborative playlist',
'title': 'yt-dlp test - collaborative playlist',
'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
},
'playlist_mincount': 2
}]

@classmethod
Expand Down
3 changes: 3 additions & 0 deletions yt_dlp/postprocessor/ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,9 @@ def add(meta_list, info_list=None):
if value is not None and mobj:
metadata[mobj.group('i') or 'common'][mobj.group('key')] = value

# Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags
yield ('-write_id3v1', '1')

for name, value in metadata['common'].items():
yield ('-metadata', f'{name}={value}')

Expand Down
18 changes: 8 additions & 10 deletions yt_dlp/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,8 +654,9 @@ def sanitize_open(filename, open_mode):
try:
try:
if sys.platform == 'win32':
# FIXME: Windows only has mandatory locking which also locks the file from being read.
# So for now, don't lock the file on windows. Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
# FIXME: An exclusive lock also locks the file from being read.
# Since windows locks are mandatory, don't lock the file on windows (for now).
# Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
raise LockingUnsupportedError()
stream = locked_file(filename, open_mode, block=False).__enter__()
except LockingUnsupportedError:
Expand Down Expand Up @@ -2204,18 +2205,15 @@ def _unlock_file(f):
import fcntl

def _lock_file(f, exclusive, block):
flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
if not block:
flags |= fcntl.LOCK_NB
try:
fcntl.flock(f,
fcntl.LOCK_SH if not exclusive
else fcntl.LOCK_EX if block
else fcntl.LOCK_EX | fcntl.LOCK_NB)
fcntl.flock(f, flags)
except BlockingIOError:
raise
except OSError: # AOSP does not have flock()
fcntl.lockf(f,
fcntl.LOCK_SH if not exclusive
else fcntl.LOCK_EX if block
else fcntl.LOCK_EX | fcntl.LOCK_NB)
fcntl.lockf(f, flags)

def _unlock_file(f):
try:
Expand Down

0 comments on commit 87279b8

Please sign in to comment.