Skip to content

Commit

Permalink
[extractor/generic] Decode unicode-escaped embed URLs (#5919)
Browse files: browse the repository at this point in the history
Authored by: bashonly
Closes #5854
  • Loading branch information
bashonly committed Jan 2, 2023
1 parent 32a84bc commit 05997b6
Showing 1 changed file with 20 additions and 2 deletions.
22 changes: 20 additions & 2 deletions yt_dlp/extractor/generic.py
Expand Up @@ -2135,7 +2135,8 @@ class GenericIE(InfoExtractor):
'age_limit': 0,
'direct': True,
}
-}, {
+},
+{
'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.',
'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
'info_dict': {
Expand All @@ -2149,7 +2150,23 @@ class GenericIE(InfoExtractor):
'duration': 318.0,
'direct': True,
'age_limit': 0,
-}
+},
+},
+{
+'note': 'JW Player embed with unicode-escape sequences in URL',
+'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
+'info_dict': {
+'id': 'm',
+'ext': 'mp4',
+'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
+'description': 'Mahler\'s ',
+'uploader': 'www.medici.tv',
+'age_limit': 0,
+'thumbnail': r're:^https?://.+\.jpg',
+},
+'params': {
+'skip_download': True,
+},
},
{
'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
Expand Down Expand Up @@ -2751,6 +2768,7 @@ def filter_video(urls):

entries = []
for video_url in orderedSet(found):
+video_url = video_url.encode().decode('unicode-escape')
video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/')
video_url = urllib.parse.urljoin(url, video_url)
Expand Down

0 comments on commit 05997b6

Please sign in to comment.