From baf7ab7ef5ae57897800bbc7f58b7574950795fe Mon Sep 17 00:00:00 2001 From: ajj8 <35781586+ajj8@users.noreply.github.com> Date: Wed, 13 Nov 2019 18:50:33 +0000 Subject: [PATCH 1/2] [pornhub] Fix PC page video extraction --- youtube_dl/extractor/pornhub.py | 54 ++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index ba0ad7da29d..beae14c9805 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -173,6 +173,32 @@ def dl_webpage(platform): 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id), video_id, 'Downloading %s webpage' % platform) + def parse_js(_webpage, _regex): + js_vars = {} + assignments = self._search_regex( + _regex, _webpage, + 'encoded url').split(';') + + def parse_js_value(inp): + inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) + if '+' in inp: + inps = inp.split('+') + return functools.reduce( + operator.concat, map(parse_js_value, inps)) + inp = inp.strip() + if inp in js_vars: + return js_vars[inp] + return remove_quotes(inp) + + for assn in assignments: + assn = assn.strip() + if not assn: + continue + assn = re.sub(r'var\s+', '', assn) + vname, value = assn.split('=', 1) + js_vars[vname] = parse_js_value(value) + return js_vars + webpage = dl_webpage('pc') error_msg = self._html_search_regex( @@ -212,13 +238,14 @@ def dl_webpage(platform): thumbnail = flashvars.get('image_url') duration = int_or_none(flashvars.get('video_duration')) media_definitions = flashvars.get('mediaDefinitions') + js_vars = parse_js(webpage, r'(var.+?rahttps.+?)\n') if isinstance(media_definitions, list): for definition in media_definitions: if not isinstance(definition, dict): continue video_url = definition.get('videoUrl') if not video_url or not isinstance(video_url, compat_str): - continue + video_url = js_vars["quality_%sp" % (definition.get("quality"))] if video_url in video_urls_set: continue video_urls_set.add(video_url) @@ -230,30 +257,7 @@ def dl_webpage(platform): if not video_urls: tv_webpage = dl_webpage('tv') - assignments = self._search_regex( - r'(var.+?mediastring.+?)', tv_webpage, - 'encoded url').split(';') - - js_vars = {} - - def parse_js_value(inp): - inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) - if '+' in inp: - inps = inp.split('+') - return functools.reduce( - operator.concat, map(parse_js_value, inps)) - inp = inp.strip() - if inp in js_vars: - return js_vars[inp] - return remove_quotes(inp) - - for assn in assignments: - assn = assn.strip() - if not assn: - continue - assn = re.sub(r'var\s+', '', assn) - vname, value = assn.split('=', 1) - js_vars[vname] = parse_js_value(value) + js_vars = parse_js(tv_webpage, r'(var.+?mediastring.+?)') video_url = js_vars['mediastring'] if video_url not in video_urls_set: From c004bcf78d5f97ba84de2b703bfc678cf68465f6 Mon Sep 17 00:00:00 2001 From: ajj8 <35781586+ajj8@users.noreply.github.com> Date: Thu, 14 Nov 2019 10:20:19 +0000 Subject: [PATCH 2/2] [pornhub] Fix error on quality higher than 1080p --- youtube_dl/extractor/pornhub.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index beae14c9805..15c64bf9a17 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -245,7 +245,10 @@ def parse_js_value(inp): continue video_url = definition.get('videoUrl') if not video_url or not isinstance(video_url, compat_str): - video_url = js_vars["quality_%sp" % (definition.get("quality"))] + js_vars_qual = js_vars.get('quality_%sp' % (definition.get('quality'))) + if not js_vars_qual: + continue + video_url = js_vars_qual if video_url in video_urls_set: continue video_urls_set.add(video_url)