Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rt media #25321

Closed
wants to merge 3 commits into from
Closed

Rt media #25321

Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions youtube_dl/extractor/redtube.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
Expand Down Expand Up @@ -75,10 +76,25 @@ def _real_extract(self, url):
'format_id': format_id,
'height': int_or_none(format_id),
})
# Locate the start of the JSON array assigned to mediaDefinition.
# Note: the non-greedy pattern below stops at the first ']', so it
# does not necessarily match the complete JSON expression; the
# complete JSON may extend beyond that ']'. The pattern is therefore
# used only to find where the JSON expression starts. We don't yet
# know where the JSON expression ends.
mobj = re.search(r'mediaDefinition\s*:\s*(\[.+?\])', webpage)
doc1 = webpage[mobj.start(1):] # get json plus remaining html
try:
# Use the JSON decoder to find the end of the JSON expression.
# doc1 is the JSON followed by the remaining html, so the decoder
# will raise an exception at the point where the text stops being
# valid JSON.
json.loads(doc1)
except json.JSONDecodeError as exc:
# Use the exception 'pos' attribute to get the complete
# and valid json expression
doc1 = doc1[0:exc.pos]
medias = self._parse_json(
self._search_regex(
r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
'media definitions', default='{}'),
doc1,
video_id, fatal=False)
if medias and isinstance(medias, list):
for media in medias:
Expand Down