ytdl-org · stentrav · May 19, 2020 · May 19, 2020 · May 19, 2020
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
@@ -75,10 +76,25 @@ def _real_extract(self, url):
                         'format_id': format_id,
                         'height': int_or_none(format_id),
                     })
+        # Locate the start of the mediaDefinition JSON array.
+        # Note: the non-greedy regex stops at the first ']', so
+        # its match is not necessarily the complete JSON value;
+        # the array may extend beyond that ']'. The match is used
+        # only to find where the JSON starts. Where it ends is
+        # not yet known at this point.
+        mobj = re.search(r'mediaDefinition\s*:\s*(\[.+?\])', webpage)
+        doc1 = webpage[mobj.start(1):] # get json plus remaining html
+        try:
+            # Use the JSON decoder to find the end of the JSON
+            # value. The decoder raises an exception at the point
+            # where the input stops being valid JSON.
+            json.loads(doc1)
+        except json.JSONDecodeError as exc:
+            # The exception's 'pos' attribute is the offset where
+            # decoding failed; keep only the text before it.
+            doc1 = doc1[0:exc.pos]
         medias = self._parse_json(
-            self._search_regex(
-                r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
-                'media definitions', default='{}'),
+            doc1,
             video_id, fatal=False)
         if medias and isinstance(medias, list):
             for media in medias: