Skip to content

Commit

Permalink
[sites:youtube] Fix authentication issue for YouTube member-only live…
Browse files Browse the repository at this point in the history
…streams (Closes #76)

Special thanks to @Lytexx and @tam-carre for bringing up this issue and helping with testing!
  • Loading branch information
xenova committed Jul 4, 2021
2 parents 3c30ead + fab95cb commit 4722fc4
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 42 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Custom
testing/*
cookies.txt

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
124 changes: 82 additions & 42 deletions chat_downloader/sites/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,14 @@

from ..debugging import log

import time
import random
import re
import hashlib
from requests.exceptions import RequestException
from json.decoder import JSONDecodeError
from urllib import parse
from datetime import datetime
from base64 import b64decode


class YouTubeChatDownloader(BaseChatDownloader):
Expand Down Expand Up @@ -303,9 +304,7 @@ def __init__(self, **kwargs):
_YT_VIDEO_TEMPLATE = _YT_HOME + '/watch?v={}'

_YOUTUBE_INIT_API_TEMPLATE = _YT_HOME + '/{}?continuation={}'
_YOUTUBE_CHAT_API_TEMPLATE = _YT_HOME + \
b64decode(
'L3lvdXR1YmVpL3YxL2xpdmVfY2hhdC9nZXRfe30/a2V5PUFJemFTeUFPX0ZKMlNscVU4UTRTVEVITEdDaWx3X1k5XzExcWNXOA==').decode()
_YOUTUBE_CHAT_API_TEMPLATE = _YT_HOME + '/youtubei/v1/live_chat/get_{}?key={}'

_MESSAGE_GROUPS = {
'messages': [
Expand Down Expand Up @@ -1049,23 +1048,6 @@ def get_playlist_items(self, playlist_url):
if playlist_video:
yield self._parse_video(playlist_video)

# "continuationItemRenderer":{
# "trigger":"CONTINUATION_TRIGGER_ON_ITEM_SHOWN",
# "continuationEndpoint":{
# "clickTrackingParams":"CCgQ7zsYACITCNT0zMKim-4CFU-V1QodgR4KyA==",
# "commandMetadata":{
# "webCommandMetadata":{
# "sendPost":true,
# "apiUrl":"/youtubei/v1/browse"
# }
# },
# "continuationCommand":{
# "token":"4qmFsgJhEiRWTFBMRXJ1a1gxVzFPWWpGeDJwRzh6aldpTXVQTUcwRi1MYkkaFENBRjZCbEJVT2tOSFVRJTNEJTNEmgIiUExFcnVrWDFXMU9ZakZ4MnBHOHpqV2lNdVBNRzBGLUxiSQ%3D%3D",
# "request":"CONTINUATION_REQUEST_TYPE_BROWSE"
# }
# }
# }

_CONSENT_ID_REGEX = r'PENDING\+(\d+)'
# https://github.com/ytdl-org/youtube-dl/blob/a8035827177d6b59aca03bd717acb6a9bdd75ada/youtube_dl/extractor/youtube.py#L251

Expand All @@ -1090,6 +1072,25 @@ def _initialize_consent(self):
self.set_cookie_value('.youtube.com', 'CONSENT',
'YES+cb.20210328-17-p0.en+FX+{}'.format(consent_id))

def _generate_sapisidhash_header(self):
sapis_id = self.get_cookie_value('SAPISID')
sapisid_cookie = self.get_cookie_value('__Secure-3PAPISID') or sapis_id

if sapisid_cookie is None:
return

time_now = round(time.time())

# SAPISID cookie is required if not already present
if not sapis_id:
self.set_cookie_value(
'.youtube.com', 'SAPISID', sapisid_cookie, secure=True, expire_time=time_now + 3600)

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
sapisidhash = hashlib.sha1(
f'{time_now} {sapisid_cookie} {self._YT_HOME}'.encode('utf-8')).hexdigest()
return f'SAPISIDHASH {time_now}_{sapisidhash}'

def _get_initial_info(self, url):
html = self._session_get(url).text
yt = re.search(self._YT_INITIAL_DATA_RE, html)
Expand All @@ -1106,6 +1107,11 @@ def _get_initial_video_info(self, video_id):
raise ParsingError(
'Unable to parse initial video data. {}'.format(html))

details = {}

cfg = re.search(self._YT_CFG_RE, html)
details['ytcfg'] = try_parse_json(cfg.group(1)) if cfg else {}

player_response = re.search(self._YT_INITIAL_PLAYER_RESPONSE_RE, html)
player_response_info = try_parse_json(
player_response.group(1)) if player_response else None
Expand All @@ -1118,9 +1124,8 @@ def _get_initial_video_info(self, video_id):
formats = streaming_data.get(
'adaptiveFormats') or streaming_data.get('formats')

details = {
'start_time': float_or_none(multi_get(formats, 0, 'lastModified')),
}
details['start_time'] = float_or_none(
multi_get(formats, 0, 'lastModified'))

# Try to get continuation info
contents = yt_initial_data.get('contents') or {}
Expand Down Expand Up @@ -1229,19 +1234,12 @@ def _get_chat_messages(self, initial_info, params):
if not is_live:
api_type += '_replay'

init_page = self._YOUTUBE_INIT_API_TEMPLATE.format(
api_type, continuation)
# must run to get first few messages, otherwise might miss some
html, yt_info = self._get_initial_info(init_page)
yt_cfg_data = initial_info.get('ytcfg')

cfg = re.search(self._YT_CFG_RE, html)
yt_cfg_data = try_parse_json(cfg.group(1)) if cfg else {}

continuation_params = {
'context': yt_cfg_data.get('INNERTUBE_CONTEXT') or {}
}
api_key = yt_cfg_data.get('INNERTUBE_API_KEY')

continuation_url = self._YOUTUBE_CHAT_API_TEMPLATE.format(api_type)
continuation_url = self._YOUTUBE_CHAT_API_TEMPLATE.format(
api_type, api_key)
offset_milliseconds = (
start_time * 1000) if isinstance(start_time, (float, int)) else None

Expand All @@ -1262,29 +1260,69 @@ def _get_chat_messages(self, initial_info, params):
self.check_for_invalid_types(
messages_types_to_add, self._MESSAGE_TYPES)

click_tracking_params = None

innertube_context = yt_cfg_data.get('INNERTUBE_CONTEXT') or {}

self.update_session_headers({
'x-goog-visitor-id': multi_get(innertube_context, 'client', 'visitorData'),
'x-youtube-client-name': str(yt_cfg_data.get('INNERTUBE_CONTEXT_CLIENT_NAME')),
'x-youtube-client-version': str(yt_cfg_data.get('INNERTUBE_CLIENT_VERSION')),
'origin': self._YT_HOME,
'content-type': 'application/json'
})

message_count = 0
first_time = True
while True:
continuation_params = {
'context': innertube_context,
'continuation': continuation
}

# Update authentication header, if necessary
auth = self._generate_sapisidhash_header()
if auth:
self.update_session_headers({
'authorization': auth,
'x-origin': self._YT_HOME
})

info = None
for attempt_number in attempts(max_attempts):

try:
if first_time:
# must run to get first few messages, otherwise might miss some
init_page = self._YOUTUBE_INIT_API_TEMPLATE.format(
api_type, continuation)
html, yt_info = self._get_initial_info(init_page)

if not first_time:

continuation_params['continuation'] = continuation

else:
if not is_live and offset_milliseconds is not None:
continuation_params['currentPlayerState'] = {
'playerOffsetMs': offset_milliseconds}
log('debug', 'Continuation: {}'.format(continuation))

if click_tracking_params:
continuation_params['context']['clickTracking'] = {
'clickTrackingParams': click_tracking_params}

yt_info = self._session_post(
continuation_url, json=continuation_params).json()

debug_info = {
'click_tracking': multi_get(continuation_params, 'context', 'clickTracking'),
'continuation': multi_get(continuation_params, 'continuation')
}
log('debug', 'Continuation: {}'.format(debug_info))

info = multi_get(
yt_info, 'continuationContents', 'liveChatContinuation')

logged_in_info = multi_get(
yt_info, 'responseContext', 'serviceTrackingParams', 1, 'params', 0)
log('debug', 'Logged-in info: {}'.format(logged_in_info))

if not info:
log('debug', 'No continuation information found: {}'.format(
yt_info))
Expand All @@ -1294,8 +1332,6 @@ def _get_chat_messages(self, initial_info, params):

except (JSONDecodeError, RequestException) as e:
self.retry(attempt_number, max_attempts, e, retry_timeout)
self.clear_cookies()

continue

actions = info.get('actions') or []
Expand Down Expand Up @@ -1517,12 +1553,16 @@ def _get_chat_messages(self, initial_info, params):
continuation_key = try_get_first_key(cont)
continuation_info = cont[continuation_key]

log('debug', 'Continuation info: {}'.format(continuation_info))

if continuation_key in self._KNOWN_CHAT_CONTINUATIONS:

# set new chat continuation
# overwrite if there is continuation data
continuation = continuation_info.get('continuation')

click_tracking_params = continuation_info.get(
'clickTrackingParams') or continuation_info.get('trackingParams')
# there is a chat continuation
no_continuation = False

Expand Down

0 comments on commit 4722fc4

Please sign in to comment.