Skip to content

Commit

Permalink
[sites:zoom] Add support for Zoom.us (Closes #159)
Browse files Browse the repository at this point in the history
  • Loading branch information
xenova committed Nov 30, 2022
1 parent 2c92ed5 commit e6adb4b
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ Supported sites:
- YouTube.com - Livestreams, past broadcasts and premieres.
- Twitch.tv - Livestreams, past broadcasts and clips.
- Reddit.com - Livestreams, past broadcasts
- Zoom.us - Past broadcasts
- Facebook.com (currently in development) - Livestreams and past
broadcasts.

Expand Down
2 changes: 1 addition & 1 deletion chat_downloader/sites/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from .youtube import YouTubeChatDownloader
from .twitch import TwitchChatDownloader
# from .facebook import FacebookChatDownloader
from .reddit import RedditChatDownloader
from .zoom import ZoomChatDownloader
from .common import BaseChatDownloader


Expand Down
156 changes: 156 additions & 0 deletions chat_downloader/sites/zoom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@


import json
import re
from .common import (
BaseChatDownloader,
Chat,
Remapper as r
)
from ..utils.core import (
time_to_seconds,
regex_search,
ensure_seconds
)
from ..errors import (
SiteError,
ParsingError
)

# TODO add debugging options
# from ..debugging import (
# log,
# debug_log
# )


class ZoomError(SiteError):
"""Raised when an error occurs with a Zoom video."""
pass


class ZoomChatDownloader(BaseChatDownloader):
_NAME = 'zoom.us'

_ZOOM_HOMEPAGE = 'https://zoom.us/'
_ZOOM_PATH_TEMPLATE = 'rec/play/{id}'

_INITIAL_INFO_REGEX = r'(?s)window\.__data__\s*=\s*({.+?});'
_CHAT_MESSAGES_REGEX = r'window\.__data__\.chatList\.push\((\{[\s\S]+?\})\)'

_SITE_DEFAULT_PARAMS = {
'format': 'default', # TODO create zoom format
}

_REMAPPING = {
'username': 'author_name',
'time': 'time_text',
'content': 'message',
}

_TESTS = [
{
'name': 'Get chat messages from past broadcast #1',
'params': {
'url': 'https://zoom.us/rec/play/6ccrIuigqG83GIaT4wSDAv59W9W5J_-s1HUe_6UPykq3V3hVN1emMucTYLEJiA87rIkEPcGptB0Dp_dH',
'max_messages': 10
},
'expected_result': {
'messages_condition': lambda messages: len(messages) > 0,
}
},
{
'name': 'Get chat messages from past broadcast #2',
'params': {
'url': 'https://zoom.us/rec/play/65V5deGq-Do3T9bHuASDAv4tW420f_ms1iIb-vIKzEqzUiEFNFWiYONAN-vRvNmKnlg6z95Y4mNQ9QJQ',
'max_messages': 10
},
'expected_result': {
'messages_condition': lambda messages: len(messages) > 0,
}
},
{
'name': 'Get chat messages from past broadcast #3',
'params': {
'url': 'https://zoom.us/rec/play/75Usc7j8rjg3E92S4gSDAf95W9S9K6-sg3dP_voImR60WiEHYVSmYrsbNwNE1_6-jwlwLx5cg1IeyjM',
'max_messages': 10
},
'expected_result': {
'messages_condition': lambda messages: len(messages) > 0,
}
},
{
'name': 'Invalid video',
'params': {
'url': 'https://zoom.us/rec/play/invalid',
},
'expected_result': {
'error': ZoomError
}
},
]

# Regex provided by youtube-dl
_VALID_URLS = {
'_get_chat_by_video_id': r'(?P<base_url>https?://(?:[^.]+\.)?zoom.us/)rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9_.-]+)',
}
_ERROR_MESSAGE_REGEX = r'<span class="error-message">\s*([^<]+?)\s*<\/span>'

def _get_chat_by_video_id(self, match, params):
match_id = match.group('id')
base_url = match.group('base_url')
return self.get_chat_by_video_id(match_id, params, base_url=base_url)

def get_chat_by_video_id(self, video_id, params, base_url=_ZOOM_HOMEPAGE):

url = base_url + self._ZOOM_PATH_TEMPLATE.format(id=video_id)
page_data = self._session_get(url).text

json_string = regex_search(page_data, self._INITIAL_INFO_REGEX)

if json_string is None:
error_message = regex_search(page_data, self._ERROR_MESSAGE_REGEX)
if error_message:
raise ZoomError(error_message.split('\n')[0])
else:
raise ParsingError('Error parsing video')

initial_info = self._parse_js_dict(json_string)

video_type = 'video' if initial_info.get('isVideo') else 'not_video'

return Chat(
self._get_chat_messages(page_data, params),

title=initial_info.get('topic'),
video_type=video_type,
start_time=initial_info.get('fileStartTime'),
id=initial_info.get('recordingId'),
)

def _parse_js_dict(self, json_string):
# Helper method to parse JS dictionary format
result = re.sub(r"^([^:\s]+):\s+", r'"\g<1>": ',
json_string, 0, re.MULTILINE)
result = result.replace(r"\'", "'")
result = re.sub(r":\s+'(.*)'", ": \"\\g<1>\"", result, 0, re.MULTILINE)
return json.loads(result)

def _get_chat_messages(self, page_data, params):
start_time = ensure_seconds(params.get('start_time'), 0)
end_time = ensure_seconds(params.get('end_time'), float('inf'))

for item in re.findall(self._CHAT_MESSAGES_REGEX, page_data):
data = self._parse_js_dict(item)
data = r.remap_dict(data, self._REMAPPING)

# Process time inforamtion
data['time_in_seconds'] = time_to_seconds(data['time_text'])
if data['time_in_seconds'] < start_time:
continue

if data['time_in_seconds'] > end_time:
return

BaseChatDownloader._move_to_dict(data, 'author')
yield data
1 change: 1 addition & 0 deletions docs/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ Supported sites:
- YouTube.com - Livestreams, past broadcasts and premieres.
- Twitch.tv - Livestreams, past broadcasts and clips.
- Reddit.com - Livestreams, past broadcasts
- Zoom.us - Past broadcasts
- Facebook.com (currently in development) - Livestreams and past
broadcasts.

Expand Down
17 changes: 17 additions & 0 deletions docs/source/sites.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,23 @@
:undoc-members:
:show-inheritance:

:py:class:`RedditChatDownloader`
********************************

.. autoclass:: chat_downloader.sites.RedditChatDownloader
:members:
:undoc-members:
:show-inheritance:


:py:class:`ZoomChatDownloader`
********************************

.. autoclass:: chat_downloader.sites.ZoomChatDownloader
:members:
:undoc-members:
:show-inheritance:


:py:class:`BaseChatDownloader`
******************************
Expand Down

0 comments on commit e6adb4b

Please sign in to comment.