Skip to content

Commit

Permalink
message: Do not include details of inaccessible users in message data.
Browse files Browse the repository at this point in the history
This commit adds code to not include original details of senders like
name, email and avatar url in the message objects sent through events
and in the response of endpoint used to fetch messages.

This is the last major commit for the project to add support for
limiting guest access to an entire organization.

Fixes #10970.
  • Loading branch information
sahil839 authored and timabbott committed Dec 10, 2023
1 parent 22d59f1 commit 1985685
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 12 deletions.
28 changes: 27 additions & 1 deletion zerver/actions/message_send.py
Expand Up @@ -72,7 +72,7 @@
num_subscribers_for_stream_id,
)
from zerver.lib.stream_topic import StreamTopicTarget
from zerver.lib.streams import access_stream_for_send_message, ensure_stream
from zerver.lib.streams import access_stream_for_send_message, ensure_stream, subscribed_to_stream
from zerver.lib.string_validation import check_stream_name
from zerver.lib.timestamp import timestamp_to_datetime
from zerver.lib.topic import participants_for_topic
Expand All @@ -83,7 +83,9 @@
check_user_can_access_all_users,
get_accessible_user_ids,
get_subscribers_of_target_user_subscriptions,
get_user_ids_who_can_access_user,
get_users_involved_in_dms_with_target_users,
user_access_restricted_in_realm,
)
from zerver.lib.validator import check_widget_content
from zerver.lib.widget import do_widget_post_save_actions
Expand Down Expand Up @@ -1104,6 +1106,7 @@ class UserData(TypedDict):
muted_sender_user_ids=list(send_request.muted_sender_user_ids),
all_bot_user_ids=list(send_request.all_bot_user_ids),
disable_external_notifications=send_request.disable_external_notifications,
realm_host=send_request.realm.host,
)

if send_request.message.is_stream_message():
Expand All @@ -1123,6 +1126,29 @@ class UserData(TypedDict):
if send_request.stream.first_message_id is None:
send_request.stream.first_message_id = send_request.message.id
send_request.stream.save(update_fields=["first_message_id"])

# Performance note: This check can theoretically do
# database queries in a loop if many messages are being
# sent via a single do_send_messages call.
#
# This is not a practical concern at present, because our
# only use case for bulk-sending messages via this API
# endpoint is for direct messages bulk-sent by system
# bots; and for system bots,
# "user_access_restricted_in_realm" will always return
# False without doing any database queries at all.
if user_access_restricted_in_realm(
send_request.message.sender
) and not subscribed_to_stream(send_request.message.sender, send_request.stream.id):
user_ids_who_can_access_sender = get_user_ids_who_can_access_user(
send_request.message.sender
)
user_ids_receiving_event = {user["id"] for user in users}
user_ids_without_access_to_sender = user_ids_receiving_event - set(
user_ids_who_can_access_sender
)
event["user_ids_without_access_to_sender"] = user_ids_without_access_to_sender

if send_request.local_id is not None:
event["local_id"] = send_request.local_id
if send_request.sender_queue_id is not None:
Expand Down
1 change: 1 addition & 0 deletions zerver/lib/email_mirror.py
Expand Up @@ -120,6 +120,7 @@ def get_usable_missed_message_address(address: str) -> MissedMessageEmailAddress
mm_address = MissedMessageEmailAddress.objects.select_related(
"user_profile",
"user_profile__realm",
"user_profile__realm__can_access_all_users_group",
"message",
"message__sender",
"message__recipient",
Expand Down
55 changes: 49 additions & 6 deletions zerver/lib/message.py
Expand Up @@ -2,6 +2,7 @@
import zlib
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from email.headerregistry import Address
from typing import (
Any,
Collection,
Expand All @@ -28,7 +29,7 @@

from analytics.lib.counts import COUNT_STATS
from analytics.models import RealmCount
from zerver.lib.avatar import get_avatar_field
from zerver.lib.avatar import get_avatar_field, get_avatar_for_inaccessible_user
from zerver.lib.cache import (
cache_set_many,
cache_with_key,
Expand Down Expand Up @@ -60,6 +61,7 @@
from zerver.lib.url_preview.types import UrlEmbedData
from zerver.lib.user_groups import is_user_in_group
from zerver.lib.user_topics import build_get_topic_visibility_policy, get_topic_visibility_policy
from zerver.lib.users import get_inaccessible_user_ids
from zerver.models import (
MAX_TOPIC_NAME_LENGTH,
Message,
Expand All @@ -74,6 +76,7 @@
UserProfile,
UserTopic,
get_display_recipient_by_id,
get_fake_email_domain,
get_usermessage_by_message_id,
query_for_ids,
)
Expand Down Expand Up @@ -245,6 +248,8 @@ def messages_for_ids(
apply_markdown: bool,
client_gravatar: bool,
allow_edit_history: bool,
user_profile: Optional[UserProfile],
realm: Realm,
) -> List[Dict[str, Any]]:
cache_transformer = MessageDict.build_dict_from_raw_db_row
id_fetcher = lambda row: row["id"]
Expand All @@ -261,6 +266,9 @@ def messages_for_ids(

message_list: List[Dict[str, Any]] = []

sender_ids = [message_dicts[message_id]["sender_id"] for message_id in message_ids]
inaccessible_sender_ids = get_inaccessible_user_ids(sender_ids, user_profile)

for message_id in message_ids:
msg_dict = message_dicts[message_id]
flags = user_message_flags[message_id]
Expand All @@ -278,9 +286,10 @@ def messages_for_ids(
# in realms with allow_edit_history disabled.
if "edit_history" in msg_dict and not allow_edit_history:
del msg_dict["edit_history"]
msg_dict["can_access_sender"] = msg_dict["sender_id"] not in inaccessible_sender_ids
message_list.append(msg_dict)

MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar)
MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar, realm)

return message_list

Expand Down Expand Up @@ -389,7 +398,10 @@ def wide_dict(message: Message, realm_id: Optional[int] = None) -> Dict[str, Any

@staticmethod
def post_process_dicts(
objs: List[Dict[str, Any]], apply_markdown: bool, client_gravatar: bool
objs: List[Dict[str, Any]],
apply_markdown: bool,
client_gravatar: bool,
realm: Realm,
) -> None:
"""
NOTE: This function mutates the objects in
Expand All @@ -403,7 +415,15 @@ def post_process_dicts(
MessageDict.bulk_hydrate_recipient_info(objs)

for obj in objs:
MessageDict.finalize_payload(obj, apply_markdown, client_gravatar, skip_copy=True)
can_access_sender = obj.get("can_access_sender", True)
MessageDict.finalize_payload(
obj,
apply_markdown,
client_gravatar,
skip_copy=True,
can_access_sender=can_access_sender,
realm_host=realm.host,
)

@staticmethod
def finalize_payload(
Expand All @@ -412,6 +432,8 @@ def finalize_payload(
client_gravatar: bool,
keep_rendered_content: bool = False,
skip_copy: bool = False,
can_access_sender: bool = True,
realm_host: str = "",
) -> Dict[str, Any]:
"""
By default, we make a shallow copy of the incoming dict to avoid
Expand All @@ -428,7 +450,20 @@ def finalize_payload(
# here if the current user's role has access to the target user's email address.
client_gravatar = False

MessageDict.set_sender_avatar(obj, client_gravatar)
if not can_access_sender:
# Enforce inability to access details of inaccessible
# users. We should be able to remove the realm_host and
# can_access_user plumbing to this function if/when we
# shift the Zulip API to not send these denormalized
# fields about message senders favor of just sending the
# sender's user ID.
obj["sender_full_name"] = str(UserProfile.INACCESSIBLE_USER_NAME)
sender_id = obj["sender_id"]
obj["sender_email"] = Address(
username=f"user{sender_id}", domain=get_fake_email_domain(realm_host)
).addr_spec

MessageDict.set_sender_avatar(obj, client_gravatar, can_access_sender)
if apply_markdown:
obj["content_type"] = "text/html"
obj["content"] = obj["rendered_content"]
Expand All @@ -446,6 +481,8 @@ def finalize_payload(
del obj["recipient_type_id"]
del obj["sender_is_mirror_dummy"]
del obj["sender_email_address_visibility"]
if "can_access_sender" in obj:
del obj["can_access_sender"]
return obj

@staticmethod
Expand Down Expand Up @@ -734,7 +771,13 @@ def bulk_hydrate_recipient_info(objs: List[Dict[str, Any]]) -> None:
MessageDict.hydrate_recipient_info(obj, display_recipients[obj["recipient_id"]])

@staticmethod
def set_sender_avatar(obj: Dict[str, Any], client_gravatar: bool) -> None:
def set_sender_avatar(
obj: Dict[str, Any], client_gravatar: bool, can_access_sender: bool = True
) -> None:
if not can_access_sender:
obj["avatar_url"] = get_avatar_for_inaccessible_user()
return

sender_id = obj["sender_id"]
sender_realm_id = obj["sender_realm_id"]
sender_delivery_email = obj["sender_delivery_email"]
Expand Down
29 changes: 29 additions & 0 deletions zerver/tests/test_events.py
Expand Up @@ -1059,6 +1059,35 @@ def test_send_message_to_existing_recipient(self) -> None:
state_change_expected=True,
)

def test_events_for_message_from_inaccessible_sender(self) -> None:
reset_email_visibility_to_everyone_in_zulip_realm()
self.set_up_db_for_testing_user_access()
othello = self.example_user("othello")
self.user_profile = self.example_user("polonius")

events = self.verify_action(
lambda: self.send_stream_message(
othello, "test_stream1", "hello 2", allow_unsubscribed_sender=True
),
)
check_message("events[0]", events[0])
message_obj = events[0]["message"]
self.assertEqual(message_obj["sender_full_name"], "Unknown user")
self.assertEqual(message_obj["sender_email"], f"user{othello.id}@zulip.testserver")
self.assertTrue(message_obj["avatar_url"].endswith("images/unknown-user-avatar.png"))

iago = self.example_user("iago")
events = self.verify_action(
lambda: self.send_stream_message(
iago, "test_stream1", "hello 2", allow_unsubscribed_sender=True
),
)
check_message("events[0]", events[0])
message_obj = events[0]["message"]
self.assertEqual(message_obj["sender_full_name"], iago.full_name)
self.assertEqual(message_obj["sender_email"], iago.delivery_email)
self.assertIsNone(message_obj["avatar_url"])

def test_add_reaction(self) -> None:
message_id = self.send_stream_message(self.example_user("hamlet"), "Verona", "hello")
message = Message.objects.get(id=message_id)
Expand Down
57 changes: 56 additions & 1 deletion zerver/tests/test_message_dict.py
Expand Up @@ -101,6 +101,7 @@ def get_fetch_payload(
[unhydrated_dict],
apply_markdown=apply_markdown,
client_gravatar=client_gravatar,
realm=get_realm("zulip"),
)
final_dict = unhydrated_dict
return final_dict
Expand Down Expand Up @@ -180,7 +181,9 @@ def test_bulk_message_fetching(self) -> None:
rows = list(MessageDict.get_raw_db_rows(ids))

objs = [MessageDict.build_dict_from_raw_db_row(row) for row in rows]
MessageDict.post_process_dicts(objs, apply_markdown=False, client_gravatar=False)
MessageDict.post_process_dicts(
objs, apply_markdown=False, client_gravatar=False, realm=realm
)

self.assert_length(rows, num_ids)

Expand Down Expand Up @@ -422,6 +425,8 @@ def test_messages_for_ids(self) -> None:
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)

self.assert_length(messages, 2)
Expand All @@ -438,6 +443,46 @@ def test_messages_for_ids(self) -> None:
self.assertIn('class="user-mention"', new_message["content"])
self.assertEqual(new_message["flags"], ["mentioned"])

def test_message_for_ids_for_restricted_user_access(self) -> None:
self.set_up_db_for_testing_user_access()
hamlet = self.example_user("hamlet")
self.send_stream_message(
hamlet,
"test_stream1",
topic_name="test",
content="test message again",
)
realm = get_realm("zulip")
stream = get_stream("test_stream1", realm)

assert stream.recipient_id is not None
message_ids = Message.objects.filter(
recipient_id=stream.recipient_id, realm=realm
).values_list("id", flat=True)
self.assert_length(message_ids, 2)

user_message_flags = {
message_ids[0]: ["read", "historical"],
message_ids[1]: ["read"],
}

messages = messages_for_ids(
message_ids=list(message_ids),
user_message_flags=user_message_flags,
search_fields={},
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=self.example_user("polonius"),
realm=realm,
)
(inaccessible_sender_msg,) = (msg for msg in messages if msg["sender_id"] != hamlet.id)
self.assertEqual(inaccessible_sender_msg["sender_id"], self.example_user("othello").id)
self.assertEqual(inaccessible_sender_msg["sender_full_name"], "Unknown user")
self.assertTrue(
inaccessible_sender_msg["avatar_url"].endswith("images/unknown-user-avatar.png")
)

def test_display_recipient_up_to_date(self) -> None:
"""
This is a test for a bug where due to caching of message_dicts,
Expand Down Expand Up @@ -473,6 +518,8 @@ def test_display_recipient_up_to_date(self) -> None:
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)
message = messages[0]

Expand Down Expand Up @@ -516,6 +563,8 @@ def test_display_recipient_personal(self) -> None:
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)

self._verify_display_recipient(messages[0]["display_recipient"], [hamlet, cordelia])
Expand All @@ -537,6 +586,8 @@ def test_display_recipient_stream(self) -> None:
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)

self.assertEqual(messages[0]["display_recipient"], "Verona")
Expand All @@ -559,6 +610,8 @@ def test_display_recipient_huddle(self) -> None:
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)

self._verify_display_recipient(
Expand Down Expand Up @@ -593,6 +646,8 @@ def test_display_recipient_various_types(self) -> None:
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)

self._verify_display_recipient(
Expand Down
4 changes: 3 additions & 1 deletion zerver/tests/test_message_edit.py
Expand Up @@ -55,7 +55,7 @@ def check_topic(self, msg_id: int, topic_name: str) -> None:

def check_message(self, msg_id: int, topic_name: str, content: str) -> None:
# Make sure we saved the message correctly to the DB.
msg = Message.objects.get(id=msg_id)
msg = Message.objects.select_related("realm").get(id=msg_id)
self.assertEqual(msg.topic_name(), topic_name)
self.assertEqual(msg.content, content)

Expand All @@ -75,6 +75,8 @@ def check_message(self, msg_id: int, topic_name: str, content: str) -> None:
apply_markdown=False,
client_gravatar=False,
allow_edit_history=True,
user_profile=None,
realm=msg.realm,
)

self.assert_length(queries, 1)
Expand Down

0 comments on commit 1985685

Please sign in to comment.