Skip to content

Commit 2f6c5a8

Browse files
committed
CVE-2023-22735: Provide the Content-Disposition header from S3.
The Content-Type of user-provided uploads was provided by the browser at initial upload time, and stored in S3; however, 04cf68b switched to determining the Content-Disposition merely from the filename. This makes uploads vulnerable to a stored XSS, wherein a file uploaded with a content-type of `text/html` and an extension of `.png` would be served to browsers as `Content-Disposition: inline`, which is unsafe. The `Content-Security-Policy` headers in the previous commit mitigate this, but only for browsers which support them. Revert parts of 04cf68b, specifically by allowing S3 to provide the Content-Disposition header, and using the `ResponseContentDisposition` argument when necessary to override it to `attachment`. Because we expect S3 responses to vary based on this argument, we include it in the cache key; since the query parameter has dashes in it, we can't use use the helper `$arg_` variables, and must parse it from the query parameters manually. Adding the disposition may decrease the cache hit rate somewhat, but downloads are infrequent enough that it is unlikely to have a noticeable effect. We take care to not adjust the cache key for requests which do not specify the disposition.
1 parent 36e97f8 commit 2f6c5a8

File tree

4 files changed

+55
-29
lines changed

4 files changed

+55
-29
lines changed

Diff for: puppet/zulip/files/nginx/zulip-include-frontend/uploads-internal.conf

+10-8
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {
1414

1515
# Ensure that we only get _one_ of these headers: the one that
1616
# Django added, not the one from S3.
17-
proxy_hide_header Content-Disposition;
1817
proxy_hide_header Cache-Control;
1918
proxy_hide_header Expires;
2019
proxy_hide_header Set-Cookie;
21-
# We are _leaving_ S3 to provide Content-Type and Accept-Ranges
22-
# headers, which are the two remaining headers which nginx would
23-
# also pass through from the first response. Django explicitly
24-
# unsets the former, and does not set the latter.
20+
# We are _leaving_ S3 to provide Content-Type,
21+
# Content-Disposition, and Accept-Ranges headers, which are the
22+
# three remaining headers which nginx would also pass through from
23+
# the first response. Django explicitly unsets the first, and
24+
# does not set the latter two.
2525

2626
# nginx does its own DNS resolution, which is necessary here to
2727
# resolve the IP of the S3 server. Point it at the local caching
@@ -38,9 +38,11 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {
3838
# `s3_disk_cache_size` and read frequency, set via
3939
# `s3_cache_inactive_time`.
4040
proxy_cache_valid 200 1y;
41-
# Don't include query parameters in the cache key, since those
42-
# include a time-based auth token
43-
proxy_cache_key $download_url;
41+
42+
# We only include the requested content-disposition in the cache
43+
# key, so that we cache "Content-Disposition: attachment"
44+
# separately from the inline version.
45+
proxy_cache_key $download_url$s3_disposition_cache_key;
4446
}
4547

4648
# Internal file-serving

Diff for: puppet/zulip/templates/nginx/s3-cache.template.erb

+13
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,16 @@ proxy_cache_path /srv/zulip-uploaded-files-cache
44
keys_zone=uploads:<%= @s3_memory_cache_size %>
55
inactive=<%= @s3_cache_inactive_time %>
66
max_size=<%= @s3_disk_cache_size %>;
7+
8+
# This is used when proxying requests to S3; we wish to know if the
9+
# proxied request is asking to override the Content-Disposition in its
10+
# response, so we can adjust our cache key. Unfortunately, $arg_foo
11+
# style variables pre-parsed from query parameters don't work with
12+
# query parameters with dashes, so we parse it out by hand. Despite
13+
# needing to be declared at the 'http' level,. nginx applies maps like
14+
# this lazily, so this only affects internal S3 proxied requests.
15+
16+
map $args $s3_disposition_cache_key {
17+
default "";
18+
"~(^|&)(?<param>response-content-disposition=[^&]+)" "?$param";
19+
}

Diff for: zerver/lib/upload/s3.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -91,21 +91,24 @@ def upload_image_to_s3(
9191
)
9292

9393

94-
def get_signed_upload_url(path: str) -> str:
94+
def get_signed_upload_url(path: str, force_download: bool = False) -> str:
9595
client = boto3.client(
9696
"s3",
9797
aws_access_key_id=settings.S3_KEY,
9898
aws_secret_access_key=settings.S3_SECRET_KEY,
9999
region_name=settings.S3_REGION,
100100
endpoint_url=settings.S3_ENDPOINT_URL,
101101
)
102+
params = {
103+
"Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
104+
"Key": path,
105+
}
106+
if force_download:
107+
params["ResponseContentDisposition"] = "attachment"
102108

103109
return client.generate_presigned_url(
104110
ClientMethod="get_object",
105-
Params={
106-
"Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
107-
"Key": path,
108-
},
111+
Params=params,
109112
ExpiresIn=SIGNED_UPLOAD_URL_DURATION,
110113
HttpMethod="GET",
111114
)

Diff for: zerver/views/upload.py

+24-16
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ def internal_nginx_redirect(internal_path: str) -> HttpResponse:
8888
return response
8989

9090

91-
def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponse:
92-
url = get_signed_upload_url(path_id)
91+
def serve_s3(request: HttpRequest, path_id: str, force_download: bool = False) -> HttpResponse:
92+
url = get_signed_upload_url(path_id, force_download=force_download)
9393
assert url.startswith("https://")
9494

9595
if settings.DEVELOPMENT:
@@ -107,18 +107,30 @@ def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> Http
107107
assert parsed_url.query is not None
108108
escaped_path_parts = parsed_url.hostname + quote(parsed_url.path) + "?" + parsed_url.query
109109
response = internal_nginx_redirect("/internal/s3/" + escaped_path_parts)
110-
patch_disposition_header(response, path_id, download)
110+
111+
# It is important that S3 generate both the Content-Type and
112+
# Content-Disposition headers; when the file was uploaded, we
113+
# stored the browser-provided value for the former, and set
114+
# Content-Disposition according to if that was safe. As such,
115+
# only S3 knows if a given attachment is safe to inline; we only
116+
# override Content-Disposition to "attachment", and do so by
117+
# telling S3 that is what we want in the signed URL.
111118
patch_cache_control(response, private=True, immutable=True)
112119
return response
113120

114121

115-
def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponseBase:
122+
def serve_local(
123+
request: HttpRequest, path_id: str, force_download: bool = False
124+
) -> HttpResponseBase:
116125
assert settings.LOCAL_FILES_DIR is not None
117126
local_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
118127
assert_is_local_storage_path("files", local_path)
119128
if not os.path.isfile(local_path):
120129
return HttpResponseNotFound("<p>File not found</p>")
121130

131+
mimetype, encoding = guess_type(path_id)
132+
download = force_download or mimetype not in INLINE_MIME_TYPES
133+
122134
if settings.DEVELOPMENT:
123135
# In development, we do not have the nginx server to offload
124136
# the response to; serve it directly ourselves.
@@ -138,7 +150,9 @@ def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> H
138150
def serve_file_download_backend(
139151
request: HttpRequest, user_profile: UserProfile, realm_id_str: str, filename: str
140152
) -> HttpResponseBase:
141-
return serve_file(request, user_profile, realm_id_str, filename, url_only=False, download=True)
153+
return serve_file(
154+
request, user_profile, realm_id_str, filename, url_only=False, force_download=True
155+
)
142156

143157

144158
def serve_file_backend(
@@ -167,7 +181,7 @@ def serve_file(
167181
realm_id_str: str,
168182
filename: str,
169183
url_only: bool = False,
170-
download: bool = False,
184+
force_download: bool = False,
171185
) -> HttpResponseBase:
172186
path_id = f"{realm_id_str}/{filename}"
173187
realm = get_valid_realm_from_request(request)
@@ -181,13 +195,10 @@ def serve_file(
181195
url = generate_unauthed_file_access_url(path_id)
182196
return json_success(request, data=dict(url=url))
183197

184-
mimetype, encoding = guess_type(path_id)
185-
download = download or mimetype not in INLINE_MIME_TYPES
186-
187198
if settings.LOCAL_UPLOADS_DIR is not None:
188-
return serve_local(request, path_id, download=download)
199+
return serve_local(request, path_id, force_download=force_download)
189200
else:
190-
return serve_s3(request, path_id, download=download)
201+
return serve_s3(request, path_id, force_download=force_download)
191202

192203

193204
USER_UPLOADS_ACCESS_TOKEN_SALT = "user_uploads_"
@@ -221,13 +232,10 @@ def serve_file_unauthed_from_token(
221232
if path_id.split("/")[-1] != filename:
222233
raise JsonableError(_("Invalid filename"))
223234

224-
mimetype, encoding = guess_type(path_id)
225-
download = mimetype not in INLINE_MIME_TYPES
226-
227235
if settings.LOCAL_UPLOADS_DIR is not None:
228-
return serve_local(request, path_id, download=download)
236+
return serve_local(request, path_id)
229237
else:
230-
return serve_s3(request, path_id, download=download)
238+
return serve_s3(request, path_id)
231239

232240

233241
def serve_local_avatar_unauthed(request: HttpRequest, path: str) -> HttpResponseBase:

0 commit comments

Comments
 (0)