forked from openedx/edx-platform
/
middleware.py
206 lines (174 loc) · 7.7 KB
/
middleware.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
"""
This is a middleware layer which keeps a log of all requests made
to the server. It is responsible for removing security tokens and
similar from such events, and relaying them to the event tracking
framework.
"""
import hashlib
import hmac
import json
import logging
import re
import sys
from django.conf import settings
from track import views
from track import contexts
from eventtracking import tracker
# Logger for this module.
log = logging.getLogger(__name__)

# Name under which the per-request tracking context is entered and exited.
CONTEXT_NAME = 'edx.request'

# Maps a key of ``request.META`` to the key its value is stored under in the
# tracking context.
META_KEY_TO_CONTEXT_KEY = {
    'REMOTE_ADDR': 'ip',
    'SERVER_NAME': 'host',
    'HTTP_USER_AGENT': 'agent',
    'PATH_INFO': 'path',
    # "referer" is the (misspelled) name the HTTP header itself uses, so the
    # spelling is kept on purpose. See:
    # http://en.wikipedia.org/wiki/HTTP_referer#Origin_of_the_term_referer
    'HTTP_REFERER': 'referer',
    'HTTP_ACCEPT_LANGUAGE': 'accept_language',
}
class TrackMiddleware(object):
    """
    Tracks all requests made, as well as setting up context for other server
    emitted events.
    """

    # Request parameters whose values are masked before the request is logged.
    #
    # WARNING: This list needs to be changed whenever we change password
    # handling functionality. As of the time the list was written only
    # 'password' was used; the rest are there for future extension. We should
    # manually confirm no passwords make it into log files when we change this.
    CENSORED_PARAMETERS = (
        'password', 'newpassword', 'new_password',
        'oldpassword', 'old_password', 'new_password1', 'new_password2',
    )

    # Placeholder stored in place of a censored value.
    CENSORED_VALUE = '*' * 8

    # The serialized GET/POST payload is cut to this many characters.
    MAX_EVENT_LENGTH = 512

    def process_request(self, request):
        """
        Enter the tracking context for ``request`` and emit a tracking event
        describing its (censored) GET and POST parameters.

        The context is entered for every request; the event itself is only
        emitted when ``should_process_request`` allows it. Always returns
        ``None``.

        Failures are deliberately not propagated to the user: if anything
        raises, an 'exception' event is emitted instead so that dropped events
        are at least visible, and if even that fails the error is logged.
        Returning a 500 here would arguably be more correct (decisions made on
        incomplete tracking data lead to insidious bugs), but that would be a
        policy change. Only ``Exception`` subclasses are handled, so
        ``SystemExit``/``KeyboardInterrupt`` still propagate.
        """
        try:
            self.enter_request_context(request)

            if not self.should_process_request(request):
                return

            event = self._build_request_event(request)
            views.server_track(request, request.META['PATH_INFO'], event)
        except Exception:  # pylint: disable=broad-except
            event = {'event-type': 'exception', 'exception': repr(sys.exc_info()[0])}
            try:
                views.server_track(request, request.META['PATH_INFO'], event)
            except Exception:  # pylint: disable=broad-except
                # At this point things are really broken. The interim policy
                # is still not to fail the request, but leave a trace instead
                # of dropping the failure silently.
                log.exception("Unable to emit a tracking event for a failed request.")

    def _censor_parameters(self, parameters):
        """
        Return ``parameters`` (a plain dict) with the value of every key listed
        in ``CENSORED_PARAMETERS`` replaced by ``CENSORED_VALUE``.

        The dict is modified in place and returned for convenience.
        """
        for name in self.CENSORED_PARAMETERS:
            if name in parameters:
                parameters[name] = self.CENSORED_VALUE
        return parameters

    def _build_request_event(self, request):
        """
        Serialize the censored GET and POST parameters of ``request`` to JSON,
        truncated to ``MAX_EVENT_LENGTH`` characters.

        Passwords should never be sent as GET parameters, but this can happen
        due to older browser bugs, so GET is censored as well.
        """
        # TODO: Confirm no large file uploads
        event = json.dumps({
            'GET': self._censor_parameters(dict(request.GET)),
            'POST': self._censor_parameters(dict(request.POST)),
        })
        # Note: truncation may cut the JSON document mid-way.
        return event[:self.MAX_EVENT_LENGTH]

    def should_process_request(self, request):
        """
        Don't track requests to the specified URL patterns.

        Returns ``False`` when the request path matches (from its start, as
        with ``re.match``) any pattern in the ``TRACKING_IGNORE_URL_PATTERNS``
        setting, ``True`` otherwise. A missing setting means nothing is
        ignored.
        """
        path = request.META['PATH_INFO']
        ignored_url_patterns = getattr(settings, 'TRACKING_IGNORE_URL_PATTERNS', [])
        # Note we are explicitly relying on python's internal caching of
        # compiled regular expressions here.
        return not any(re.match(pattern, path) for pattern in ignored_url_patterns)

    def enter_request_context(self, request):
        """
        Extract information from the request and add it to the tracking
        context.

        The following fields are injected into the context:

        * session - The hashed Django session key (see ``get_session_key``).
        * user_id - The primary key of the logged in user.
        * username - The username of the logged in user.
        * ip - The IP address of the client.
        * host - The "SERVER_NAME" header, which should be the name of the server running this code.
        * agent - The client browser identification string.
        * path - The path part of the requested URL.
        * referer - The "HTTP_REFERER" header.
        * accept_language - The "HTTP_ACCEPT_LANGUAGE" header.
        * client_id - The unique key used by Google Analytics to identify a user

        Header-derived fields default to an empty string when the header is
        absent. The result of ``contexts.course_context_from_url`` for the
        absolute request URL is merged in last.
        """
        context = {
            'session': self.get_session_key(request),
            'user_id': self.get_user_primary_key(request),
            'username': self.get_username(request),
        }
        # ``items()`` rather than ``iteritems()`` so this runs on Python 2 and 3.
        for header_name, context_key in META_KEY_TO_CONTEXT_KEY.items():
            context[context_key] = request.META.get(header_name, '')

        # Google Analytics uses the clientId to keep track of unique visitors. A GA cookie looks like
        # this: _ga=GA1.2.1033501218.1368477899. The clientId is this part: 1033501218.1368477899.
        google_analytics_cookie = request.COOKIES.get('_ga')
        if google_analytics_cookie is None:
            context['client_id'] = None
        else:
            context['client_id'] = '.'.join(google_analytics_cookie.split('.')[2:])

        context.update(contexts.course_context_from_url(request.build_absolute_uri()))

        tracker.get_tracker().enter_context(
            CONTEXT_NAME,
            context
        )

    def get_session_key(self, request):
        """
        Gets and hashes the Django session key from the request.

        Returns an empty string if the request has no session (or the session
        has no key).
        """
        try:
            return self.encrypt_session_key(request.session.session_key)
        except AttributeError:
            return ''

    @staticmethod
    def _to_bytes(value):
        """Return ``value`` as a byte string, UTF-8 encoding it if it is text."""
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    def encrypt_session_key(self, session_key):
        """
        Hash a Django session key to another 32-character hex value.

        The value is an MD5 HMAC of ``session_key`` keyed with a digest of a
        fixed salt plus ``settings.SECRET_KEY``. Returns an empty string for
        an empty or ``None`` session key.
        """
        if not session_key:
            return ''

        # Follow the model of django.utils.crypto.salted_hmac() and
        # django.contrib.sessions.backends.base._hash() but use MD5
        # instead of SHA1 so that the result has the same length (32)
        # as the original session_key.
        # TODO: Switch to SHA224, which is secure.
        # If necessary, drop the last little bit of the hash to make it the same length.
        # Using a known-insecure hash to shorten is silly.
        # Also, why do we need same length?
        key_salt = "common.djangoapps.track" + self.__class__.__name__
        # hashlib/hmac operate on bytes; coerce explicitly so this also works
        # where ``str`` is a text type. Byte strings pass through unchanged.
        key = hashlib.md5(
            self._to_bytes(key_salt) + self._to_bytes(settings.SECRET_KEY)
        ).digest()
        return hmac.new(
            key, msg=self._to_bytes(session_key), digestmod=hashlib.md5
        ).hexdigest()

    def get_user_primary_key(self, request):
        """
        Gets the primary key of the logged in Django user.

        Returns an empty string if the request has no ``user`` attribute.
        """
        try:
            return request.user.pk
        except AttributeError:
            return ''

    def get_username(self, request):
        """
        Gets the username of the logged in Django user.

        Returns an empty string if the request has no ``user`` attribute.
        """
        try:
            return request.user.username
        except AttributeError:
            return ''

    def process_response(self, _request, response):
        """
        Exit the context if it exists.

        Errors raised while exiting the context are ignored on purpose;
        ``response`` is always returned unchanged.
        """
        try:
            tracker.get_tracker().exit_context(CONTEXT_NAME)
        except Exception:  # pylint: disable=broad-except
            pass

        return response