// text-frontend.inc.vcl.erb
// Varnish VCL include file for text frontends
include "text-common.inc.vcl";
include "zero.inc.vcl";
include "geoip.inc.vcl";
// Note that analytics.inc.vcl will set an X-Analytics value of proxy=IORG
// without inspecting whether there's an existing proxy=<proxy> key-value
// pair inside X-Analytics. We do this because if the traffic had come
// from a known proxy (e.g., Opera or Nokia), that would imply that
// Internet.org was not the rightmost endpoint. In time we will need to
// add the notion of proxy chaining, to record whether a request both
// came through a known proxy and carried Via: Internet.org with a
// corresponding unknown rightmost endpoint in X-Forwarded-For (the
// rightmost Internet.org endpoint has an unpredictable Internet-facing
// IP address). This applies even if it is the only value there, as in
// the example of traffic sourced directly by satellite.
// Redirect page views made by mobile devices on a desktop hostname to the
// matching mobile hostname.
//
// A redirect is only considered when ALL of the following hold:
//   - X-Subdomain is unset (cluster_fe_recv_pre_purge sets it for m./zero.
//     hostnames)
//   - the request method is GET or HEAD
//   - the User-Agent (or a WAP Accept header) looks like a mobile client
//   - no opt-out cookie is present (stopMobileRedirect=true or
//     mf_useformat=desktop)
//   - the URL is a page view: /wiki/..., a language-variant path, or a
//     ?title= query with no further parameters
//
// When the rewritten host differs from the Host header, the target URL is
// stored in req.http.Location and "error 666" is raised; the 666 branch of
// cluster_fe_err_synth converts that into the 302 response.
sub mobile_redirect {
    if (!req.http.X-Subdomain && (req.request == "GET" || req.request == "HEAD")
        && (req.http.User-Agent ~ "(?i)(mobi|240x240|240x320|320x320|alcatel|android|audiovox|bada|benq|blackberry|cdm-|compal-|docomo|ericsson|hiptop|htc[-_]|huawei|ipod|kddi-|kindle|meego|midp|mitsu|mmp\/|mot-|motor|ngm_|nintendo|opera.m|palm|panasonic|philips|phone|playstation|portalmmm|sagem-|samsung|sanyo|sec-|semc-browser|sendo|sharp|silk|softbank|symbian|teleca|up.browser|vodafone|webos)"
            || req.http.User-Agent ~ "^(?i)(lge?|sie|nec|sgh|pg)-" || req.http.Accept ~ "vnd.wap.wml")
        && req.http.Cookie !~ "(stopMobileRedirect=true|mf_useformat=desktop)"
        && (
            req.url ~ "^/(wiki|(gan|ike|iu|kk|ku|shi|sr|tg|uz|zh)(-[a-z]+)?)[/\?]"
            || req.url ~ "^/(w/index\.php)?\?title=[^&]*$"
        )) {
        // Separate regexps for clarity, but multiple regsubs instead of
        // "if host ~"/regsub matches for efficiency. Be careful to not
        // write overlapping/chaining regexps: each regsub runs on the
        // output of the previous one.
        set req.http.MobileHost = req.http.Host;
        // (www.)<project>.org -> m.<project>.org
        set req.http.MobileHost = regsub(req.http.MobileHost, "^(www\.)?(mediawiki|wikimediafoundation|wikisource|wikidata)\.", "m.\2.");
        // <site>.wikimedia.org -> <site>.m.wikimedia.org (listed sites only)
        set req.http.MobileHost = regsub(req.http.MobileHost, "^(commons|incubator|legalteam|meta|office|outreach|pl|species|strategy|wikimania201[2-5])\.wikimedia\.", "\1.m.wikimedia.");
        // <lang>.<project>.org -> <lang>.m.<project>.org, skipping the
        // excluded prefixes and hosts that are already "m."
        set req.http.MobileHost = regsub(req.http.MobileHost, "^((?!commons|meta|nostalgia|quote|quality|sep11|sources|species|textbook|m\b)\w+)\.(wikipedia|wiktionary|wikinews|wikisource|wikiquote|wikibooks|wikiversity|wikivoyage)\.", "\1.m.\2.");

        if (req.http.Host != req.http.MobileHost) {
            // X-Forwarded-Proto is a request header, so never copy it
            // verbatim into Location: a forged value such as
            // "http://evil.example/?" would turn this into an open
            // redirect. Only the two expected schemes are ever emitted;
            // anything other than "https" falls back to plain http.
            // NOTE(review): for the legitimate values "http" and "https"
            // the resulting Location is identical to the previous behaviour.
            if (req.http.X-Forwarded-Proto == "https") {
                set req.http.Location = "https://" + req.http.MobileHost + req.url;
            } else {
                set req.http.Location = "http://" + req.http.MobileHost + req.url;
            }
            error 666 "Found";
        }

        // No redirect needed: drop the scratch header.
        unset req.http.MobileHost;
    }
}
// Early request handling, run before purge processing: rejects known noise
// with 403, strips Range, short-circuits geoiplookup requests, derives the
// X-Subdomain / x-dt-host headers for mobile hostnames and finally
// normalizes the request path.
sub cluster_fe_recv_pre_purge {
    // --- Noise filters: each of these answers 403 "Noise" ---

    // Forged crawler UAs on zerodot. Incidentally this also covers most of
    // the lazywebtools traffic matched by the referer rule below.
    if (req.http.host ~ "zero\.wikipedia\.org" && req.http.User-Agent && req.http.User-Agent ~ "Facebookbot|Googlebot") {
        error 403 "Noise";
    }

    // Requests referred by auto-refresh / reload sites.
    if (req.http.referer && req.http.referer ~ "^http://(www\.(keeprefreshing|refreshthis|refresh-page|urlreload)\.com|tuneshub\.blogspot\.com|itunes24x7\.blogspot\.com|autoreload\.net|www\.lazywebtools\.co\.uk)/") {
        error 403 "Noise";
    }

    // POSTs carrying the com_jce imgmanager query string.
    if (req.request == "POST" && req.url ~ "index\.php\?option=com_jce&task=plugin&plugin=imgmanager&file=imgmanager&method=form&cid=") {
        error 403 "Noise";
    }

    // FIXME: Range requests are disabled for now because of an observed
    // issue with Range requests and gzip/gunzip.
    unset req.http.Range;

    // geoiplookup is answered synthetically; status 668 is picked up by
    // cluster_fe_err_synth.
    if (req.url == "/geoiplookup" || req.http.host == "geoiplookup.wikimedia.org") {
        error 668 "geoiplookup";
    }

    // Everything below runs only on the first pass, not on restarts.
    if (req.restarts == 0) {
        // Start from a clean slate: these headers are always either set
        // here or absent.
        unset req.http.x-dt-host; // desktop host, if mobile hostname on request
        unset req.http.X-Subdomain;
        unset req.http.X-Orig-Cookie;

        // Classify the hostname: zero.* (except zero.wikimedia.org) takes
        // precedence over m.*
        if (req.http.host ~ "^([a-zA-Z0-9-]+\.)?zero\." && req.http.host != "zero.wikimedia.org") {
            set req.http.X-Subdomain = "ZERO";
        } else if (req.http.host ~ "^([a-zA-Z0-9-]+\.)?m\.") {
            set req.http.X-Subdomain = "M";
        }

        // Carrier tagging and host rewriting apply to mobile subdomains only.
        if (req.http.X-Subdomain) {
            // tag_carrier is limited to (m|zero).wikipedia hosts.
            if (req.http.host ~ "^([a-zA-Z0-9-]+\.)?(m|zero)\.wikipedia\.") {
                call tag_carrier;
            }

            // Record the desktop equivalent of the mobile hostname in
            // x-dt-host. Language-less projects are mapped explicitly.
            if (req.http.host == "m.mediawiki.org") {
                set req.http.x-dt-host = "www.mediawiki.org";
            } else if (req.http.host == "m.wikimediafoundation.org") {
                set req.http.x-dt-host = "wikimediafoundation.org";
            } else if (req.http.host == "m.wikisource.org") {
                set req.http.x-dt-host = "wikisource.org";
            } else if (req.http.host == "m.wikidata.org") {
                set req.http.x-dt-host = "www.wikidata.org";
            } else {
                // <language>.(m|zero).<project>.org -> <language>.<project>.org
                set req.http.x-dt-host = regsub(req.http.host, "^([a-zA-Z0-9-]+)\.(m|zero)\.", "\1.");
            }

            // Restbase makes no desktop-vs-mobile distinction, so use the
            // desktop hostname as Host to get a single cache entry. This
            // also affects the restbase url rewrite elsewhere that reads
            // req.http.host.
            if (req.url ~ "^/api/rest_v1/") {
                set req.http.host = req.http.x-dt-host;
            }
        }
    }

    // Paths must be normalized before purging.
    call text_normalize_path;
}
// Main receive logic for text frontends: legacy bits-domain handling,
// mobile redirect, Host normalization for static assets, conditional-request
// cleanup for logged-out users, then the shared text_common_recv logic.
sub cluster_fe_recv {
    // BITS: legacy bits.wm.o domain support. event.gif is answered with an
    // empty 204; everything else on that domain goes straight to lookup.
    if (req.http.host == "<%= @vcl_config.fetch('bits_domain') %>") {
        if (req.url ~ "^/event\.gif") {
            error 204;
        }
        return (lookup);
    }

    call mobile_redirect;

    // Serve every /static/ URL under one hostname so it is cached once.
    if (req.url ~ "^/static/") {
        set req.http.host = "<%= @vcl_config.fetch("static_host") %>";
    }

    // Same for /w/static.php-served assets, but only when the URL ends in a
    // hash query: URLs without it are affected by multiversion and must keep
    // their own host.
    if (req.url ~ "^/w/(skins|resources|extensions)/.+\?[a-fA-F0-9]+$" ) {
        set req.http.host = "<%= @vcl_config.fetch("static_host") %>";
    }

    // A user who has just logged out must not receive a 304 for a page
    // their browser cached while logged in, so drop the conditional header.
    if (req.http.If-Modified-Since && req.http.Cookie ~ "LoggedOut") {
        unset req.http.If-Modified-Since;
    }

    call text_common_recv;
}
// Hash hook for text frontends: defers entirely to text_common_hash.
sub cluster_fe_hash {
    call text_common_hash;
}
// Hit hook for text frontends: intentionally a no-op.
sub cluster_fe_hit {
}
// Miss hook for text frontends: runs the shared miss/pass cookie restore.
sub cluster_fe_miss {
    call text_common_misspass_restore_cookie;
}
// Pass hook for text frontends: runs the shared miss/pass cookie restore.
sub cluster_fe_pass {
    call text_common_misspass_restore_cookie;
}
// Backend-response hook for text frontends: applies the shared
// text_common_backend_response logic, then delivers.
sub cluster_fe_backend_response {
    call text_common_backend_response;

    return (deliver);
}
// Deliver hook for text frontends: runs zero_deliver, makes wiki page
// responses uncacheable downstream, and issues a GeoIP cookie to clients
// that do not have one yet.
sub cluster_fe_deliver {
    call zero_deliver;

    // Strip the s-maxage Cache-Control of wiki pages. The s-maxage still
    // applies to Varnish (it is sent by MediaWiki's $wgUseSquid, which also
    // sends purges internally), but pages mustn't be cached elsewhere.
    //
    // NOTE: Language variant URLs are not currently covered by these
    // regexps. Rather than adding a regexp per edge case, the URL routing
    // schemes should be given some order and coherence.
    // NOTE: Only pages are affected. The cacheability of api.php, load.php,
    // etc. must not be taken away (T102898, T113007).
    // CentralNotice banner special pages (Special:Banner...) are exempt.
    if ((req.url ~ "^/wiki/" || req.url ~ "^/w/index\.php" || req.url ~ "^/\?title=")
        && req.url !~ "^/(wiki/|(w/index\.php)?\?title=)Special:Banner") {
        set resp.http.Cache-Control = "private, s-maxage=0, max-age=0, must-revalidate";
    }

    // Do the GeoIP look-up and send the result as a session cookie, unless
    // a GeoIP cookie is already present in either X-Orig-Cookie or Cookie.
    if (req.http.X-Orig-Cookie !~ "(^|;\s*)GeoIP=[^;]") {
        if (req.http.Cookie !~ "(^|;\s*)GeoIP=[^;]") {
            call geoip_cookie;
        }
    }
}
// Synthetic-response hook for text frontends. Handles the statuses raised
// via "error" in this file:
//   204 on the bits domain -> delivered as-is with keep-alive
//   668                    -> geoiplookup response, delivered as 200
//   666                    -> mobile redirect, delivered as 302
// Any other status falls through without being touched here.
sub cluster_fe_err_synth {
    // BITS: legacy bits.wm.o domain support
    if (req.http.host == "<%= @vcl_config.fetch('bits_domain') %>" && obj.status == 204) {
        set obj.http.Connection = "keep-alive";
        return (deliver);
    }

    // geoiplookup: geoip_lookup fills in the response, which goes out as 200.
    if (obj.status == 668) {
        call geoip_lookup;
        set obj.status = 200;
        set obj.http.Connection = "keep-alive";
        return (deliver);
    }

    // Mobile redirect: the target was stored in req.http.Location by
    // mobile_redirect.
    if (obj.status == 666) {
        set obj.status = 302;
        set obj.http.Location = req.http.Location;
        set obj.http.Content-Length = "0"; // BZ #62245
        set obj.http.Connection = "keep-alive";
        return (deliver);
    }
}