-
Notifications
You must be signed in to change notification settings - Fork 521
/
index.js
363 lines (319 loc) · 15.4 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
//Name of this module, handed to consoleFactory() below
const modulename = 'HealthMonitor';
import got from 'got'; //we need internal requests to have 127.0.0.1 src
import { convars } from '@core/globalData';
import getHostStats from './getHostStats';
import consoleFactory from '@extras/console';
//Module-scoped console built by consoleFactory; it shadows the global `console` in this file
const console = consoleFactory(modulename);
//Helper functions
/**
 * Current time as a Unix timestamp in whole seconds (rounded, not truncated).
 * @returns {number}
 */
const now = () => Math.round(Date.now() / 1000);
/**
 * Tells whether the given value is `undefined` (and only `undefined`, not `null`).
 * @param {*} value
 * @returns {boolean}
 */
const isUndefined = (value) => typeof value === 'undefined';
/**
 * Watches the FXServer process and restarts it when it stops responding.
 *
 * Two signals are tracked:
 * - HealthCheck: an HTTP GET to FXServer's /dynamic.json, sent by this class every second.
 * - HeartBeat: reported by the server through handleHeartBeat(), from the 'fd3' or 'http' source.
 *
 * All timestamps are Unix seconds as returned by now().
 */
export default class HealthMonitor {
    /**
     * Validates the config, resets the monitor state and schedules the periodic jobs.
     * @param {object} config - monitor settings; `cooldown` and `resourceStartingTolerance` (seconds) are read here
     * @throws {Error} if `cooldown` is below 15 or `resourceStartingTolerance` is below 30
     */
    constructor(config) {
        this.config = config;

        //Checking config validity
        if (this.config.cooldown < 15) throw new Error('The monitor.cooldown setting must be 15 seconds or higher.');
        if (this.config.resourceStartingTolerance < 30) throw new Error('The monitor.resourceStartingTolerance setting must be 30 seconds or higher.');

        //Hardcoded Configs
        //NOTE: done mainly because the timeout/limit was never useful, and makes things more complicated
        this.hardConfigs = {
            timeout: 1500,

            //HTTP GET /dynamic.json from txAdmin to sv_main.lua
            healthCheck: {
                failThreshold: 15,
                failLimit: 300,
            },

            //HTTP POST /intercom/monitor from sv_main.lua to txAdmin
            heartBeat: {
                failThreshold: 15,
                failLimit: 60,
                resStartedCooldown: 45, //wait for HB up to 45 seconds after last resource started
            },
        };

        //Setting up
        this.hostStats = null;
        this.resetMonitorStats();

        //Cron functions
        setInterval(() => {
            //sendHealthCheck() is async: handle its rejection here instead of leaving the promise floating
            this.sendHealthCheck().catch((error) => {
                console.error(`Failed to process the health check: ${error?.message ?? error}`);
            });
            this.refreshServerStatus();
        }, 1000);

        //NOTE: if ever changing this, need to make sure the other data in the status
        //event will still be pushed, since right now some of it relies on this event
        //being sent every 5 seconds
        setInterval(async () => {
            try {
                this.hostStats = await getHostStats();
            } catch (error) {
                //Keep the previous stats; a failed collection must not prevent the status push below
                console.error(`Failed to collect host stats: ${error?.message ?? error}`);
            }
            globals.webServer?.webSocket.pushRefresh('status');
        }, 5000);
    }


    /**
     * Refresh Monitor configurations
     */
    refreshConfig() {
        this.config = globals.configVault.getScoped('monitor');
    }//Final refreshConfig()


    /**
     * Restart the FXServer and logs everything
     * @param {string} reasonInternal - reason written to the admin log
     * @param {string} reasonTranslated - reason forwarded to fxRunner.restartServer()
     * @returns {Promise<false|undefined>} resolves to `false` when there is no server
     *   process to restart, and to `undefined` after a restart was requested
     */
    async restartFXServer(reasonInternal, reasonTranslated) {
        //sanity check
        if (globals.fxRunner.fxChild === null) {
            console.warn('Server not started, no need to restart');
            return false;
        }

        //Restart server
        //The flag makes refreshServerStatus() skip its checks until the stats are reset
        this.isAwaitingRestart = true;
        const logMessage = `Restarting server (${reasonInternal}).`;
        globals.logger.admin.write('MONITOR', logMessage);
        globals.fxRunner.restartServer(reasonTranslated, null);
    }


    //================================================================
    /**
     * Stores the new status and, only when it actually changed, notifies the
     * discord bot and pushes a 'status' refresh to the web socket.
     * @param {string} newStatus - one of OFFLINE, ONLINE, PARTIAL
     */
    setCurrentStatus(newStatus) {
        if (newStatus === this.currentStatus) return;

        this.currentStatus = newStatus;
        //Rejections from the discord status update are ignored
        globals.discordBot.updateStatus().catch((e) => {});
        globals.webServer?.webSocket.pushRefresh('status');
    }


    //================================================================
    /**
     * Puts every tracking variable back to its initial (server offline) value.
     */
    resetMonitorStats() {
        this.setCurrentStatus('OFFLINE'); // options: OFFLINE, ONLINE, PARTIAL
        this.lastRefreshStatus = null; //to prevent DDoS crash false positive
        this.lastSuccessfulHealthCheck = null; //to see if its above limit
        this.lastStatusWarningMessage = null; //to prevent spamming
        this.lastHealthCheckErrorMessage = null; //to print warning
        this.healthCheckRestartWarningIssued = false; //to prevent spamming
        this.isAwaitingRestart = false; //to prevent spamming while the server restarts (5s)

        //to track http vs fd3
        //to see if its above limit
        this.lastSuccessfulFD3HeartBeat = null;
        this.lastSuccessfulHTTPHeartBeat = null;

        //to collect statistics
        this.hasServerStartedYet = false;
    }


    //================================================================
    /**
     * Performs one HealthCheck: GETs /dynamic.json from the FXServer and, on success,
     * records the timestamp in lastSuccessfulHealthCheck. On failure only
     * lastHealthCheckErrorMessage is updated. Does nothing when the server is not running.
     * Also caches sv_maxclients and forces it back down when it is above the
     * convars.deployerDefaults.maxClients limit.
     * @returns {Promise<void>}
     */
    async sendHealthCheck() {
        //Check if the server is supposed to be offline
        if (globals.fxRunner.fxChild === null || globals.fxRunner.fxServerHost === null) return;

        //Make request
        const requestOptions = {
            url: `http://${globals.fxRunner.fxServerHost}/dynamic.json`,
            maxRedirects: 0,
            timeout: {
                request: this.hardConfigs.timeout
            },
            retry: {
                limit: 0
            },
        };
        let dynamicResp;
        try {
            const data = await got.get(requestOptions).json();
            //NOTE: `typeof null` is 'object', so null has to be rejected explicitly
            if (data === null || typeof data !== 'object') throw new Error("FXServer's dynamic endpoint didn't return a JSON object.");
            if (isUndefined(data.hostname) || isUndefined(data.clients)) throw new Error("FXServer's dynamic endpoint didn't return complete data.");
            dynamicResp = data;
        } catch (error) {
            this.lastHealthCheckErrorMessage = error.message;
            return;
        }

        //Checking for the maxClients
        if (dynamicResp.sv_maxclients !== undefined) {
            const maxClients = Number.parseInt(dynamicResp.sv_maxclients, 10);
            if (!Number.isNaN(maxClients)) {
                globals.persistentCache.set('fxsRuntime:maxClients', maxClients);

                if (convars.deployerDefaults?.maxClients && maxClients > convars.deployerDefaults.maxClients) {
                    globals.fxRunner.srvCmd(`sv_maxclients ${convars.deployerDefaults.maxClients} ##ZAP-Hosting: please don't modify`);
                    console.error(`ZAP-Hosting: Detected that the server has sv_maxclients above the limit (${convars.deployerDefaults.maxClients}). Changing back to the limit.`);
                    globals.logger.admin.write('SYSTEM', `changing sv_maxclients back to ${convars.deployerDefaults.maxClients}`);
                }
            }
        }

        //Set variables
        this.healthCheckRestartWarningIssued = false;
        this.lastHealthCheckErrorMessage = false;
        this.lastSuccessfulHealthCheck = now();
    }


    //================================================================
    /**
     * Refreshes the Server Status and calls for a restart if necessary.
     *  - HealthCheck: performing a GET to the /dynamic.json file
     *  - HeartBeat: receiving an intercom POST or FD3 txAdminHeartBeat event
     */
    refreshServerStatus() {
        //Check if the server is supposed to be offline
        if (globals.fxRunner.fxChild === null) return this.resetMonitorStats();

        //Ignore check while server is restarting
        if (this.isAwaitingRestart) return;

        //Helper func: elapsed times computed against a null timestamp are huge, print '--' instead
        const cleanET = (et) => ((et > 99999) ? '--' : et);

        //Check if process was frozen
        //This method runs every second, so a gap above 10s means the checks themselves were not running
        const currTimestamp = now();
        const elapsedRefreshStatus = currTimestamp - this.lastRefreshStatus;
        if (this.lastRefreshStatus !== null && elapsedRefreshStatus > 10) {
            console.error(`FXServer was frozen for ${elapsedRefreshStatus - 1} seconds for unknown reason (random issue, VPS Lag, DDoS, etc).`);
            console.error('Don\'t worry, txAdmin is preventing the server from being restarted.');
            this.lastRefreshStatus = currTimestamp;
            return;
        }
        this.lastRefreshStatus = currTimestamp;

        //Get elapsed times & process status
        //NOTE: null timestamps are coerced to 0 here, which yields a very large elapsed time
        const elapsedHealthCheck = currTimestamp - this.lastSuccessfulHealthCheck;
        const healthCheckFailed = (elapsedHealthCheck > this.hardConfigs.healthCheck.failThreshold);
        const anySuccessfulHeartBeat = (this.lastSuccessfulFD3HeartBeat !== null || this.lastSuccessfulHTTPHeartBeat !== null);
        const elapsedHeartBeat = currTimestamp - Math.max(this.lastSuccessfulFD3HeartBeat, this.lastSuccessfulHTTPHeartBeat);
        const heartBeatFailed = (elapsedHeartBeat > this.hardConfigs.heartBeat.failThreshold);
        const processUptime = globals.fxRunner.getUptime();

        //Check if its online and return
        if (
            this.lastSuccessfulHealthCheck
            && !healthCheckFailed
            && anySuccessfulHeartBeat
            && !heartBeatFailed
        ) {
            this.setCurrentStatus('ONLINE');
            if (this.hasServerStartedYet === false) {
                this.hasServerStartedYet = true;
                globals.statisticsManager.registerFxserverBoot(processUptime);
            }
            return;
        }

        //Now to the (un)fun part: if the status != healthy
        this.setCurrentStatus((healthCheckFailed && heartBeatFailed) ? 'OFFLINE' : 'PARTIAL');
        const timesPrefix = `(HB:${cleanET(elapsedHeartBeat)}|HC:${cleanET(elapsedHealthCheck)})`;
        const elapsedLastWarning = currTimestamp - this.lastStatusWarningMessage;

        //Check if still in cooldown
        if (processUptime < this.config.cooldown) {
            if (console.isVerbose && processUptime > 10 && elapsedLastWarning > 10) {
                console.warn(`${timesPrefix} FXServer is not responding. Still in cooldown of ${this.config.cooldown}s.`);
                this.lastStatusWarningMessage = currTimestamp;
            }
            return;
        }

        //Check if fxChild is closed, in this case no need to wait the failure count
        const processStatus = globals.fxRunner.getStatus();
        if (processStatus === 'closed') {
            globals.statisticsManager.registerFxserverRestart('close');
            this.restartFXServer(
                'server close detected',
                globals.translator.t('restarter.crash_detected'),
            );
            return;
        }

        //Log failure message
        if (elapsedLastWarning >= 15) {
            const msg = (healthCheckFailed)
                ? `${timesPrefix} FXServer is not responding. (${this.lastHealthCheckErrorMessage})`
                : `${timesPrefix} FXServer is not responding. (HB Failed)`;
            this.lastStatusWarningMessage = currTimestamp;
            console.warn(msg);
        }

        //If http partial crash, warn 1 minute before
        if (
            !(elapsedHeartBeat > this.hardConfigs.heartBeat.failLimit)
            && !this.healthCheckRestartWarningIssued
            && elapsedHealthCheck > (this.hardConfigs.healthCheck.failLimit - 60)
        ) {
            globals.discordBot.sendAnnouncement(globals.translator.t(
                'restarter.partial_hang_warn_discord',
                { servername: globals.config.serverName },
            ));
            // Dispatch `txAdmin:events:announcement`
            globals.fxRunner.sendEvent('announcement', {
                author: 'txAdmin',
                message: globals.translator.t('restarter.partial_hang_warn'),
            });
            //Only ever read as a truthy flag; cleared by the next successful health check
            this.healthCheckRestartWarningIssued = currTimestamp;
        }

        //Give a bit more time to the very very slow servers to come up
        //They usually start replying to healthchecks way before sending heartbeats
        //Only logWarn/skip if there is a resource start pending
        const starting = globals.resourcesManager.tmpGetPendingStart();
        if (
            anySuccessfulHeartBeat === false
            && starting.startingElapsedSecs !== null
            && starting.startingElapsedSecs < this.config.resourceStartingTolerance
        ) {
            if (processUptime % 15 === 0) {
                console.warn(`Still waiting for the first HeartBeat. Process started ${processUptime}s ago.`);
                console.warn(`The server is currently starting ${starting.startingResName} (${starting.startingElapsedSecs}s ago).`);
            }
            return;
        }

        //Maybe it just finished loading the resources, but no HeartBeat yet
        if (
            anySuccessfulHeartBeat === false
            && starting.lastStartElapsedSecs !== null
            && starting.lastStartElapsedSecs < this.hardConfigs.heartBeat.resStartedCooldown
        ) {
            if (processUptime % 15 === 0) {
                console.warn(`Still waiting for the first HeartBeat. Process started ${processUptime}s ago.`);
                console.warn(`No resource start pending, last resource started ${starting.lastStartElapsedSecs}s ago.`);
            }
            return;
        }

        //Check if already over the limit
        if (
            elapsedHealthCheck > this.hardConfigs.healthCheck.failLimit
            || elapsedHeartBeat > this.hardConfigs.heartBeat.failLimit
        ) {
            if (anySuccessfulHeartBeat === false) {
                if (starting.startingElapsedSecs !== null) {
                    //Resource didn't finish starting (if res boot still active)
                    this.restartFXServer(
                        `resource "${starting.startingResName}" failed to start within the ${this.config.resourceStartingTolerance}s time limit`,
                        globals.translator.t('restarter.start_timeout'),
                    );
                } else if (starting.lastStartElapsedSecs !== null) {
                    //Resources started, but no heartbeat within limit after that
                    this.restartFXServer(
                        `server failed to start within time limit - ${this.hardConfigs.heartBeat.resStartedCooldown}s after last resource started`,
                        globals.translator.t('restarter.start_timeout'),
                    );
                } else {
                    //No resource started starting, hb over limit
                    this.restartFXServer(
                        `server failed to start within time limit - ${this.hardConfigs.heartBeat.failLimit}s, no onResourceStarting received`,
                        globals.translator.t('restarter.start_timeout'),
                    );
                }
            } else if (elapsedHealthCheck > this.hardConfigs.healthCheck.failLimit) {
                //FIXME: if both the HB and the HC hang, it still ends up in this branch
                globals.statisticsManager.registerFxserverRestart('healthCheck');
                this.restartFXServer(
                    'server partial hang detected',
                    globals.translator.t('restarter.hang_detected'),
                );
            } else {
                globals.statisticsManager.registerFxserverRestart('heartBeat');
                this.restartFXServer(
                    'server hang detected',
                    globals.translator.t('restarter.hang_detected'),
                );
            }
        }
    }


    //================================================================
    /**
     * Records a HeartBeat received from the server.
     * When one source keeps reporting (its previous beat was less than 5s ago) while the
     * other source has been silent for more than 15s, the silent source is registered
     * in the statistics manager.
     * @param {string} source - 'fd3' or 'http'; anything else is ignored
     * @param {*} postData - currently unused
     */
    handleHeartBeat(source, postData) {
        const tsNow = now();
        if (source === 'fd3') {
            if (
                this.lastSuccessfulHTTPHeartBeat
                && tsNow - this.lastSuccessfulHTTPHeartBeat > 15
                && tsNow - this.lastSuccessfulFD3HeartBeat < 5
            ) {
                globals.statisticsManager.registerFxserverRestart('http');
            }
            this.lastSuccessfulFD3HeartBeat = tsNow;
        } else if (source === 'http') {
            if (
                this.lastSuccessfulFD3HeartBeat
                && tsNow - this.lastSuccessfulFD3HeartBeat > 15
                && tsNow - this.lastSuccessfulHTTPHeartBeat < 5
            ) {
                globals.statisticsManager.registerFxserverRestart('fd3');
            }
            this.lastSuccessfulHTTPHeartBeat = tsNow;
        }
    }
}