Permalink
Browse files

Agent modifications for metrics

  • Loading branch information...
1 parent 7613ff4 commit 4960973028cc32c7b404f5e7e2dc4e816c160aef Ryan Phillips committed Aug 30, 2012
@@ -0,0 +1,173 @@
+--[[
+Copyright 2012 Rackspace
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS-IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--]]
+local BaseCheck = require('./base').BaseCheck
+local CheckResult = require('./base').CheckResult
+local Metric = require('./base').Metric
+local misc = require('../util/misc')
+local constants = require('../util/constants')
+local logging = require('logging')
+local async = require('async')
+local url = require('url')
+local http = require('http')
+local https = require('https')
+local Error = require('core').Error
+
+local fmt = require('string').format
+
+local ApacheCheck = BaseCheck:extend()
+--- Set up the Apache status check from its check parameters.
+-- Reads `params.details.url` and `params.details.timeout`, falling back to
+-- the local server-status URL and constants.DEFAULT_PLUGIN_TIMEOUT.
+-- @param params check parameters; must carry a `details` table
+function ApacheCheck:initialize(params)
+  BaseCheck.initialize(self, 'agent.apache', params)
+
+  local details = params.details
+  self._params = params
+  self._url = details.url or 'http://localhost/server-status?auto'
+  self._timeout = details.timeout or constants.DEFAULT_PLUGIN_TIMEOUT
+
+  local target = url.parse(self._url)
+  -- no explicit port in the URL: 80 for plain http, 443 for anything else
+  if not target.port then
+    target.port = (target.protocol == 'http') and 80 or 443
+  end
+
+  -- the request path is always the machine-readable status page, whatever
+  -- path the configured URL carried
+  target.path = '/server-status?auto'
+  self._parsed = target
+end
+
+--- Count the worker slots of an Apache scoreboard string by state.
+-- Scoreboard legend:
+-- "_" Waiting for Connection, "S" Starting up, "R" Reading Request,
+-- "W" Sending Reply, "K" Keepalive (read), "D" DNS Lookup,
+-- "C" Closing connection, "L" Logging, "G" Gracefully finishing,
+-- "I" Idle cleanup of worker, "." Open slot with no current process
+-- @param board scoreboard string, one character per slot
+-- @return table mapping every state name to its slot count (0 when absent);
+-- characters outside the legend are ignored
+function ApacheCheck:_parseScoreboard(board)
+  local counts = { waiting = 0, starting = 0, reading = 0, sending = 0,
+                   keepalive = 0, dns = 0, closing = 0, logging = 0,
+                   gracefully_finishing = 0, idle = 0, open = 0 }
+
+  -- scoreboard character -> key in `counts`
+  local state_of = {
+    ['_'] = 'waiting', ['S'] = 'starting', ['R'] = 'reading',
+    ['W'] = 'sending', ['K'] = 'keepalive', ['D'] = 'dns',
+    ['C'] = 'closing', ['L'] = 'logging', ['G'] = 'gracefully_finishing',
+    ['I'] = 'idle', ['.'] = 'open'
+  }
+
+  for pos = 1, #board do
+    local state = state_of[board:sub(pos, pos)]
+    if state then
+      counts[state] = counts[state] + 1
+    end
+  end
+
+  return counts
+end
+
+-- Metric type reported for each plain `Field: value` entry of the status
+-- page. Built once instead of on every parsed line.
+local APACHE_METRIC_TYPES = {
+  ['Total_Accesses'] = 'gauge',
+  ['Total_kBytes'] = 'uint64',
+  ['Uptime'] = 'uint64',
+  ['BytesPerSec'] = 'uint64',
+  ['BytesPerReq'] = 'uint64',
+  ['BusyWorkers'] = 'uint64',
+  ['IdleWorkers'] = 'uint64',
+  ['CPULoad'] = 'double',
+  ['ReqPerSec'] = 'double'
+}
+
+--- Parse one `Field: value` line of the status page into checkResult.
+-- Known fields become metrics, `ReqPerSec` also sets the status text, and
+-- `Scoreboard` is expanded into one metric per worker state.
+-- @param line a single line of the status output
+-- @param checkResult result object that receives metrics and status
+-- @return an Error when the line has no ':' separator, otherwise nothing
+function ApacheCheck:_parseLine(line, checkResult)
+  local sep = line:find(':', 1, true)
+
+  if not sep then
+    return Error:new('Invalid Apache Status Page')
+  end
+
+  -- Trim first so padding around the name does not turn into underscores.
+  -- The outer parentheses drop gsub's second result (the replacement count).
+  local field = (misc.trim(line:sub(1, sep - 1)):gsub(' ', '_'))
+  local value = misc.trim(line:sub(sep + 1))
+
+  local metric_type = APACHE_METRIC_TYPES[field]
+  if metric_type then
+    checkResult:addMetric(field, nil, metric_type, value)
+  end
+
+  if field == 'ReqPerSec' then
+    -- `value` is text; %.2f raises on a non-numeric argument, so only
+    -- format it when it actually parses as a number
+    local rate = tonumber(value)
+    if rate then
+      checkResult:setStatus(fmt('ReqPerSec: %.2f', rate))
+    end
+  end
+
+  if field == 'Scoreboard' then
+    local states = self:_parseScoreboard(value)
+    for state, count in pairs(states) do
+      checkResult:addMetric(state, nil, 'uint64', count)
+    end
+  end
+
+  return
+end
+
+--- Parse a complete status-page body, line by line, into checkResult.
+-- Stops at the first line _parseLine rejects and records that error's
+-- message on checkResult.
+-- @param data response body as a string
+-- @param checkResult result object that receives metrics or the error
+function ApacheCheck:_parse(data, checkResult)
+  -- the line pattern below only yields newline-terminated lines; terminate
+  -- a non-empty body so its final line is not silently dropped
+  if #data > 0 and data:sub(-1) ~= '\n' then
+    data = data .. '\n'
+  end
+
+  for line in data:gmatch("([^\n]*)\n") do
+    local err = self:_parseLine(line, checkResult)
+    if err then
+      checkResult:setError(err.message)
+      return
+    end
+  end
+end
+
+--- Fetch the status page and hand a CheckResult to `callback`.
+-- `callback` is invoked exactly once, whether the response completes, the
+-- response stream fails, or the request itself fails (e.g. the connection
+-- is refused).
+-- NOTE(review): self._timeout is stored by initialize() but is not applied
+-- to the request here -- confirm whether a request timeout is wanted.
+-- @param callback function(checkResult)
+function ApacheCheck:run(callback)
+  local checkResult = CheckResult:new(self, {})
+  local protocol = self._parsed.protocol == 'http' and http or https
+
+  -- 'end' and 'error' can both fire for one request; report only the first
+  local reported = false
+  local function finish()
+    if reported then
+      return
+    end
+    reported = true
+    callback(checkResult)
+  end
+
+  local function fail(err)
+    if reported then
+      return
+    end
+    checkResult:setError(type(err) == 'table' and err.message or tostring(err))
+    finish()
+  end
+
+  local req = protocol.request(self._parsed, function(res)
+    local data = ''
+    res:on('data', function(chunk)
+      data = data .. chunk
+    end)
+    res:on('end', function()
+      if not reported then
+        self:_parse(data, checkResult)
+      end
+      res:destroy()
+      finish()
+    end)
+    res:on('error', fail)
+  end)
+  -- without this a failure before any response exists never reaches
+  -- `callback`
+  req:on('error', fail)
+  req:done()
+end
+
+-- Module interface: exposes the ApacheCheck class.
+local exports = { ApacheCheck = ApacheCheck }
+return exports
@@ -1,32 +1,198 @@
+--[[
+Copyright 2012 Rackspace
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS-IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--]]
local BaseCheck = require('./base').BaseCheck
local CheckResult = require('./base').CheckResult
local Metric = require('./base').Metric
+local logging = require('logging')
+local timer = require('timer')
+local math = require('math')
+local table = require('table')
+local async = require('async')
+local sctx = require('../sigar').ctx
local CpuCheck = BaseCheck:extend()
-local DIMENSION_PREFIX = 'cpu.'
+local SAMPLE_RATE = 5000 -- Milliseconds to sample on initial run
+
+--- Build a CPU name from its index, e.g. 0 gives "cpu0".
+-- @param index value appended to the "cpu" prefix
+-- @return the prefixed name
+local function metricCpuKey(index)
+  local name = 'cpu' .. index
+  return name
+end
+
+--- Build the metric key holding a counter's percentage, e.g. "user_percent".
+-- @param sigarMetric counter name
+-- @return the counter name with "_percent" appended
+local function metricPercentKey(sigarMetric)
+  local key = sigarMetric .. '_percent'
+  return key
+end
+
+--- Build the metric key holding an average, e.g. "user_percent_average".
+-- @param sigarMetric name to suffix
+-- @return the name with "_average" appended
+local function metricAverageKey(sigarMetric)
+  local key = sigarMetric .. '_average'
+  return key
+end
+
+-- Counter names copied from each sigar CPU entry.
+local SIGAR_METRICS = { 'user', 'sys', 'idle', 'wait', 'irq', 'stolen' }
+
+-- The "<counter>_percent" key of every sigar counter, in the same order.
+local AGGREGATE_METRICS = {}
+for position = 1, #SIGAR_METRICS do
+  AGGREGATE_METRICS[position] = metricPercentKey(SIGAR_METRICS[position])
+end
+--- Set up the CPU check.
+-- Passes 'agent.cpu' and `params` on to BaseCheck.initialize and clears the
+-- stored CPU sample.
+-- @param params check parameters, forwarded unchanged to BaseCheck.initialize
function CpuCheck:initialize(params)
BaseCheck.initialize(self, 'agent.cpu', params)
+ -- store the previous cpuinfo so we can aggregate percent differences;
+ -- nil means no sample has been taken yet (run() tests for this)
+ self._previousCpuinfo = nil
end
-function CpuCheck:run(callback)
- -- Perform Check
- local s = sigar:new()
- local cpuinfo = s:cpus()
+--- Take a snapshot of the sigar counters of every CPU.
+-- @return array with one table per entry of sctx:cpus(), in the same order,
+-- each holding the SIGAR_METRICS fields copied from that entry's data()
+function CpuCheck:_getCpuInfo()
+  local cpus = sctx:cpus()
+  local snapshot = {}
+
+  for index = 1, #cpus do
+    local raw = cpus[index]:data()
+
+    -- keep only the counters this check works with
+    local counters = {}
+    for k = 1, #SIGAR_METRICS do
+      local name = SIGAR_METRICS[k]
+      counters[name] = raw[name]
+    end
+    snapshot[index] = counters
+  end
+
+  return snapshot
+end
+
+--- Turn two consecutive CPU samples into usage metrics.
+-- Compares `cpuinfo` with the sample kept in self._previousCpuinfo, derives
+-- the per-CPU share of every counter, and reports per-state averages, the
+-- average usage, and the CPUs with the highest and lowest usage. `cpuinfo`
+-- then becomes the stored sample for the next call.
+-- @param cpuinfo array of per-CPU counter tables as built by _getCpuInfo();
+-- each entry gains a "<counter>_percent" field per counter
+-- @param callback function(err, metrics); err is always nil here
+function CpuCheck:_aggregateMetrics(cpuinfo, callback)
+  local diffcpuinfo = {}
+  local percentages = {}
local metrics = {}
- local checkResult = CheckResult:new(self, {})
+  local previous = self._previousCpuinfo or {}
+  local cpu_count = #cpuinfo
+  -- divisor for the averages; never 0, so an empty sample yields 0 averages
+  local divisor = cpu_count > 0 and cpu_count or 1
+  local total = 0
- for i=1, #cpuinfo do
- for key, value in pairs(cpuinfo[i]:data()) do
- local index = i - 1
- checkResult:addMetric(key, DIMENSION_PREFIX .. index, nil, value)
+  -- calculate the delta between two runs
+  for i = 1, cpu_count do
+    -- a CPU without an earlier sample (e.g. the CPU count changed) uses its
+    -- current counters as the baseline, which gives a zero delta
+    local before = previous[i] or cpuinfo[i]
+    diffcpuinfo[i] = {}
+    for _, v in ipairs(SIGAR_METRICS) do
+      diffcpuinfo[i][v] = cpuinfo[i][v] - before[v]
end
end
- -- Return Result
- self._lastResult = checkResult
- callback(checkResult)
+  -- calculate CPU usage percentages across all cpus
+  for i = 1, cpu_count do
+    total = 0
+    for _, v in ipairs(SIGAR_METRICS) do
+      total = total + diffcpuinfo[i][v]
+    end
+
+    percentages[i] = {}
+    percentages[i]['total'] = total
+    for _, v in ipairs(SIGAR_METRICS) do
+      -- no ticks elapsed between the two samples: report 0 rather than
+      -- the NaN that 0/0 would produce
+      local percent = 0
+      if total ~= 0 then
+        percent = (diffcpuinfo[i][v] / total) * 100
+      end
+      local key = metricPercentKey(v)
+      percentages[i][key] = percent
+      cpuinfo[i][key] = percent
+    end
+  end
+
+  -- average all the cpu state percentages across all cpus
+  for _, key in ipairs(AGGREGATE_METRICS) do
+    total = 0
+    for i = 1, cpu_count do
+      total = total + percentages[i][key]
+    end
+    metrics[metricAverageKey(key)] = total / divisor
+  end
+
+  -- calculate CPU usage percentage averages across all CPUs
+  local idle_key = metricPercentKey('idle')
+  total = 0
+  for i = 1, cpu_count do
+    local current_cpu_total = 0
+    for _, v in ipairs(AGGREGATE_METRICS) do
+      if v ~= idle_key then -- discard idle percentage
+        current_cpu_total = current_cpu_total + percentages[i][v]
+      end
+    end
+    total = total + current_cpu_total
+    percentages[i]['current_cpu_usage'] = current_cpu_total
+  end
+  metrics['usage_average'] = total / divisor
+
+  -- find cpu with minimum and maximum usage
+  local cpu_max_index = 0
+  local cpu_min_index = 0
+  local cpu_max_usage = 0
+  local cpu_min_usage = 100
+  for i = 1, cpu_count do
+    local usage = percentages[i]['current_cpu_usage']
+    -- on a tie the later CPU wins; reported CPU names are zero-based
+    if usage >= cpu_max_usage then
+      cpu_max_usage = usage
+      cpu_max_index = i - 1
+    end
+    if usage <= cpu_min_usage then
+      cpu_min_usage = usage
+      cpu_min_index = i - 1
+    end
+  end
+  metrics['max_cpu_usage'] = cpu_max_usage
+  metrics['max_cpu_usage_name'] = metricCpuKey(cpu_max_index)
+  metrics['min_cpu_usage'] = cpu_min_usage
+  metrics['min_cpu_usage_name'] = metricCpuKey(cpu_min_index)
+
+  -- store run for next time
+  self._previousCpuinfo = cpuinfo
+
+  callback(nil, metrics)
+end
+
+--- Run the CPU check and hand a CheckResult to `callback`.
+-- The first run has no earlier sample to compare against, so it takes one,
+-- waits SAMPLE_RATE milliseconds and samples again; later runs compare with
+-- the sample stored by the previous run. The result is also kept in
+-- self._lastResult.
+-- @param callback function(checkResult)
+function CpuCheck:run(callback)
+  -- Perform Check
+  local checkResult = CheckResult:new(self, {})
+
+  async.waterfall({
+    function(step)
+      -- check if this is _not_ our first run
+      if self._previousCpuinfo ~= nil then
+        step(nil, self:_getCpuInfo())
+        return
+      end
+      -- store the cpu info, then spawn a timer to wait
+      self._previousCpuinfo = self:_getCpuInfo()
+      timer.setTimeout(SAMPLE_RATE, function()
+        step(nil, self:_getCpuInfo())
+      end)
+    end,
+    -- attach cpu average metrics
+    function(cpuinfo, step)
+      self:_aggregateMetrics(cpuinfo, function(err, metrics)
+        step(err, cpuinfo, metrics)
+      end)
+    end,
+    -- add metrics to checkResult
+    function(cpuinfo, metrics, step)
+      -- attach percentages and averages
+      for key, value in pairs(metrics) do
+        checkResult:addMetric(key, nil, nil, value)
+      end
+      step()
+    end
+  }, function(err)
+    -- record a failed step on the result instead of silently dropping it
+    if err then
+      checkResult:setError(type(err) == 'table' and err.message or tostring(err))
+    end
+    -- Return Result
+    self._lastResult = checkResult
+    callback(checkResult)
+  end)
end
local exports = {}
Oops, something went wrong.

0 comments on commit 4960973

Please sign in to comment.