Skip to content
This repository has been archived by the owner on Jan 21, 2021. It is now read-only.

Commit

Permalink
Merge pull request #10 from zopefoundation/nagios
Browse files Browse the repository at this point in the history
Nagios monitor
  • Loading branch information
freddrake committed Feb 11, 2015
2 parents 2480cfd + 6f8a44d commit 5dd5889
Show file tree
Hide file tree
Showing 11 changed files with 402 additions and 37 deletions.
1 change: 1 addition & 0 deletions .travis.yml
@@ -1,5 +1,6 @@
language: python
install:
- pip install -U setuptools
- python bootstrap.py
- bin/buildout
script:
Expand Down
25 changes: 18 additions & 7 deletions bootstrap.py
Expand Up @@ -35,7 +35,7 @@
Simply run this script in a directory containing a buildout.cfg, using the
Python that you want bin/buildout to use.
Note that by using --find-links to point to local resources, you can keep
Note that by using --find-links to point to local resources, you can keep
this script from going over the network.
'''

Expand All @@ -59,6 +59,8 @@
parser.add_option("--allow-site-packages",
action="store_true", default=False,
help=("Let bootstrap.py use existing site packages"))
parser.add_option("--setuptools-version",
help="use a specific setuptools version")


options, args = parser.parse_args()
Expand All @@ -79,16 +81,20 @@

if not options.allow_site_packages:
# ez_setup imports site, which adds site packages
# this will remove them from the path to ensure that incompatible versions
# this will remove them from the path to ensure that incompatible versions
# of setuptools are not in the path
import site
# inside a virtualenv, there is no 'getsitepackages'.
# inside a virtualenv, there is no 'getsitepackages'.
# We can't remove these reliably
if hasattr(site, 'getsitepackages'):
for sitepackage_path in site.getsitepackages():
sys.path[:] = [x for x in sys.path if sitepackage_path not in x]

setup_args = dict(to_dir=tmpeggs, download_delay=0)

if options.setuptools_version is not None:
setup_args['version'] = options.setuptools_version

ez['use_setuptools'](**setup_args)
import setuptools
import pkg_resources
Expand Down Expand Up @@ -128,10 +134,15 @@
# Markers that the legacy (tuple-based) parsed version uses for a final
# release; any other '*'-prefixed part denotes a pre-release tag.
_final_parts = '*final-', '*final'


def _final_version(parsed_version):
    """Return True if *parsed_version* is a final (non pre-release) version.

    *parsed_version* is whatever the installed setuptools produced when it
    parsed a version string.  Objects that expose ``is_prerelease`` are
    asked directly; anything else is treated as the older representation,
    an iterable of string parts, and scanned for pre-release markers.
    """
    try:
        return not parsed_version.is_prerelease
    except AttributeError:
        # Older setuptools: no ``is_prerelease`` attribute, so inspect the
        # individual parts instead.
        for part in parsed_version:
            if (part[:1] == '*') and (part not in _final_parts):
                return False
        return True

index = setuptools.package_index.PackageIndex(
search_path=[setuptools_path])
if find_links:
Expand Down
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -26,6 +26,7 @@
zkresumelb = zc.resumelb.zk:lbmain
get-worker-resume = zc.resumelb.worker:get_resume_main
get-lb-status = zc.resumelb.zk:get_lb_status
rlb-nagios = zc.resumelb.nagios:main
[paste.server_runner]
main = zc.resumelb.worker:server_runner
Expand Down
2 changes: 2 additions & 0 deletions src/zc/resumelb/README.txt
Expand Up @@ -242,6 +242,8 @@ maintaining ZooKeeper trees.
Change History
==============

- Nagios monitoring plugin

- You can now supply alternative pool implementations.

Thanks to: https://github.com/zopefoundation/zc.resumelb/pull/3
Expand Down
49 changes: 25 additions & 24 deletions src/zc/resumelb/lb.py
Expand Up @@ -108,7 +108,31 @@ def handle_wsgi(self, env, start_response):
finally:
self.pool.put(worker)

class Pool:
class PoolStatus:
    """Mixin providing a ``status()`` report for a worker pool.

    The class using this mixin must supply ``backlog``, ``mbacklog`` and
    ``workers`` attributes.  Each worker must have ``__name__``,
    ``backlog``, ``mbacklog``, ``oldest_time`` and a ``write_queue`` with
    a ``qsize()`` method.
    """

    def status(self):
        """Return a dict describing the pool and each of its workers.

        Keys:

        ``backlog``
            The pool's ``backlog`` attribute.
        ``mean_backlog``
            The pool's ``mbacklog`` attribute.
        ``workers``
            One ``(name, backlog, mbacklog, oldest_time)`` tuple per
            worker, ordered by name.  A falsy ``oldest_time`` is reported
            as ``None``.
        ``workers_ex``
            One ``(name, write_queue_size)`` tuple per worker, in the same
            order as ``workers``.
        """
        # Sort once so both listings are built from the same ordering
        # instead of sorting the worker collection twice.
        workers = sorted(self.workers, key=lambda w: w.__name__)
        return dict(
            backlog=self.backlog,
            mean_backlog=self.mbacklog,
            workers=[
                (worker.__name__,
                 worker.backlog,
                 worker.mbacklog,
                 (worker.oldest_time if worker.oldest_time else None),
                 )
                for worker in workers
                ],
            workers_ex=[
                (worker.__name__,
                 worker.write_queue.qsize(),
                 )
                for worker in workers
                ],
            )

class Pool(PoolStatus):

def __init__(self,
unskilled_score=None, variance=None, backlog_history=None,
Expand Down Expand Up @@ -331,29 +355,6 @@ def put(self, worker):
assert worker.backlog >= 0
_decay_backlog(worker, self.worker_decay)

def status(self):
return dict(
backlog = self.backlog,
mean_backlog = self.mbacklog,
workers = [
(worker.__name__,
worker.backlog,
worker.mbacklog,
(int(worker.oldest_time)
if worker.oldest_time else None),
)
for worker in sorted(
self.workers, key=lambda w: w.__name__)
],
workers_ex = [
(worker.__name__,
worker.write_queue.qsize(),
)
for worker in sorted(
self.workers, key=lambda w: w.__name__)
],
)

def _init_backlog(worker):
worker.backlog = getattr(worker, 'backlog', 0)
worker.dbacklog = getattr(worker, 'dbacklog', worker.backlog)
Expand Down
113 changes: 113 additions & 0 deletions src/zc/resumelb/nagios.py
@@ -0,0 +1,113 @@
"""Nagios monitor for resumelb
"""
from __future__ import print_function
import argparse
import gevent.socket
import json
import socket
import sys
import time


# Command-line interface of the Nagios plugin.  Every threshold is optional
# and defaults to None, meaning "do not check this value".
parser = argparse.ArgumentParser()

parser.add_argument('socket',
                    help='Path to a load-balancer status socket')

parser.add_argument('--worker-mean-backlog-warn', '-b', type=int,
                    help='Mean worker backlog at which we warn.')
parser.add_argument('--worker-mean-backlog-error', '-B', type=int,
                    help='Mean worker backlog at which we error.')

parser.add_argument('--worker-max-backlog-warn', '-x', type=int,
                    help='Maximum worker backlog at which we warn.')
parser.add_argument('--worker-max-backlog-error', '-X', type=int,
                    help='Maximum worker backlog at which we error.')

parser.add_argument('--worker-request-age-warn', '-a', type=int,
                    help='Maximum request age, in seconds, at which we warn.')
parser.add_argument('--worker-request-age-error', '-A', type=int,
                    help='Maximum request age, in seconds, at which we error.')

# Unlike the thresholds above, these trip when the value is at or BELOW
# the given number.
parser.add_argument('--minimum-worker-warn', '-w', type=int,
                    help='Number of workers at or below which we warn.')
parser.add_argument('--minimum-worker-error', '-W', type=int,
                    help='Number of workers at or below which we error.')

parser.add_argument('--metrics', '-m', action="store_true",
                    help='Output metrics.')


def _check(value, warn, error, format, message, severity, sign=1):
    """Test *value* against its thresholds and return the new severity.

    A threshold of ``None`` is never tripped.  With the default ``sign``
    of 1 a threshold trips when *value* is at or above it; with ``sign``
    of -1 it trips when *value* is at or below it.

    When a threshold trips, ``format % abs(value)`` is appended to the
    *message* list.  Tripping *error* yields 2; tripping only *warn*
    yields at least 1 without lowering an already higher *severity*;
    otherwise *severity* is returned unchanged.
    """
    def tripped(threshold):
        # Multiplying both sides by ``sign`` flips the comparison for
        # "lower is worse" values.
        return threshold is not None and sign * value >= sign * threshold

    if tripped(error):
        outcome = 2
    elif tripped(warn):
        outcome = max(1, severity)
    else:
        return severity

    message.append(format % abs(value))
    return outcome

def main(args=None):
    """Query a load-balancer status socket and report in Nagios style.

    *args* is the argument list to parse; it defaults to ``sys.argv[1:]``.

    Prints a single status line (optionally followed by ``|`` and metric
    data when ``--metrics`` is given) and returns the plugin status:
    ``None`` when everything is OK, 1 for a warning, 2 for an error, and
    3 when neither metrics nor any alert threshold was requested.
    """
    if args is None:
        args = sys.argv[1:]

    args = parser.parse_args(args)
    # Every alert threshold counts as a request for work, including the
    # request-age thresholds.
    thresholds = (
        args.worker_mean_backlog_warn, args.worker_mean_backlog_error,
        args.worker_max_backlog_warn, args.worker_max_backlog_error,
        args.worker_request_age_warn, args.worker_request_age_error,
        args.minimum_worker_warn, args.minimum_worker_error,
        )
    if not args.metrics and all(o is None for o in thresholds):
        print("You need to request metrics and/or alert settings")
        return 3

    now = time.time()
    status_socket = gevent.socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        status_socket.connect(args.socket)
        status_file = status_socket.makefile()
        try:
            status = json.loads(status_file.read())
        finally:
            status_file.close()
    finally:
        # Release the socket even if connecting, reading or parsing failed.
        status_socket.close()

    workers = status['workers']
    nworkers = len(workers)
    mbacklog = status["mean_backlog"]
    # max() raises ValueError on an empty sequence, so a load balancer
    # with no workers needs explicit defaults.  -1 is the same "no
    # outstanding request" marker used for idle workers below.
    if workers:
        max_backlog = max(w[1] for w in workers)
        max_age = max((now - w[3] if w[3] else -1) for w in workers)
    else:
        max_backlog = 0
        max_age = -1

    severity = 0
    message = []
    severity = _check(
        mbacklog,
        args.worker_mean_backlog_warn, args.worker_mean_backlog_error,
        "mean backlog high (%s)", message, severity)
    severity = _check(
        max_backlog,
        args.worker_max_backlog_warn, args.worker_max_backlog_error,
        "max backlog high (%s)", message, severity)
    severity = _check(
        max_age,
        args.worker_request_age_warn, args.worker_request_age_error,
        "max age too high (%.1f)", message, severity)
    # sign=-1: fewer workers is worse, so the comparison is inverted.
    severity = _check(
        nworkers,
        args.minimum_worker_warn, args.minimum_worker_error,
        "too few workers (%s)", message, severity, -1)

    if message:
        message = ' '.join(message)
    else:
        message = "OK %s %s %s %s" % (
            nworkers, mbacklog, max_backlog,
            int(round(max_age)) if max_age >= 0 else '-')

    if args.metrics:
        message += (
            '|workers=%s mean_backlog=%s max_backlog=%s max_age=%.1fseconds'
            % (nworkers, mbacklog, max_backlog, max_age))

    print(message)
    # None (rather than 0) so that sys.exit(main()) exits successfully.
    return severity or None


if __name__ == '__main__':
    # Propagate main()'s result as the process exit status; Nagios reads
    # the exit code, not just the printed message.
    sys.exit(main())

0 comments on commit 5dd5889

Please sign in to comment.