diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..eac37979
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,14 @@
+CHANGELOG
+=========
+
+# 1.1.0 / 2016-06-27
+
+* [FEATURE] Allow APT repo settings to be user-defined. See [#20][] (thanks [@geoffwright][])
+
+# 1.0.0 / 2016-06-08
+
+Initial release, compatible with Ansible v1 & v2
+
+[#20]: https://github.com/DataDog/ansible-datadog/issues/20
+
+[@geoffwright]: https://github.com/geoffwright
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..e9a98fa7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,90 @@
+Ansible Datadog Role
+========
+[![Ansible Galaxy](http://img.shields.io/badge/galaxy-Datadog.datadog-660198.svg)](https://galaxy.ansible.com/Datadog/datadog/)
+
+Install and configure the Datadog base agent & checks.
+
+Installation
+------------
+
+```
+ansible-galaxy install Datadog.datadog
+```
+
+Role Variables
+--------------
+
+- `datadog_api_key` - Your Datadog API key.
+- `datadog_checks` - YAML configuration for agent checks to drop into `/etc/dd-agent/conf.d`.
+- `datadog_config` - Settings to place in `/etc/dd-agent/datadog.conf`.
+- `datadog_process_checks` - Array of process checks and options (DEPRECATED: use `process` under
+`datadog_checks` instead).
+- `datadog_apt_repo` - Override the default Datadog `apt` repository.
+- `datadog_apt_key_url` - Override the default URL for the Datadog `apt` key.
+
+Dependencies
+------------
+None
+
+Example Playbooks
+-------------------------
+```
+- hosts: servers
+  roles:
+    - { role: Datadog.datadog, become: yes }  # On Ansible < 1.9, use `sudo: yes` instead of `become: yes`
+  vars:
+    datadog_api_key: "123456"
+    datadog_config:
+      tags: "mytag0, mytag1"
+      log_level: INFO
+    datadog_checks:
+      process:
+        init_config:
+        instances:
+          - name: ssh
+            search_string: ['ssh', 'sshd']
+          - name: syslog
+            search_string: ['rsyslog']
+            cpu_check_interval: 0.2
+            exact_match: true
+            ignore_denied_access: true
+      ssh_check:
+        init_config:
+        instances:
+          - host: localhost
+            port: 22
+            username: root
+            password: changeme
+            sftp_check: True
+            private_key_file:
+            add_missing_keys: True
+      nginx:
+        init_config:
+        instances:
+          - nginx_status_url: http://example.com/nginx_status/
+            tags:
+              - instance:foo
+          - nginx_status_url: http://example2.com:1234/nginx_status/
+            tags:
+              - instance:bar
+```
+
+```
+- hosts: servers
+  roles:
+    - { role: Datadog.datadog, become: yes, datadog_api_key: "mykey" }  # On Ansible < 1.9, use `sudo: yes` instead of `become: yes`
+```
+
+License
+-------
+
+Apache2
+
+Author Information
+------------------
+
+brian@akins.org
+
+dustinjamesbrown@gmail.com (forked from brian@akins.org)
+
+Datadog (forked from dustinjamesbrown@gmail.com)
diff --git a/defaults/main.yml b/defaults/main.yml
new file mode 100644
index 00000000..cdbbeb3e
--- /dev/null
+++ b/defaults/main.yml
@@ -0,0 +1,33 @@
+---
+datadog_enabled: yes
+datadog_api_key: "youshouldsetthis"
+
+# Comma-separated list of tags
+datadog_tags: ""
+
+datadog_url: "https://app.datadoghq.com"
+datadog_use_mount: "no"
+
+# default datadog.conf options
+datadog_config: {}
+
+# default checks enabled
+datadog_checks: {}
+
+# custom check agents to copy into checks.d
+# (a list of {name: ...} items, as consumed by tasks/main.yml)
+datadog_check_agents: []
+
+# default user/group
+datadog_user: dd-agent
+datadog_group: root
+
+# default apt repo
+datadog_apt_repo: "deb http://apt.datadoghq.com/ stable main"
+
+datadog_mysql_host: localhost
+datadog_mysql_user: datadog
+datadog_mysql_password: ThisNeedsToBeChangedViaVault
+datadog_mysql_replication_enabled: True
+datadog_mysql_extra_status_enabled: True
+datadog_mysql_extra_innodb_enabled: True
+datadog_mysql_extra_performance_enabled: True
diff --git a/files/nutcracker.py b/files/nutcracker.py
new file mode 100644
index 00000000..124ac905
--- /dev/null
+++ b/files/nutcracker.py
@@ -0,0 +1,217 @@
+"""
+To test this, run 'sudo -u dd-agent dd-agent check nutcracker'
+
+When ready:
+- place this file in /etc/dd-agent/checks.d/nutcracker.py
+- put the config file in /etc/dd-agent/conf.d/nutcracker.yaml
+- service datadog-agent restart
+"""
+
+import hashlib
+import json
+import socket
+import time
+import uuid
+
+import memcache  # third-party: python-memcached
+
+from checks import AgentCheck
+
+
+class NutcrackerCheck(AgentCheck):
+    SOURCE_TYPE_NAME = 'nutcracker'
+    SERVICE_CHECK = 'nutcracker.can_connect'
+
+    DEFAULT_HOST = '127.0.0.1'
+    DEFAULT_PORT = 11211
+    DEFAULT_STATS_PORT = 22222
+
+    # Pool stats. These descriptions are from 'nutcracker --describe-stats'
+    POOL_STATS = [
+        ['curr_connections', 'gauge', None],  # Number of current connections
+        ['total_connections', 'rate', None],  # Running total connections made
+        ['server_ejects', 'rate', None],      # times a backend server was ejected
+        ['client_err', 'rate', None],         # errors on client connections
+    ]
+
+    # Server stats. These descriptions are from 'nutcracker --describe-stats'
+    SERVER_STATS = [
+        ['server_eof', 'rate', None],                    # eof on server connections
+        ['server_err', 'rate', None],                    # errors on server connections
+        ['server_timedout', 'rate', 'timedout'],         # timeouts on server connections
+        ['server_connections', 'gauge', 'connections'],  # active server connections
+        ['requests', 'rate', None],                      # requests
+        ['request_bytes', 'rate', None],                 # total request bytes
+        ['responses', 'rate', None],                     # responses
+        ['response_bytes', 'rate', None],                # total response bytes
+        ['in_queue', 'gauge', None],                     # requests in incoming queue
+        ['in_queue_bytes', 'gauge', None],               # current request bytes in incoming queue
+        ['out_queue', 'gauge', None],                    # requests in outgoing queue
+        ['out_queue_bytes', 'gauge', None],              # current request bytes in outgoing queue
+    ]
+
+    def _get_raw_stats(self, host, stats_port):
+        # Connect
+        self.log.debug("Connecting to %s:%s", host, stats_port)
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.connect((host, stats_port))
+
+        # Read one line of JSON from the stats port
+        f = s.makefile('r')
+        data = f.readline()
+        s.close()
+
+        # Load
+        return json.loads(data)
+
+    def _send_datadog_stat(self, item, data, tag_map, prefix):
+        # Break out the info
+        stat_key, stat_type, override_name = item
+
+        # Make sure we have a name
+        if not override_name:
+            override_name = stat_key
+
+        # Add the prefix if appropriate.
+        if prefix:
+            override_name = prefix + "_" + override_name
+
+        try:
+            # Get the data, make sure it's there.
+            stat_data = float(data.get(stat_key))
+        except (TypeError, ValueError):
+            # Missing or non-numeric. Let it be zero.
+            stat_data = 0
+
+        # Make the datadog metric.
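+        # normalize() prefixes the metric with SOURCE_TYPE_NAME, so e.g. the
+        # pool stat 'client_err' comes out as 'nutcracker.pool_client_err'
+        # (rate-typed stats additionally get a '_rate' suffix below).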
+        metric = self.normalize(override_name.lower(), self.SOURCE_TYPE_NAME)
+
+        tags = [k + ":" + v for k, v in tag_map.iteritems()]
+
+        if stat_type == 'gauge':
+            self.gauge(metric, stat_data, tags=tags)
+            return
+
+        if stat_type == 'rate':
+            metric += "_rate"
+            self.rate(metric, stat_data, tags=tags)
+            return
+
+        if stat_type == 'bool':
+            self.gauge(metric, (1 if stat_data else 0), tags=tags)
+            return
+
+        raise Exception("Unknown datadog stat type '%s' for key '%s'" % (stat_type, stat_key))
+
+    def _get_metrics(self, host, port, stats_port, tags, aggregation_key):
+        try:
+            raw_stats = self._get_raw_stats(host, stats_port)
+        except Exception as e:
+            self.service_check(self.SERVICE_CHECK, AgentCheck.CRITICAL)
+            self.event({
+                'timestamp': int(time.time()),
+                'event_type': 'get_stats',
+                'msg_title': 'Cannot get stats',
+                'msg_text': str(e),
+                'aggregation_key': aggregation_key
+            })
+
+            raise
+
+        # Get all the pool stats
+        for pool_key, pool_data in raw_stats.iteritems():
+            try:
+                # Pools are not separated from the other keys, blarg.
+                # Just check if it's a dict with one of the pool keys, if not then skip it.
+                pool_data['client_connections']
+            except (KeyError, TypeError):
+                # Not there, it's not a pool.
+                self.log.debug(pool_key + ": NOT A POOL")
+                continue
+
+            # Tag with the pool name, on a copy so one pool's tags (and its
+            # servers' tags) don't leak into the next pool's metrics.
+            pool_tags = dict(tags)
+            pool_tags['nutcracker_pool'] = pool_key
+
+            # It's a pool. Process all the non-server stats
+            for item in self.POOL_STATS:
+                self._send_datadog_stat(item, pool_data, pool_tags, "pool")
+
+            # Find all the servers.
+            for server_key, server_data in pool_data.iteritems():
+                try:
+                    # Servers are not separated from the other keys, blarg.
+                    # Just check if it's a dict with one of the server keys, if not then skip it.
+                    server_data['in_queue_bytes']
+                except (KeyError, TypeError):
+                    # Not there, it's not a server.
+                    self.log.debug(server_key + ": NOT A SERVER")
+                    continue
+
+                # Set the server in the tags, again on a copy.
+                server_tags = dict(pool_tags)
+                server_tags['nutcracker_pool_server'] = server_key
+
+                # It's a server. Send stats.
+                for item in self.SERVER_STATS:
+                    self._send_datadog_stat(item, server_data, server_tags, "server")
+
+        # The key for our roundtrip tests.
+        key = uuid.uuid4().hex
+
+        try:
+            # Make the connection and do a round trip.
+            mc = memcache.Client([host + ':' + str(port)], debug=0)
+
+            mc.set(key, key)
+            data = mc.get(key)
+            mc.delete(key)
+            empty_data = mc.get(key)
+
+            # Did the get work?
+            if data != key:
+                raise Exception("Cannot set and get")
+
+            # Did the delete work?
+            if empty_data:
+                raise Exception("Cannot delete")
+
+        except Exception as e:
+            # Something failed. Gauges want a list of "key:value" tags.
+            metric = self.normalize("test_connect_fail", self.SOURCE_TYPE_NAME)
+            self.gauge(metric, 1, tags=[k + ":" + v for k, v in tags.iteritems()])
+
+            self.service_check(self.SERVICE_CHECK, AgentCheck.CRITICAL)
+            self.event({
+                'timestamp': int(time.time()),
+                'event_type': 'test_data',
+                'msg_title': 'Cannot get/set/delete',
+                'msg_text': str(e),
+                'aggregation_key': aggregation_key
+            })
+
+            raise
+
+        # Connection is ok.
+        self.service_check(self.SERVICE_CHECK, AgentCheck.OK)
+
+    # Called by datadog as the starting point for this check.
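+    # A minimal /etc/dd-agent/conf.d/nutcracker.yaml to pair with this check --
+    # an illustrative sketch, not part of this diff; the host/ports below are
+    # placeholders, and the options mirror exactly what check() reads from each
+    # instance (host, port, stats_port, and a list of "key:value" tags):
+    #
+    #     init_config:
+    #
+    #     instances:
+    #       - host: 127.0.0.1
+    #         port: 11211
+    #         stats_port: 22222
+    #         tags:
+    #           - env:dev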
+    def check(self, instance):
+        host = instance.get('host', self.DEFAULT_HOST)
+        port = int(instance.get('port', self.DEFAULT_PORT))
+        stats_port = int(instance.get('stats_port', self.DEFAULT_STATS_PORT))
+
+        tags = {}
+        for item in instance.get('tags', []):
+            k, v = item.split(":", 1)
+            tags[k] = v
+
+        tags["host"] = host + ":" + str(port)
+
+        aggregation_key = hashlib.md5(host + ":" + str(port)).hexdigest()
+
+        self._get_metrics(host, port, stats_port, tags, aggregation_key)
diff --git a/files/redis-labs-enterprise-cluster.py b/files/redis-labs-enterprise-cluster.py
new file mode 100644
index 00000000..75dd8750
--- /dev/null
+++ b/files/redis-labs-enterprise-cluster.py
@@ -0,0 +1,195 @@
+"""
+To test this, run:
+'sudo -u dd-agent dd-agent check redis-labs-enterprise-cluster'
+
+When ready:
+- place this file in /etc/dd-agent/checks.d/redis-labs-enterprise-cluster.py
+- put the config in /etc/dd-agent/conf.d/redis-labs-enterprise-cluster.yaml
+- service datadog-agent restart
+"""
+
+import base64
+import json
+import ssl
+import socket
+import time
+import urllib2
+
+from checks import AgentCheck
+
+GIG = 1024 * 1024 * 1024
+
+
+class RedisLabsEnterpriseClusterCheck(AgentCheck):
+    SOURCE_TYPE_NAME = 'rlec'
+    SERVICE_CHECK = 'rlec.can_connect'
+
+    DEFAULT_HOST = '127.0.0.1'
+    DEFAULT_PORT = 9443
+
+    stats_endpoints = [
+        'bdbs',
+        'nodes',
+        'bdbs/stats/last',
+        'shards/stats/last',
+        'nodes/stats/last',
+        'cluster/stats/last',
+        # 'cluster/actions',
+    ]
+
+    def metric_name(self, metric):
+        return self.normalize(metric.lower(), self.SOURCE_TYPE_NAME)
+
+    def _get_raw_stats(self, host, port, username, password):
+        data = {}
+        for endpoint in self.stats_endpoints:
+            url = 'https://%s:%s/v1/%s' % (host, port, endpoint)
+
+            req = urllib2.Request(url)
+
+            # encodestring() appends a trailing newline; strip it off.
+            base64string = base64.encodestring('%s:%s' % (username, password))
+            base64string = base64string[:-1]
+            req.add_header("Authorization", "Basic %s" % base64string)
+
+            # Skip TLS verification (RLEC typically serves a self-signed cert).
+            context = ssl._create_unverified_context()
+            response = urllib2.urlopen(req, context=context)
+
+            data[endpoint] = json.loads(response.read())
+
+        return data
+
+    def gauge(self, metric, value, *args, **kwargs):
+        metric = self.metric_name(metric)
+        return super(RedisLabsEnterpriseClusterCheck, self).gauge(
+            metric, value, *args, **kwargs
+        )
+
+    def _get_metrics_dbs(self, raw_stats):
+        bdb_stats_map = raw_stats['bdbs/stats/last']
+        for item in raw_stats['bdbs']:
+            uid = item['uid']
+            str_uid = str(uid)
+            bdb_stats = bdb_stats_map[str_uid]
+
+            name = item['name']
+
+            tags = [
+                'db_name:%s' % name
+            ]
+
+            mem_gigs = int(item['memory_size']) / GIG
+            used_gigs = int(bdb_stats['used_memory']) / GIG
+
+            self.gauge('db.total_size_in_gigs', mem_gigs, tags=tags)
+            self.gauge('db.num_shards', item['shards_count'], tags=tags)
+            self.gauge('db.read_hits', bdb_stats['read_hits'], tags=tags)
+            self.gauge('db.read_misses', bdb_stats['read_misses'], tags=tags)
+            self.gauge('db.write_hits', bdb_stats['write_hits'], tags=tags)
+            self.gauge('db.write_misses', bdb_stats['write_misses'], tags=tags)
+            self.gauge('db.num_connections', bdb_stats['conns'], tags=tags)
+            self.gauge('db.num_keys', bdb_stats['no_of_keys'], tags=tags)
+            self.gauge('db.bytes_added', bdb_stats['ingress_bytes'], tags=tags)
+            self.gauge('db.bytes_read', bdb_stats['egress_bytes'], tags=tags)
+            self.gauge('db.count_evicted', bdb_stats['evicted_objects'],
+                       tags=tags)
+            self.gauge('db.count_expired', bdb_stats['expired_objects'],
+                       tags=tags)
+            self.gauge('db.ops_per_sec',
+                       bdb_stats['instantaneous_ops_per_sec'], tags=tags)
+            self.gauge('db.used_memory_in_gigs', used_gigs, tags=tags)
+
+    def _get_metrics_nodes(self, raw_stats):
+        node_stats_map = raw_stats['nodes/stats/last']
+        for node in raw_stats['nodes']:
+            # Get the node ip. Don't send these stats if it doesn't match
+            # the node we're on.
+            ip = node['addr']
+            if ip != socket.gethostbyname(socket.gethostname()):
+                continue
+
+            uid = node['uid']
+            node_stats = node_stats_map[str(uid)]
+
+            tags = [
+                'node_ip:%s' % ip,
+                'node_uid:%s' % uid
+            ]
+
+            is_active = 1 if (node['status'] == 'active') else 0
+            ephemeral_gigs = int(node_stats['ephemeral_storage_free']) / GIG
+            persistent_gigs = int(node_stats['persistent_storage_free']) / GIG
+            memory_gigs = int(node_stats['free_memory']) / GIG
+
+            self.gauge('node.shard_count', node['shard_count'], tags=tags)
+            self.gauge('node.active', is_active, tags=tags)
+            self.gauge('node.connections', node_stats['conns'], tags=tags)
+            self.gauge('node.aof_rewrites', node_stats['cur_aof_rewrites'],
+                       tags=tags)
+            self.gauge('node.ephemeral_free_space_gigs', ephemeral_gigs,
+                       tags=tags)
+            self.gauge('node.persistent_free_space_gigs', persistent_gigs,
+                       tags=tags)
+            self.gauge('node.free_memory_gigs', memory_gigs, tags=tags)
+            self.gauge('node.requests', node_stats['total_req'], tags=tags)
+
+    def _get_metrics_shards(self, raw_stats):
+        """
+        At this time it looks like this isn't useful info
+
+        shards_stats_map = raw_stats['shards/stats/last']
+        """
+
+    def _get_metrics_cluster(self, raw_stats):
+        stats = raw_stats['cluster/stats/last']
+
+        tags = []
+
+        ephemeral_gigs = int(stats['ephemeral_storage_free']) / GIG
+        persistent_gigs = int(stats['persistent_storage_free']) / GIG
+        memory_gigs = int(stats['free_memory']) / GIG
+
+        self.gauge('cluster.connections', stats['conns'], tags=tags)
+        self.gauge('cluster.ephemeral_free_space_gigs', ephemeral_gigs,
+                   tags=tags)
+        self.gauge('cluster.persistent_free_space_gigs', persistent_gigs,
+                   tags=tags)
+        self.gauge('cluster.free_memory_gigs', memory_gigs, tags=tags)
+        self.gauge('cluster.requests', stats['total_req'], tags=tags)
+        self.gauge('cluster.bytes_added', stats['ingress_bytes'], tags=tags)
+        self.gauge('cluster.bytes_read', stats['egress_bytes'], tags=tags)
+        self.gauge('cluster.cpu_idle', stats['cpu_idle'], tags=tags)
+
+    def _get_metrics(self, host, port, username, password, tags):
+        try:
+            raw_stats = self._get_raw_stats(host, port, username, password)
+        except Exception:
+            self.service_check(self.SERVICE_CHECK, AgentCheck.CRITICAL)
+            self.increment(self.metric_name('node.get_stats.failure'), 1, tags=[
+                'host:%s' % host,
+            ])
+
+            raise
+
+        # Send all stats
+        self._get_metrics_dbs(raw_stats)
+        self._get_metrics_nodes(raw_stats)
+        self._get_metrics_shards(raw_stats)
+        self._get_metrics_cluster(raw_stats)
+
+        # Connection is ok.
+        self.service_check(self.SERVICE_CHECK, AgentCheck.OK)
+
+    # Called by datadog as the starting point for this check.
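+    # A minimal /etc/dd-agent/conf.d/redis-labs-enterprise-cluster.yaml --
+    # an illustrative sketch, not part of this diff. check() below requires
+    # username and password (RLEC API credentials; the values here are
+    # placeholders) and defaults host/port to 127.0.0.1:9443 when omitted:
+    #
+    #     init_config:
+    #
+    #     instances:
+    #       - host: 127.0.0.1
+    #         port: 9443
+    #         username: monitor@example.com
+    #         password: changeme
+    #         tags:
+    #           - cluster:rlec1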
+    def check(self, instance):
+        host = instance.get('host', self.DEFAULT_HOST)
+        port = int(instance.get('port', self.DEFAULT_PORT))
+        username = instance['username']
+        password = instance['password']
+
+        tags = {}
+        for item in instance.get('tags', []):
+            k, v = item.split(":", 1)
+            tags[k] = v
+
+        self._get_metrics(host, port, username, password, tags)
diff --git a/files/ssl_check_expire_days.py b/files/ssl_check_expire_days.py
new file mode 100644
index 00000000..cdbbdf9e
--- /dev/null
+++ b/files/ssl_check_expire_days.py
@@ -0,0 +1,26 @@
+# A rewrite of https://workshop.avatarnewyork.com/project/datadog-ssl-expires-check/
+# I prefer to test the site itself instead of the ssl cert file.
+
+import subprocess
+import time
+
+from checks import AgentCheck
+
+
+class SSLCheckExpireDays(AgentCheck):
+    def check(self, instance):
+        metric = "ssl.expire_in_days"
+        site = instance['site']
+        # Allow "host:port" site values (e.g. sso.udemy.com:9031); default to 443.
+        host, _, port = site.partition(":")
+        port = port or "443"
+        # Fetch the cert, extract its notAfter date, and print it as a unix timestamp.
+        cmd = ("echo | openssl s_client -showcerts -servername " + host +
+               " -connect " + host + ":" + port + " 2>/dev/null" +
+               " | openssl x509 -noout -dates | grep notAfter | cut -f 2 -d\\=" +
+               " | xargs -0 -I arg date -d arg \"+%s\"")
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
+        (output, err) = p.communicate()
+        if output:
+            output = output.rstrip("\n")
+            d0 = int(time.time())
+            d1 = int(output)
+            delta = d1 - d0
+            days = delta / 24 / 60 / 60  # convert the remaining seconds to days
+            tag = "site:" + site  # generate the tag
+            self.gauge(metric, int(days), tags=[tag])
+        else:
+            # Could not fetch or parse the certificate; report -1 so it can alert.
+            self.gauge(metric, -1, tags=["site:" + site])
diff --git a/files/ssl_check_expire_days.yaml b/files/ssl_check_expire_days.yaml
new file mode 100644
index 00000000..47478c9e
--- /dev/null
+++ b/files/ssl_check_expire_days.yaml
@@ -0,0 +1,7 @@
+init_config:
+
+instances:
+  - site: lb101.udemy.com
+  - site: lb102.udemy.com
+  - site: udemysupport.zendesk.com
+  - site: sso.udemy.com:9031
diff --git a/files/uwsgi.py b/files/uwsgi.py
new file mode 100755
index 00000000..81d698f3
--- /dev/null
+++ b/files/uwsgi.py
@@ -0,0 +1,128 @@
+"""
+To test this, run 'sudo -u dd-agent dd-agent check uwsgi'
+
+When ready:
+- place this file in /etc/dd-agent/checks.d/uwsgi.py
+- put the config file in /etc/dd-agent/conf.d/uwsgi.yaml
+- service datadog-agent restart
+"""
+
+import glob
+import hashlib
+import json
+import os
+import socket
+from stat import ST_CTIME
+import time
+
+from checks import AgentCheck
+
+
+class UwsgiCheck(AgentCheck):
+    SOURCE_TYPE_NAME = 'uwsgi'
+    SERVICE_CHECK = 'uwsgi.can_connect'
+
+    def metric_name(self, metric):
+        return self.normalize(metric.lower(), self.SOURCE_TYPE_NAME)
+
+    def gauge(self, metric, value, *args, **kwargs):
+        metric = self.metric_name(metric)
+        return super(UwsgiCheck, self).gauge(
+            metric, value, *args, **kwargs
+        )
+
+    def histogram(self, metric, value, *args, **kwargs):
+        metric = self.metric_name(metric)
+        return super(UwsgiCheck, self).histogram(
+            metric, value, *args, **kwargs
+        )
+
+    def _get_raw_stats(self):
+        # Pick the most recently created stats socket.
+        chosen_socket = None
+        latest_ctime = 0
+        files = glob.glob('/tmp/uwsgi_stats_*.socket')
+        for fname in files:
+            stats = os.stat(fname)
+
+            if stats[ST_CTIME] > latest_ctime:
+                latest_ctime = stats[ST_CTIME]
+                chosen_socket = fname
+
+        if not chosen_socket:
+            raise RuntimeError("Cannot find uwsgi stats socket file")
+
+        sock_obj = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        sock_obj.connect(chosen_socket)
+
+        json_str = ''
+        while True:
+            data = sock_obj.recv(4096)
+            if len(data) < 1:
+                break
+            json_str += data.decode('utf8')
+
+        return json.loads(json_str)
+
+    def _send_stats(self, data):
+        self._send_stats_workers(data)
+
+    def _send_stats_workers(self, data):
+        code_dir = data['cwd']
+        master_pid = data['pid']
+        global_tags = [
+            'code_dir:%s' % code_dir,
+            'master_pid:%s' % master_pid,
+        ]
+
+        self.gauge('listen_queue', data['listen_queue'], tags=global_tags)
+        self.gauge('listen_queue_errors', data['listen_queue_errors'], tags=global_tags)
+
+        for worker in data['workers']:
+            worker_tags = [
+                'pid:%s' % worker['pid'],
+                'worker_id:%s' % worker['id'],
+            ]
+            tags = worker_tags + global_tags
+
+            self.gauge('worker.accepting', worker['accepting'], tags=tags)
+            self.gauge('worker.status.%s' % worker['status'], 1, tags=tags)
+            self.gauge('worker.running_time', worker['running_time'],
+                       tags=tags)
+            self.gauge('worker.data_transmitted', worker['tx'], tags=tags)
+            self.gauge('worker.address_space', worker['vsz'], tags=tags)
+            self.gauge('worker.rss_memory', worker['rss'], tags=tags)
+            self.gauge('worker.respawn_count', worker['respawn_count'],
+                       tags=tags)
+            self.gauge('worker.exceptions_count', worker['exceptions'],
+                       tags=tags)
+            self.gauge('worker.harakiri_count', worker['harakiri_count'],
+                       tags=tags)
+            # avg_rt is reported in microseconds; convert to milliseconds.
+            self.histogram('worker.avg_response_time_ms',
+                           worker['avg_rt'] / 1000, tags=tags)
+
+    def _get_metrics(self, aggregation_key):
+        try:
+            raw_stats = self._get_raw_stats()
+        except Exception as e:
+            self.service_check(self.SERVICE_CHECK, AgentCheck.CRITICAL)
+            self.event({
+                'timestamp': int(time.time()),
+                'event_type': 'get_stats',
+                'msg_title': 'Cannot get stats',
+                'msg_text': str(e),
+                'aggregation_key': aggregation_key
+            })
+
+            raise
+
+        self._send_stats(raw_stats)
+
+        # Connection is ok.
+        self.service_check(self.SERVICE_CHECK, AgentCheck.OK)
+
+    # Called by datadog as the starting point for this check.
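+    # _get_raw_stats() globs /tmp/uwsgi_stats_*.socket, so uwsgi must expose a
+    # matching stats socket (e.g. started with `--stats
+    # /tmp/uwsgi_stats_app.socket`; the file name here is illustrative). A
+    # minimal /etc/dd-agent/conf.d/uwsgi.yaml -- the check reads nothing from
+    # the instance, so one empty instance is enough:
+    #
+    #     init_config:
+    #
+    #     instances:
+    #       - {}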
+    def check(self, instance):
+        # Constant aggregation key: all events from this check on a host
+        # aggregate together.
+        aggregation_key = hashlib.md5().hexdigest()
+        self._get_metrics(aggregation_key)
diff --git a/handlers/main.yml b/handlers/main.yml
new file mode 100644
index 00000000..39b137e9
--- /dev/null
+++ b/handlers/main.yml
@@ -0,0 +1,3 @@
+---
+- name: restart datadog-agent
+  service: name=datadog-agent state=restarted
diff --git a/meta/main.yml b/meta/main.yml
new file mode 100644
index 00000000..4f4286e1
--- /dev/null
+++ b/meta/main.yml
@@ -0,0 +1,34 @@
+---
+galaxy_info:
+  author: 'Brian Akins, Dustin Brown & Datadog'
+  description: Install Datadog agent and configure checks
+  license: Apache2
+  min_ansible_version: 1.6
+  platforms:
+    - name: Ubuntu
+      versions:
+        - lucid
+        - maverick
+        - natty
+        - oneiric
+        - precise
+        - quantal
+        - raring
+        - saucy
+        - trusty
+        - utopic
+        - vivid
+        - xenial
+    - name: Debian
+      versions:
+        - squeeze
+        - wheezy
+        - jessie
+    - name: EL
+      versions:
+        - 7
+        - 6
+        - 5
+  categories:
+    - monitoring
+dependencies: []
diff --git a/tasks/main.yml b/tasks/main.yml
new file mode 100644
index 00000000..61285f1e
--- /dev/null
+++ b/tasks/main.yml
@@ -0,0 +1,67 @@
+---
+- include: pkg-debian.yml
+  when: ansible_os_family == "Debian"
+
+- include: pkg-redhat.yml
+  when: ansible_os_family == "RedHat"
+
+- name: Create main Datadog agent configuration file
+  template:
+    src=datadog.conf.j2
+    dest=/etc/dd-agent/datadog.conf
+    owner={{ datadog_user }}
+    group={{ datadog_group }}
+  notify: restart datadog-agent
+
+# DEPRECATED: Remove specific handling of the process check for next major release
+- template: src=process.yaml.j2 dest=/etc/dd-agent/conf.d/process.yaml
+  when: datadog_process_checks is defined
+  notify: restart datadog-agent
+
+- debug: 'msg="[DEPRECATION NOTICE] Using `datadog_process_checks` is deprecated, use `process` under `datadog_checks` instead"'
+  when: datadog_process_checks is defined
+
+- service: name=datadog-agent state=started enabled=yes
+  when: datadog_enabled
+
+- service: name=datadog-agent state=stopped enabled=no
+  when: not datadog_enabled
+
+- name: Create a configuration file for each Datadog check
+  template:
+    src=checks.yaml.j2
+    dest=/etc/dd-agent/conf.d/{{ item }}.yaml
+    owner={{ datadog_user }}
+    group={{ datadog_group }}
+  with_items: '{{ datadog_checks.keys() }}'
+  notify:
+    - restart datadog-agent
+
+- name: Create a check agent for each Datadog check
+  copy:
+    src={{ item.name }}.py
+    dest=/etc/dd-agent/checks.d/{{ item.name }}.py
+    owner={{ datadog_user }}
+    group={{ datadog_group }}
+  with_items: '{{ datadog_check_agents }}'
+  notify:
+    - restart datadog-agent
+
+- name: Upgrade snakebite version
+  pip:
+    name: snakebite
+    version: "{{ datadog_snakebite_version }}"
+    executable: /opt/datadog-agent/embedded/bin/pip
+    state: present
+  when: hadoop_server_monitoring is defined and hadoop_server_monitoring
+  notify: restart datadog-agent
+
+- name: Install perl-DBD-MySQL
+  yum: state=present name=perl-DBD-MySQL
+  when: mysql_server_monitoring is defined and mysql_server_monitoring
+
+- mysql_db: name=datadog state=present
+  when: data_server_monitoring is defined and data_server_monitoring
+
+- mysql_user: name=datadog password="{{ datadog_mysql_data_password }}" priv=datadog.*:ALL state=present
+  when: data_server_monitoring is defined and data_server_monitoring
\ No newline at end of file
diff --git a/tasks/pkg-debian.yml b/tasks/pkg-debian.yml
new file mode 100644
index 00000000..bbce6402
--- /dev/null
+++ b/tasks/pkg-debian.yml
@@ -0,0 +1,12 @@
+---
+- apt: name=apt-transport-https state=latest
+
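+# Import the Datadog packaging key: from a user-supplied URL when
+# datadog_apt_key_url is set, otherwise from the Ubuntu keyserver.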
+- apt_key: id=C7A7DA52 keyserver=hkp://keyserver.ubuntu.com:80 state=present
+  when: datadog_apt_key_url is not defined
+
+- apt_key: id=C7A7DA52 url={{ datadog_apt_key_url }} state=present
+  when: datadog_apt_key_url is defined
+
+- apt_repository: repo='{{ datadog_apt_repo }}' state=present update_cache=yes
+
+- apt: name=datadog-agent state=latest
diff --git a/tasks/pkg-redhat.yml b/tasks/pkg-redhat.yml
new file mode 100644
index 00000000..bfca5709
--- /dev/null
+++ b/tasks/pkg-redhat.yml
@@ -0,0 +1,14 @@
+---
+- name: Copy repo file into place
+  template: src=datadog.repo.j2 dest=/etc/yum.repos.d/datadog.repo owner=root group=root mode=0644
+
+- name: Remove dd-trace-agent package
+  yum: name=dd-trace-agent state=removed
+
+- name: Install datadog-agent package
+  yum: name=datadog-agent state=latest enablerepo=datadog
+  notify: restart datadog-agent
+
+- name: Configure datadog-agent
+  template: src=datadog.conf.j2 dest=/etc/dd-agent/datadog.conf
+  notify: restart datadog-agent
diff --git a/templates/checks.yaml.j2 b/templates/checks.yaml.j2
new file mode 100644
index 00000000..39274f28
--- /dev/null
+++ b/templates/checks.yaml.j2
@@ -0,0 +1 @@
+{{ datadog_checks[item] | to_nice_yaml }}
diff --git a/templates/datadog.conf.j2 b/templates/datadog.conf.j2
new file mode 100644
index 00000000..e3007427
--- /dev/null
+++ b/templates/datadog.conf.j2
@@ -0,0 +1,23 @@
+# Managed by Ansible
+
+[Main]
+{% if datadog_config["dd_url"] is not defined -%}
+dd_url: {{ datadog_url | default('https://app.datadoghq.com') }}
+{% endif %}
+{% if datadog_config["api_key"] is not defined -%}
+api_key: {{ datadog_api_key | default('youshouldsetthis') }}
+{% endif %}
+{% if datadog_config["use_mount"] is not defined -%}
+use_mount: {{ datadog_use_mount | default('no') }}
+{% endif %}
+{% if datadog_config["tags"] is not defined -%}
+tags: {{ datadog_tags }}
+{% endif %}
+
+{# These variables are free-style, passed through a hash -#}
+{% if datadog_config -%}
+{{ datadog_config | to_nice_yaml }}
+{% endif %}
diff --git a/templates/datadog.repo.j2 b/templates/datadog.repo.j2
new file mode 100644
index 00000000..8df0ed32
--- /dev/null
+++ b/templates/datadog.repo.j2
@@ -0,0 +1,6 @@
+[datadog]
+name = Datadog, Inc.
+baseurl = https://yum.datadoghq.com/rpm/{{ ansible_userspace_architecture }}/
+enabled=0
+gpgcheck=1
+gpgkey=https://yum.datadoghq.com/DATADOG_RPM_KEY.public
diff --git a/templates/process.yaml.j2 b/templates/process.yaml.j2
new file mode 100644
index 00000000..7db27049
--- /dev/null
+++ b/templates/process.yaml.j2
@@ -0,0 +1,18 @@
+{# DEPRECATED: Remove specific handling of the process check for next major release -#}
+init_config:
+
+instances:
+{% for process in datadog_process_checks %}
+- name: {{ process.name }}
+  search_string: {{ process.search_string }}
+  {% if process.exact_match is defined -%}
+  exact_match: {{ process.exact_match }}
+  {% endif -%}
+  {% if process.cpu_check_interval is defined -%}
+  cpu_check_interval: {{ process.cpu_check_interval }}
+  {% endif -%}
+  {% if process.ignore_denied_access is defined -%}
+  ignore_denied_access: {{ process.ignore_denied_access }}
+  {% endif %}
+
+{% endfor %}
diff --git a/tests/monitoring.bats b/tests/monitoring.bats
new file mode 100644
index 00000000..0c0bd387
--- /dev/null
+++ b/tests/monitoring.bats
@@ -0,0 +1,13 @@
+#!/usr/bin/env bats
+
+@test "the datadog-agent should be running" {
+  # The [d] bracket trick keeps grep from matching its own process.
+  [ "$(ps aux | grep '[d]atadog-agent')" ]
+}
+
+@test "datadog redis config is present" {
+  [ -f "/etc/dd-agent/conf.d/redisdb.yaml" ]
+}
+
+@test "datadog mysql config is present" {
+  [ -f "/etc/dd-agent/conf.d/mysql.yaml" ]
+}
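+
+# A further smoke test sketch: this role always templates the main agent
+# configuration, so asserting its presence is safe regardless of which
+# checks a host enables.
+@test "datadog main config is present" {
+  [ -f "/etc/dd-agent/datadog.conf" ]
+}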