From 63063da9ed34c9f657be7c44568d189687f60c77 Mon Sep 17 00:00:00 2001
From: Brian Christner
Date: Tue, 21 Feb 2017 13:38:27 +0100
Subject: [PATCH] adding Prometheus configs

---
 alertmanager/config.yml   | 11 +++++++++++
 config.monitoring         |  2 ++
 prometheus/alert.rules    | 13 +++++++++++++
 prometheus/prometheus.yml | 31 +++++++++++++++++++++++++++++++
 4 files changed, 57 insertions(+)
 create mode 100644 alertmanager/config.yml
 create mode 100644 config.monitoring
 create mode 100644 prometheus/alert.rules
 create mode 100644 prometheus/prometheus.yml

diff --git a/alertmanager/config.yml b/alertmanager/config.yml
new file mode 100644
index 0000000..120697b
--- /dev/null
+++ b/alertmanager/config.yml
@@ -0,0 +1,11 @@
+route:
+  receiver: 'slack'
+  repeat_interval: 1m
+
+receivers:
+  - name: 'slack'
+    slack_configs:
+      - send_resolved: true
+        username: 'vegasbrianc'
+        channel: '#notifications'
+        api_url: 'https://hooks.slack.com/services/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'  # NOTE(review): webhook secret committed in clear text - rotate it and inject at deploy time
diff --git a/config.monitoring b/config.monitoring
new file mode 100644
index 0000000..f12466b
--- /dev/null
+++ b/config.monitoring
@@ -0,0 +1,2 @@
+GF_SECURITY_ADMIN_PASSWORD=foobar
+GF_USERS_ALLOW_SIGN_UP=false
diff --git a/prometheus/alert.rules b/prometheus/alert.rules
new file mode 100644
index 0000000..1da000a
--- /dev/null
+++ b/prometheus/alert.rules
@@ -0,0 +1,13 @@
+ALERT service_down
+  IF up == 0
+  ANNOTATIONS {
+    summary = "Instance {{ $labels.instance }} is down :( ",
+    description = "{{ $labels.instance }} of job {{ $labels.job }} is not happy.",
+  }
+
+ALERT high_load
+  IF node_load1 > 0.5
+  ANNOTATIONS {
+    summary = "Instance {{ $labels.instance }} under high load",
+    description = "{{ $labels.instance }} of job {{ $labels.job }} is under high load.",
+  }
diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml
new file mode 100644
index 0000000..03c3e51
--- /dev/null
+++ b/prometheus/prometheus.yml
@@ -0,0 +1,31 @@
+# my global config
+global:
+  scrape_interval: 15s  # By default, scrape targets every 15 seconds.
+  evaluation_interval: 15s  # By default, evaluate rules every 15 seconds.
+  # scrape_timeout is set to the global default (10s).
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'Alertmanager'
+
+# Load the rule files listed here; evaluate them every 'evaluation_interval'.
+rule_files:
+  - "alert.rules"
+  # - "first.rules"
+  # - "second.rules"
+
+# A scrape configuration with a single job that scrapes three static targets:
+# node-exporter, Prometheus itself (localhost:9090) and metrics:9171.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to every scraped series.
+  - job_name: 'Monitoring_Mayhem'
+
+    # Uncomment to scrape this job every 5 seconds instead of the global default.
+    # scrape_interval: 5s
+
+    # metrics_path defaults to '/metrics'
+    # scheme defaults to 'http'.
+
+    static_configs:
+      - targets: ['node-exporter:9100', 'localhost:9090', 'metrics:9171']