/
sync_prometheus.go
178 lines (159 loc) · 6.8 KB
/
sync_prometheus.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
// Copyright (c) 2021, 2023, Oracle and/or its affiliates.
// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
package vmc
import (
"context"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"strings"
"github.com/Jeffail/gabs/v2"
clustersv1alpha1 "github.com/verrazzano/verrazzano/cluster-operator/apis/clusters/v1alpha1"
"github.com/verrazzano/verrazzano/pkg/constants"
"github.com/verrazzano/verrazzano/pkg/mcconstants"
"github.com/verrazzano/verrazzano/pkg/metricsutils"
vpoconst "github.com/verrazzano/verrazzano/platform-operator/constants"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
controllerruntime "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/yaml"
)
// Constants used when building the Prometheus scrape configuration for managed clusters.
const (
// scrapeConfigsKey holds the key name "scrape_configs". It is not referenced in the visible part of this file.
scrapeConfigsKey = "scrape_configs"
// prometheusConfigBasePath is a directory path ending in a slash. It is not referenced in the visible part of this file.
prometheusConfigBasePath = "/etc/prometheus/config/"
// managedCertsBasePath is the directory prefix that newScrapeConfig concatenates with getCAKey(vmc)
// to produce the tls_config ca_file value of a managed cluster's scrape job.
managedCertsBasePath = "/etc/prometheus/managed-cluster-ca-certs/"
// scrapeConfigTemplate is the YAML text of a single federated scrape job. Its first key is
// constants.PrometheusJobNameKey. newScrapeConfig substitutes the ##JOB_NAME##, ##HOST##, ##PASSWORD##
// and ##CLUSTER_NAME## placeholders before handing the text to metricsutils.ParseScrapeConfig.
// NOTE(review): the substituted values are inserted verbatim; ##HOST## and ##PASSWORD## are not quoted
// in the template, so values containing YAML-significant characters may not parse as intended — confirm.
// NOTE(review): the YAML below shows no nesting indentation in this copy of the file — confirm against
// the upstream source that indentation has not been lost.
scrapeConfigTemplate = constants.PrometheusJobNameKey + `: ##JOB_NAME##
scrape_interval: 20s
scrape_timeout: 15s
scheme: https
honor_labels: true
metrics_path: '/federate'
params:
'match[]':
- '{__name__=~"..*"}'
# If an existing verrazzano_cluster metric is present, make sure it is always replaced to
# the right managed cluster name for the cluster. Do this with a metric_relabel_config so it
# happens at the end i.e. _after_ scraping is completed, before ingesting into data source.
metric_relabel_configs:
- action: replace
source_labels:
- verrazzano_cluster
target_label: verrazzano_cluster
replacement: '##CLUSTER_NAME##'
static_configs:
- targets:
- ##HOST##
labels: # add the labels if not already present on managed cluster (this will no op if present)
verrazzano_cluster: '##CLUSTER_NAME##'
basic_auth:
username: verrazzano-prom-internal
password: ##PASSWORD##
`
)
// syncPrometheusScraper creates or refreshes the scrape configuration for the given managed cluster by
// delegating to mutateAdditionalScrapeConfigs, passing along the secret that carries the cluster's CA cert.
//
// The additional scrape configs and managed cluster TLS secrets are needed by the Prometheus Operator Prometheus
// because the federated scrape config can't be represented in a PodMonitor, ServiceMonitor, etc.
//
// Any error reported by mutateAdditionalScrapeConfigs is returned unchanged.
func (r *VerrazzanoManagedClusterReconciler) syncPrometheusScraper(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, secret *corev1.Secret) error {
	if err := r.mutateAdditionalScrapeConfigs(ctx, vmc, secret); err != nil {
		return err
	}
	return nil
}
// newScrapeConfig builds the federated scrape job for a managed cluster by filling in scrapeConfigTemplate
// and parsing the result with metricsutils.ParseScrapeConfig.
//
// Parameters:
//   - cacrtSecret: secret whose "cacrt" entry, when non-empty, enables the tls_config section.
//   - vmc: the managed cluster; its Name is used as job and cluster name, its Status.PrometheusHost as target.
//
// Returns (nil, nil) when cacrtSecret is nil or the VMC has no Prometheus host recorded, i.e. there is no
// scrape job to produce. Returns a non-nil error when the internal Prometheus credentials secret cannot be
// read, the rendered template cannot be parsed, or the tls_config values cannot be set.
func (r *VerrazzanoManagedClusterReconciler) newScrapeConfig(cacrtSecret *corev1.Secret, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (*gabs.Container, error) {
	if cacrtSecret == nil || vmc.Status.PrometheusHost == "" {
		return nil, nil
	}

	vzPromSecret, err := r.getSecret(constants.VerrazzanoSystemNamespace, constants.VerrazzanoPromInternal, true)
	if err != nil {
		return nil, err
	}

	// A Replacer substitutes all placeholders in one pass over the template. Unlike repeated ReplaceAll calls
	// driven by map iteration, the outcome does not depend on iteration order and a substituted value (for
	// example a password that happens to contain "##HOST##") is never itself rewritten.
	placeholders := strings.NewReplacer(
		"##JOB_NAME##", vmc.Name,
		"##HOST##", vmc.Status.PrometheusHost,
		"##PASSWORD##", string(vzPromSecret.Data[mcconstants.VerrazzanoPasswordKey]),
		"##CLUSTER_NAME##", vmc.Name,
	)

	scrapeConfig, err := metricsutils.ParseScrapeConfig(placeholders.Replace(scrapeConfigTemplate))
	if err != nil {
		return nil, err
	}

	// Only point the job at a CA file when the managed cluster actually supplied a CA cert.
	if len(cacrtSecret.Data["cacrt"]) > 0 {
		if _, err := scrapeConfig.Set(managedCertsBasePath+getCAKey(vmc), "tls_config", "ca_file"); err != nil {
			return nil, err
		}
		if _, err := scrapeConfig.Set(false, "tls_config", "insecure_skip_verify"); err != nil {
			return nil, err
		}
	}
	return scrapeConfig, nil
}
// deleteClusterPrometheusConfiguration drops the managed cluster's entry from the Prometheus additional
// scrape configuration. It does so by calling mutateAdditionalScrapeConfigs with a nil CA cert secret,
// which makes newScrapeConfig yield no scrape job for the cluster.
//
// Any error reported by mutateAdditionalScrapeConfigs is returned unchanged.
func (r *VerrazzanoManagedClusterReconciler) deleteClusterPrometheusConfiguration(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error {
	// No CA cert secret: signals that the cluster should not have a scrape job.
	var noCACertSecret *corev1.Secret
	return r.mutateAdditionalScrapeConfigs(ctx, vmc, noCACertSecret)
}
// parsePrometheusConfig turns a Prometheus configuration given as YAML text into an editable gabs container.
// The YAML is first converted to JSON and the JSON is then parsed by gabs.
//
// Returns (nil, err) if either the YAML-to-JSON conversion or the JSON parse fails.
func parsePrometheusConfig(promConfigStr string) (*gabs.Container, error) {
	asJSON, convErr := yaml.YAMLToJSON([]byte(promConfigStr))
	if convErr != nil {
		return nil, convErr
	}

	parsed, parseErr := gabs.ParseJSON(asJSON)
	if parseErr != nil {
		return nil, parseErr
	}

	return parsed, nil
}
// getCAKey returns the key under which the managed cluster's CA cert is looked up: the cluster name
// prefixed with "ca-". newScrapeConfig appends this key to managedCertsBasePath to form the scrape
// job's ca_file path.
func getCAKey(vmc *clustersv1alpha1.VerrazzanoManagedCluster) string {
	const caKeyPrefix = "ca-"
	return caKeyPrefix + vmc.Name
}
// mutateAdditionalScrapeConfigs adds and removes scrape config for managed clusters to the additional scrape
// configurations secret. Prometheus Operator appends the raw scrape config in this secret to the scrape config
// it generates from PodMonitor and ServiceMonitor resources.
//
// Parameters:
//   - ctx: context passed to the create-or-update call against the cluster.
//   - vmc: the managed cluster whose scrape job (named after vmc.Name) is being edited.
//   - cacrtSecret: the cluster's CA cert secret; nil results in no new scrape job being built for the cluster.
//
// Returns an error if the existing secret cannot be read (other than "not found"), if the scrape
// configuration cannot be built, parsed, edited or serialized, or if the secret cannot be created/updated.
func (r *VerrazzanoManagedClusterReconciler) mutateAdditionalScrapeConfigs(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, cacrtSecret *corev1.Secret) error {
	// get the existing additional scrape config, if the secret doesn't exist we will create it
	secret, err := r.getSecret(vpoconst.VerrazzanoMonitoringNamespace, constants.PromAdditionalScrapeConfigsSecretName, false)
	if err != nil && !errors.IsNotFound(err) {
		return err
	}

	// Reading from a nil map yields the zero value, so a missing secret or empty Data gives an empty string.
	jobsStr := string(secret.Data[constants.PromAdditionalScrapeConfigsSecretKey])

	// create the scrape config for the new managed cluster
	newScrapeConfig, err := r.newScrapeConfig(cacrtSecret, vmc)
	if err != nil {
		return err
	}

	// parse the scrape config so we can manipulate it
	jobs, err := metricsutils.ParseScrapeConfig(jobsStr)
	if err != nil {
		return err
	}
	scrapeConfigs, err := metricsutils.EditScrapeJob(jobs, vmc.Name, newScrapeConfig)
	if err != nil {
		return err
	}

	scrapeConfigsYAML, err := yaml.JSONToYAML(scrapeConfigs.Bytes())
	if err != nil {
		return err
	}

	// update the secret with the updated scrape config
	secret = corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name:      constants.PromAdditionalScrapeConfigsSecretName,
			Namespace: vpoconst.VerrazzanoMonitoringNamespace,
		},
		Data: map[string][]byte{},
	}
	result, err := controllerruntime.CreateOrUpdate(ctx, r.Client, &secret, func() error {
		// CreateOrUpdate loads the stored object into secret before calling this function, which can
		// leave Data nil when the stored secret has no data; writing to a nil map would panic.
		if secret.Data == nil {
			secret.Data = make(map[string][]byte, 1)
		}
		secret.Data[constants.PromAdditionalScrapeConfigsSecretKey] = scrapeConfigsYAML
		return nil
	})
	if err != nil {
		return err
	}

	if result != controllerutil.OperationResultNone {
		r.log.Infof("The Prometheus additional scrape config Secret %s has been modified for VMC %s", secret.Name, vmc.Name)
	}
	return nil
}