Skip to content

Vendor gstatus in glusterfs #20338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions glusterfs/assets/configuration/spec.yaml
Original file line number Diff line number Diff line change
@@ -4,15 +4,6 @@ files:
options:
- template: init_config
options:
- name: gstatus_path
description: |
Path to the gstatus command.

A version of gstatus is shipped with the Agent binary.
If you are using a source install, specify the location of gstatus.
value:
type: string
example: <datadog-agent>/embedded/sbin/gstatus
- template: init_config/default
- template: instances
options:
100 changes: 35 additions & 65 deletions glusterfs/datadog_checks/glusterfs/check.py
Original file line number Diff line number Diff line change
@@ -2,31 +2,17 @@
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import json

try:
from json.decoder import JSONDecodeError
except ImportError:
from simplejson import JSONDecodeError

import os
import subprocess
from typing import Dict, List # noqa: F401

from datadog_checks.base import AgentCheck, ConfigurationError
from datadog_checks.base import AgentCheck
from datadog_checks.base.config import is_affirmative

try:
import datadog_agent
except ImportError:
from datadog_checks.base.stubs import datadog_agent

from .glusterlib.cluster import Cluster
from .metrics import BRICK_STATS, CLUSTER_STATS, PARSE_METRICS, VOL_SUBVOL_STATS, VOLUME_STATS

GLUSTER_VERSION = 'glfs_version'
CLUSTER_STATUS = 'cluster_status'

GSTATUS_PATH_SUFFIX = '/embedded/sbin/gstatus'


class GlusterfsCheck(AgentCheck):
__NAMESPACE__ = 'glusterfs'
@@ -39,57 +25,10 @@ def __init__(self, name, init_config, instances):
# type: (str, Dict, List[Dict]) -> None
super(GlusterfsCheck, self).__init__(name, init_config, instances)
self._tags = self.instance.get('tags', [])

# Check if customer set gstatus path
if init_config.get('gstatus_path'):
self.gstatus_cmd = init_config.get('gstatus_path')
else:
path = datadog_agent.get_config('run_path')
if path.endswith('/run'):
path = path[:-4]
path = path + GSTATUS_PATH_SUFFIX
if os.path.exists(path):
self.gstatus_cmd = path
else:
raise ConfigurationError(
'Glusterfs check requires `gstatus` to be installed or set the path to the installed version.'
)
self.log.debug("Using gstatus path `%s`", self.gstatus_cmd)
self.use_sudo = is_affirmative(self.instance.get('use_sudo', True))

def get_gstatus_output(self, cmd):
    """Run *cmd* as a subprocess and return ``(stdout, stderr, returncode)``.

    ``cmd`` is a single command-line string (e.g. ``'sudo /path/gstatus -a'``).
    It is tokenized with ``shlex.split`` so quoted arguments — such as a
    ``gstatus`` path containing spaces — are preserved as single tokens;
    a naive ``str.split`` would break them apart.
    """
    # Local import keeps the module's top-level import block untouched.
    import shlex

    # shell=False (the default) with an argv list avoids shell injection.
    res = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
    return res.stdout, res.stderr, res.returncode

def check(self, _):
if self.use_sudo:
cmd = f'sudo -ln {self.gstatus_cmd}'
stdout, stderr, returncode = self.get_gstatus_output(cmd)
if returncode != 0 or not stdout:
raise Exception('The dd-agent user does not have sudo access: {!r}'.format(stderr or stdout))
gluster_cmd = f'sudo {self.gstatus_cmd}'
else:
gluster_cmd = self.gstatus_cmd
# Ensures units are universally the same by specifying the --units flag
gluster_cmd += ' -a -o json -u g'
self.log.debug("gstatus command: %s", gluster_cmd)
try:
# In testing I saw that even though we request the json, sometimes there's a line that appears at the top
# and thus will break the json loading. A line like:
# 'Note: Unable to get self-heal status for one or more volumes \n'
stdout, stderr, returncode = self.get_gstatus_output(gluster_cmd)
if stdout.lstrip().startswith('{'):
json_data = stdout
else:
json_data = stdout.split('\n', 1)[-1]
gstatus = json.loads(json_data)
except JSONDecodeError as e:
self.log.warning("Unable to decode gstatus output: %s", str(e))
raise
except Exception as e:
self.log.warning("Encountered error trying to collect gluster status: %s", str(e))
raise

gstatus = json.loads(self.get_gstatus_data())
if 'data' in gstatus:
data = gstatus['data']
self.submit_metrics(data, 'cluster', CLUSTER_STATS, self._tags)
@@ -111,7 +50,6 @@ def check(self, _):
self.service_check(
self.CLUSTER_SC, AgentCheck.WARNING, tags=self._tags, message="Cluster status is %s" % status
)

else:
self.log.warning("No data from gstatus: %s", gstatus)

@@ -212,3 +150,35 @@ def submit_service_check(self, sc_name, val, tags):
self.service_check(sc_name, AgentCheck.CRITICAL, tags=tags, message=msg)
else:
self.service_check(sc_name, AgentCheck.UNKNOWN, tags=tags, message=msg)

def get_gstatus_data(self):
    """Collect cluster state via the vendored gstatus library, returned as a JSON string.

    Builds the option set the gstatus ``Cluster`` expects — mirroring the
    CLI flags previously passed to the external gstatus binary
    (``-a -o json -u g``) — gathers the data, and serializes it through
    ``return_json``.
    """
    # Local import keeps the module's top-level import block untouched.
    from types import SimpleNamespace

    # SimpleNamespace replaces the `type('', (), {})()` anonymous-class hack:
    # same attribute-access contract, clearer intent.
    options = SimpleNamespace(
        volumes=False,
        alldata=True,       # equivalent of the old `-a` flag
        brickinfo=False,
        displayquota=False,
        displaysnap=False,
        units="g",          # gigabytes, so units are universally the same
        output_mode="json",
    )

    cluster = Cluster(options, self.log, self.use_sudo)
    cluster.gather_data()

    return self.return_json(cluster)

def return_json(self, data):
    """Serialize gathered cluster state into the JSON payload the check parses.

    Mirrors the top-level structure the standalone gstatus binary emits:
    a ``last_updated`` timestamp plus a ``data`` mapping of cluster fields.
    ``data`` is the gstatus ``Cluster`` object after ``gather_data()``.
    """
    from datetime import datetime

    summary = {
        'cluster_status': data.cluster_status,
        'glfs_version': data.glusterfs_version,
        'node_count': data.nodes,
        'nodes_active': data.nodes_reachable,
        'volume_count': data.volume_count,
        'volumes_started': data.volumes_started,
    }
    # Per-volume details are only meaningful when at least one volume exists.
    if data.volume_count:
        summary['volume_summary'] = data.volume_data

    return json.dumps({"last_updated": str(datetime.now()), "data": summary})
Original file line number Diff line number Diff line change
@@ -8,10 +8,6 @@
# ddev -x validate models -s <INTEGRATION_NAME>


def shared_gstatus_path():
    """Return the default value for the `gstatus_path` init_config option."""
    return '<datadog-agent>/embedded/sbin/gstatus'


def instance_disable_generic_tags():
    """Return the default for `disable_generic_tags`: generic tags stay enabled."""
    return False

Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@
from datadog_checks.base.utils.functions import identity
from datadog_checks.base.utils.models import validation

from . import defaults, validators
from . import validators


class SharedConfig(BaseModel):
@@ -25,7 +25,6 @@ class SharedConfig(BaseModel):
arbitrary_types_allowed=True,
frozen=True,
)
gstatus_path: Optional[str] = None
service: Optional[str] = None

@model_validator(mode='before')
@@ -38,8 +37,6 @@ def _validate(cls, value, info):
field_name = field.alias or info.field_name
if field_name in info.context['configured_fields']:
value = getattr(validators, f'shared_{info.field_name}', identity)(value, field=field)
else:
value = getattr(defaults, f'shared_{info.field_name}', lambda: value)()

return validation.utils.make_immutable(value)

8 changes: 0 additions & 8 deletions glusterfs/datadog_checks/glusterfs/data/conf.yaml.example
Original file line number Diff line number Diff line change
@@ -2,14 +2,6 @@
#
init_config:

## @param gstatus_path - string - optional - default: <datadog-agent>/embedded/sbin/gstatus
## Path to the gstatus command.
##
## A version of gstatus is shipped with the Agent binary.
## If you are using a source install, specify the location of gstatus.
#
# gstatus_path: <datadog-agent>/embedded/sbin/gstatus

## @param service - string - optional
## Attach the tag `service:<SERVICE>` to every metric, event, and service check emitted by this integration.
##
Empty file.
Loading
Oops, something went wrong.
Loading
Oops, something went wrong.