Skip to content

Commit

Permalink
Use device service to retrieve GPU details
Browse files Browse the repository at this point in the history
  • Loading branch information
sonicaj committed Sep 10, 2020
1 parent ba8bec4 commit b12446e
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 46 deletions.
10 changes: 3 additions & 7 deletions src/middlewared/middlewared/etc_files/docker.py
@@ -1,12 +1,8 @@
import json
import os
import re
import subprocess


RE_LDCONFIG = re.compile(r'@/sbin/ldconfig')


def nvidia_configuration(middleware):
# this needs to happen for nvidia gpu to work properly for docker containers
# https://github.com/NVIDIA/nvidia-docker/issues/854#issuecomment-572175484
Expand All @@ -15,10 +11,10 @@ def nvidia_configuration(middleware):
return {}

with open(nvidia_config_path, 'r') as f:
data = RE_LDCONFIG.sub('/sbin/ldconfig', f.read())
data = f.read()

with open(nvidia_config_path, 'w') as f:
f.write(data)
f.write(data.replace('@/sbin/ldconfig', '/sbin/ldconfig'))

return {
'runtimes': {'nvidia': {'path': '/usr/bin/nvidia-container-runtime', 'runtimeArgs': []}},
Expand All @@ -27,7 +23,7 @@ def nvidia_configuration(middleware):


def gpu_configuration(middleware):
available_gpu = middleware.call_sync('hardware.available_gpu')
available_gpu = middleware.call_sync('device.get_gpus')

if available_gpu['vendor'] == 'NVIDIA':
return nvidia_configuration(middleware)
Expand Down
4 changes: 2 additions & 2 deletions src/middlewared/middlewared/plugins/device.py
Expand Up @@ -4,9 +4,9 @@

class DeviceService(Service):

@accepts(Str('type', enum=['SERIAL', 'DISK']))
@accepts(Str('type', enum=['SERIAL', 'DISK', 'GPU']))
async def get_info(self, _type):
"""
Get info for SERIAL/DISK device types.
Get info for SERIAL/DISK/GPU device types.
"""
return await self.middleware.call(f'device.get_{_type.lower()}s')
Expand Up @@ -33,6 +33,10 @@ class DeviceInfoBase(ServicePartBase):
'blocks': None,
}

@private
async def get_gpus(self):
raise NotImplementedError()

@private
async def get_serials(self):
raise NotImplementedError()
Expand Down
Expand Up @@ -120,3 +120,6 @@ async def get_serials(self):

async def get_storage_devices_topology(self):
return await camcontrol_list()

async def get_gpus(self):
raise NotImplementedError()
25 changes: 25 additions & 0 deletions src/middlewared/middlewared/plugins/device_/device_info_linux.py
Expand Up @@ -8,15 +8,19 @@

from .device_info_base import DeviceInfoBase
from middlewared.service import private, Service
from middlewared.utils import run

RE_DISK_SERIAL = re.compile(r'Unit serial number:\s*(.*)')
RE_GPU_VENDOR = re.compile(r'description:\s*VGA compatible controller[\s\S]*vendor:\s*(.*)')
RE_NVME_PRIVATE_NAMESPACE = re.compile(r'nvme[0-9]+c')
RE_SERIAL = re.compile(r'state.*=\s*(\w*).*io (.*)-(\w*)\n.*', re.S | re.A)
RE_UART_TYPE = re.compile(r'is a\s*(\w+)')


class DeviceService(Service, DeviceInfoBase):

GPU = None

def get_serials(self):
devices = []
for tty in map(lambda t: os.path.basename(t), glob.glob('/dev/ttyS*')):
Expand Down Expand Up @@ -225,3 +229,24 @@ def get_storage_devices_topology(self):
}
}
return topology

async def get_gpus(self):
    """
    Report whether a supported GPU is present on this system.

    Runs `lshw -numeric -C display` and inspects every node it lists. Only
    NVIDIA based VGA controllers are treated as supported right now.

    Returns a dict of the form `{'available': bool, 'vendor': 'NVIDIA' or None}`.
    The result of a successful probe is cached on `self.GPU`. If `lshw` exits
    with a non-zero status the error is logged and "not available" is returned
    without being cached, so a later call probes again.
    """
    if self.GPU:
        return self.GPU

    not_available = {'available': False, 'vendor': None}
    cp = await run(['lshw', '-numeric', '-C', 'display'], check=False)
    if cp.returncode:
        self.logger.error('Unable to retrieve GPU details: %s', cp.stderr.decode(errors='ignore'))
        return not_available

    # lshw begins every node with a `*-<id>` header. RE_GPU_VENDOR is greedy, so
    # running it over the complete output only ever yields the vendor of the
    # last node; with several display adapters an NVIDIA card listed first was
    # missed. Checking each node on its own avoids that.
    gpu = not_available
    for node in cp.stdout.decode(errors='ignore').split('*-'):
        vendor = RE_GPU_VENDOR.search(node)
        # We only support nvidia based GPUs right now based on equipment available
        if vendor and 'nvidia' in vendor.group(1).lower():
            gpu = {'available': True, 'vendor': 'NVIDIA'}
            break

    self.GPU = gpu
    return self.GPU
Empty file.
36 changes: 0 additions & 36 deletions src/middlewared/middlewared/plugins/hardware_linux/gpu.py

This file was deleted.

Expand Up @@ -49,7 +49,7 @@ async def setup(self):
self.logger.error('Unable to configure GPU for node: %s', e)

async def setup_internal(self):
gpu = await self.middleware.call('hardware.available_gpu')
gpu = await self.middleware.call('device.get_gpus')
to_remove = list(GPU_CONFIG.keys())
daemonsets = {
f'{d["metadata"]["namespace"]}_{d["metadata"]["name"]}': d
Expand Down

0 comments on commit b12446e

Please sign in to comment.