Skip to content

Commit

Permalink
Handle [Not Supported] returned by nvidia-smi (#6)
Browse files Browse the repository at this point in the history
For some old GPU cards, nvidia-smi cannot retrieve usage information
such as GPU utilization or the PIDs of running processes. We print
'??' or '--' placeholders for those unavailable values.

A simple unit test case (with a mock) for this issue is also added.
  • Loading branch information
wookayin committed Nov 7, 2016
1 parent 8e75c11 commit 902f9cd
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 11 deletions.
55 changes: 44 additions & 11 deletions gpustat.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ def __init__(self, entry):
self.entry = entry
self.processes = []

# Handle '[Not Supported]' for old GPU cards (#6)
for k in self.entry.keys():
if 'Not Supported' in self.entry[k]:
self.entry[k] = None

if self.entry['utilization.gpu'] is None:
self.entry['utilization.gpu'] = '??'


def __repr__(self):
return self.print_to(StringIO()).getvalue()

Expand All @@ -61,19 +70,25 @@ def print_to(self, fp,
):
# color settings
colors = {}
def _conditional(cond_fn, true_value, false_value,
                 error_value=ANSIColors.GRAY):
    """Choose a value from a predicate that is allowed to fail.

    Args:
        cond_fn: Zero-argument callable; its result is tested for truthiness.
        true_value: Returned when ``cond_fn()`` is truthy.
        false_value: Returned when ``cond_fn()`` is falsy.
        error_value: Returned when ``cond_fn()`` raises an exception.

    Returns:
        One of ``true_value``, ``false_value`` or ``error_value``.
    """
    try:
        return true_value if cond_fn() else false_value
    except Exception:
        # The predicates passed in call int() on entry values that may be
        # None or a placeholder such as '??', which raises. Catching
        # Exception (rather than a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating.
        return error_value

colors['C0'] = ANSIColors.RESET
colors['C1'] = ANSIColors.CYAN
colors['CName'] = ANSIColors.BLUE
colors['CTemp'] = ANSIColors.RED \
if int(self.entry['temperature.gpu']) < 50 \
else ANSIColors.BOLD_RED
colors['CTemp'] = _conditional(lambda: int(self.entry['temperature.gpu']) < 50,
ANSIColors.RED, ANSIColors.BOLD_RED)
colors['CMemU'] = ANSIColors.BOLD_YELLOW
colors['CMemT'] = ANSIColors.YELLOW
colors['CMemP'] = ANSIColors.YELLOW
colors['CUser'] = ANSIColors.GRAY
colors['CUtil'] = ANSIColors.GREEN \
if int(self.entry['utilization.gpu']) < 30 \
else ANSIColors.BOLD_GREEN
colors['CUtil'] = _conditional(lambda: int(self.entry['utilization.gpu']) < 30,
ANSIColors.GREEN, ANSIColors.BOLD_GREEN)

if not with_colors:
for k in list(colors.keys()):
Expand All @@ -88,16 +103,21 @@ def print_to(self, fp,
gpuname_width=gpuname_width)
reps += " |"

def _repr(v, none_value='???'):
if v is None: return none_value
else: return str(v)

def process_repr(p):
r = ''
if not show_cmd or show_user:
r += "{CUser}{}{C0}".format(p['user'], **colors)
r += "{CUser}{}{C0}".format(_repr(p['user'], '--'), **colors)
if show_cmd:
if r: r += ':'
r += "{C1}{}{C0}".format(p.get('comm', p['pid']), **colors)
r += "{C1}{}{C0}".format(_repr(p.get('comm', p['pid']), '--'), **colors)

if show_pid: r += ("/%s" % p['pid'])
r += '({CMemP}{}M{C0})'.format(p['used_memory'], **colors)
if show_pid:
r += ("/%s" % _repr(p['pid'], '--'))
r += '({CMemP}{}M{C0})'.format(_repr(p['used_memory'], '?'), **colors)
return r

for p in self.processes:
Expand Down Expand Up @@ -170,7 +190,8 @@ def running_processes():
})
process_entries.append(process_entry)

pid_map = {int(e['pid']) : None for e in process_entries}
pid_map = {int(e['pid']) : None for e in process_entries
if not 'Not Supported' in e['pid']}

# 2. map pid to username, etc.
if pid_map:
Expand All @@ -188,7 +209,19 @@ def running_processes():

# 3. add some process information to each process_entry
for process_entry in process_entries[:]:

if 'Not Supported' in process_entry['pid']:
# TODO move this stuff into somewhere appropriate
# such as running_processes(): process_entry = ...
# or introduce Process class to elegantly handle it
process_entry['user'] = None
process_entry['comm'] = None
process_entry['pid'] = None
process_entry['used_memory'] = None
continue

pid = int(process_entry['pid'])

if pid_map[pid] is None:
# !?!? this pid is listed in nvidia-smi's query result,
# but does not actually seem to be a valid running process. ignore!
Expand Down
50 changes: 50 additions & 0 deletions test_gpustat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Unit or integration tests for gpustat
"""

import unittest
import gpustat

# mock output for test
def _mock_check_output(cmd, shell=True):
if cmd.startswith('nvidia-smi --query-compute-apps'):
return '''\
GPU-10fb0fbd-2696-43f3-467f-d280d906a107, 48448, 4000
GPU-10fb0fbd-2696-43f3-467f-d280d906a107, 153223, 4000
GPU-d1df4664-bb44-189c-7ad0-ab86c8cb30e2, 192453, 3000
GPU-d1df4664-bb44-189c-7ad0-ab86c8cb30e2, 194826, 6000
GPU-50205d95-57b6-f541-2bcb-86c09afed564, 38310, 4245
GPU-50205d95-57b6-f541-2bcb-86c09afed564, [Not Supported], [Not Supported]
'''
elif cmd.startswith('nvidia-smi --query-gpu'):
return '''\
0, GPU-10fb0fbd-2696-43f3-467f-d280d906a107, GeForce GTX TITAN X, 80, 76, 8000, 12287
1, GPU-d1df4664-bb44-189c-7ad0-ab86c8cb30e2, GeForce GTX TITAN X, 36, 0, 9000, 12287
2, GPU-50205d95-57b6-f541-2bcb-86c09afed564, GeForce GTX TITAN X, 71, [Not Supported], 8520, 12287
'''
elif cmd.startswith('ps -o pid,user,comm -p'):
return '''\
PID USER COMMAND
48448 user1 python
154213 user1 caffe
38310 user3 python
153223 user2 python
194826 user3 caffe
192453 user1 torch
'''
else:
raise ValueError(cmd)

# Mocking: rebind gpustat.check_output to the stub defined above, so lookups
# of that name on the gpustat module get canned output.
# This runs when this test module is imported, and nothing in this file
# restores the original afterwards.
# NOTE(review): presumably gpustat imports check_output from subprocess -- confirm.
gpustat.check_output = _mock_check_output


class TestGPUStat(unittest.TestCase):
    """Smoke test that relies on the module-level patch of gpustat.check_output."""

    def test_new_query_mocked(self):
        """Run a query and print it in color with user, command and pid shown.

        There are no assertions; the test passes as long as nothing raises.
        """
        stats = gpustat.new_query()
        stats.print_formatted(
            no_color=False,
            show_user=True,
            show_cmd=True,
            show_pid=True,
        )


# Run the tests through unittest's runner when this file is executed directly.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 902f9cd

Please sign in to comment.