Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for enc/dec gpu utilization (#79) #80

Merged
merged 2 commits into from
Mar 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Options:
* `-f`, `--show-full-cmd` : Display full command and CPU stats of running processes
* `-p`, `--show-pid` : Display PID of the process
* `-F`, `--show-fan` : Display GPU fan speed
* `-e`, `--show-codec` : Display encoder and/or decoder utilization
* `-P`, `--show-power` : Display GPU power usage and/or limit (`draw` or `draw,limit`)
* `-a`, `--show-all` : Display all GPU properties above
* `--watch`, `-i`, `--interval` : Run in watch mode (equivalent to `watch gpustat`) if given. Denotes interval between updates. ([#41][gh-issue-41])
Expand Down
14 changes: 10 additions & 4 deletions gpustat/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,18 @@ def main(*argv):
help='Display PID of running process')
parser.add_argument('-F', '--show-fan-speed', '--show-fan',
action='store_true', help='Display GPU fan speed')
parser.add_argument('--json', action='store_true', default=False,
help='Print all the information in JSON format')
parser.add_argument('-v', '--version', action='version',
version=('gpustat %s' % __version__))
parser.add_argument(
ChaoticMind marked this conversation as resolved.
Show resolved Hide resolved
'-e', '--show-codec', nargs='?', const='enc,dec', default='',
choices=['enc', 'dec', 'enc,dec'],
help='Show encoder/decoder utilization'
)
parser.add_argument(
'-P', '--show-power', nargs='?', const='draw,limit',
choices=['', 'draw', 'limit', 'draw,limit', 'limit,draw'],
help='Show GPU power usage or draw (and/or limit)'
)
parser.add_argument('--json', action='store_true', default=False,
help='Print all the information in JSON format')
parser.add_argument(
'-i', '--interval', '--watch', nargs='?', type=float, default=0,
help='Use watch mode if given; seconds to wait between updates'
Expand All @@ -118,12 +121,15 @@ def main(*argv):
'--debug', action='store_true', default=False,
help='Allow to print additional informations for debugging.'
)
parser.add_argument('-v', '--version', action='version',
version=('gpustat %s' % __version__))
args = parser.parse_args(argv[1:])
if args.show_all:
args.show_cmd = True
args.show_user = True
args.show_pid = True
args.show_fan_speed = True
args.show_codec = 'enc,dec'
args.show_power = 'draw,limit'
del args.show_all

Expand Down
61 changes: 58 additions & 3 deletions gpustat/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,24 @@ def utilization(self):
v = self.entry['utilization.gpu']
return int(v) if v is not None else None

@property
def utilization_enc(self):
    """
    Returns the GPU encoder utilization (in percent),
    or None if the information is not available.
    """
    v = self.entry['utilization.enc']
    # Compare against None explicitly: a utilization of 0 is a valid
    # reading and must not be reported as "not available".
    return int(v) if v is not None else None

@property
def utilization_dec(self):
    """
    Returns the GPU decoder utilization (in percent),
    or None if the information is not available.
    """
    v = self.entry['utilization.dec']
    # Compare against None explicitly: a utilization of 0 is a valid
    # reading and must not be reported as "not available".
    return int(v) if v is not None else None

@property
def power_draw(self):
"""
Expand Down Expand Up @@ -163,8 +181,9 @@ def print_to(self, fp,
show_full_cmd=False,
show_user=False,
show_pid=False,
show_power=None,
show_fan_speed=None,
show_codec="",
show_power=None,
gpuname_width=16,
term=None,
):
Expand All @@ -181,8 +200,11 @@ def _conditional(cond_fn, true_value, false_value,
except Exception:
return error_value

_ENC_THRESHOLD = 50

colors['C0'] = term.normal
colors['C1'] = term.cyan
colors['CBold'] = term.bold
colors['CName'] = term.blue
colors['CTemp'] = _conditional(lambda: self.temperature < 50,
term.red, term.bold_red)
Expand All @@ -195,6 +217,12 @@ def _conditional(cond_fn, true_value, false_value,
colors['CUser'] = term.bold_black # gray
colors['CUtil'] = _conditional(lambda: self.utilization < 30,
term.green, term.bold_green)
colors['CUtilEnc'] = _conditional(
lambda: self.utilization_enc < _ENC_THRESHOLD,
term.green, term.bold_green)
colors['CUtilDec'] = _conditional(
lambda: self.utilization_dec < _ENC_THRESHOLD,
term.green, term.bold_green)
colors['CCPUUtil'] = term.green
colors['CPowU'] = _conditional(
lambda: float(self.power_draw) / self.power_limit < 0.4,
Expand All @@ -221,6 +249,17 @@ def _repr(v, none_value='??'):
reps += "%(FSpeed)s{entry[fan.speed]:>3} %%%(C0)s, "

reps += "%(CUtil)s{entry[utilization.gpu]:>3} %%%(C0)s"
if show_codec:
codec_info = []
if "enc" in show_codec:
codec_info.append(
"%(CBold)sE: %(C0)s"
"%(CUtilEnc)s{entry[utilization.enc]:>3} %%%(C0)s")
if "dec" in show_codec:
codec_info.append(
"%(CBold)sD: %(C0)s"
"%(CUtilDec)s{entry[utilization.dec]:>3} %%%(C0)s")
reps += " ({})".format(" ".join(codec_info))

if show_power:
reps += ", %(CPowU)s{entry[power.draw]:>3}%(C0)s "
Expand Down Expand Up @@ -386,6 +425,16 @@ def get_process_info(nv_process):
except N.NVMLError:
utilization = None # Not supported

try:
utilization_enc = N.nvmlDeviceGetEncoderUtilization(handle)
except N.NVMLError:
utilization_enc = None # Not supported

try:
utilization_dec = N.nvmlDeviceGetDecoderUtilization(handle)
except N.NVMLError:
utilization_dec = None # Not supported

try:
power = N.nvmlDeviceGetPowerUsage(handle)
except N.NVMLError:
Expand Down Expand Up @@ -437,6 +486,10 @@ def get_process_info(nv_process):
'temperature.gpu': temperature,
'fan.speed': fan_speed,
'utilization.gpu': utilization.gpu if utilization else None,
'utilization.enc':
utilization_enc[0] if utilization_enc else None,
'utilization.dec':
utilization_dec[0] if utilization_dec else None,
'power.draw': power // 1000 if power is not None else None,
'enforced.power.limit': power_limit // 1000
if power_limit is not None else None,
Expand Down Expand Up @@ -486,7 +539,8 @@ def __repr__(self):

def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
show_cmd=False, show_full_cmd=False, show_user=False,
show_pid=False, show_power=None, show_fan_speed=None,
show_pid=False, show_fan_speed=None,
show_codec="", show_power=None,
gpuname_width=16, show_header=True,
eol_char=os.linesep,
):
Expand Down Expand Up @@ -540,8 +594,9 @@ def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
show_full_cmd=show_full_cmd,
show_user=show_user,
show_pid=show_pid,
show_power=show_power,
show_fan_speed=show_fan_speed,
show_codec=show_codec,
show_power=show_power,
gpuname_width=gpuname_width,
term=t_color)
fp.write(eol_char)
Expand Down
30 changes: 22 additions & 8 deletions gpustat/test_gpustat.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,18 @@ def _decorated(*args, **kwargs):
mock_handles[2]: N.NVMLError_NotSupported(), # Not Supported
}.get(handle, RuntimeError))

N.nvmlDeviceGetEncoderUtilization.side_effect = _raise_ex(lambda handle: {
mock_handles[0]: [88, 167000], # [value, sample_rate]
mock_handles[1]: [0, 167000], # [value, sample_rate]
mock_handles[2]: N.NVMLError_NotSupported(), # Not Supported
}.get(handle, RuntimeError))

N.nvmlDeviceGetDecoderUtilization.side_effect = _raise_ex(lambda handle: {
mock_handles[0]: [67, 167000], # [value, sample_rate]
mock_handles[1]: [0, 167000], # [value, sample_rate]
mock_handles[2]: N.NVMLError_NotSupported(), # Not Supported
}.get(handle, RuntimeError))

# running process information: a bit annoying...
mock_process_t = namedtuple("Process_t", ['pid', 'usedGpuMemory'])

Expand Down Expand Up @@ -167,19 +179,19 @@ def _MockedMem():
""".splitlines()) # noqa: E501

MOCK_EXPECTED_OUTPUT_FULL = os.linesep.join("""\
[0] GeForce GTX TITAN 0 | 80°C, 16 %, 76 %, 125 / 250 W | 8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
[1] GeForce GTX TITAN 1 | 36°C, 53 %, 0 %, ?? / 250 W | 9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
[2] GeForce GTX TITAN 2 | 71°C, 100 %, ?? %, 250 / ?? W | 0 / 12189 MB | (Not Supported)
[0] GeForce GTX TITAN 0 | 80°C, 16 %, 76 % (E: 88 % D: 67 %), 125 / 250 W | 8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
[1] GeForce GTX TITAN 1 | 36°C, 53 %, 0 % (E: 0 % D: 0 %), ?? / 250 W | 9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
[2] GeForce GTX TITAN 2 | 71°C, 100 %, ?? % (E: ?? % D: ?? %), 250 / ?? W | 0 / 12189 MB | (Not Supported)
""".splitlines()) # noqa: E501

MOCK_EXPECTED_OUTPUT_FULL_PROCESS = os.linesep.join("""\
[0] GeForce GTX TITAN 0 | 80°C, 16 %, 76 %, 125 / 250 W | 8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
[0] GeForce GTX TITAN 0 | 80°C, 16 %, 76 % (E: 88 % D: 67 %), 125 / 250 W | 8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
├─ 48448 ( 85%, 257MB): python
└─ 153223 ( 15%, 0B): python
[1] GeForce GTX TITAN 1 | 36°C, 53 %, 0 %, ?? / 250 W | 9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
[1] GeForce GTX TITAN 1 | 36°C, 53 %, 0 % (E: 0 % D: 0 %), ?? / 250 W | 9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
├─ 192453 ( 123%, 59MB): torch
└─ 194826 ( 0%, 1025MB): caffe
[2] GeForce GTX TITAN 2 | 71°C, 100 %, ?? %, 250 / ?? W | 0 / 12189 MB | (Not Supported)
[2] GeForce GTX TITAN 2 | 71°C, 100 %, ?? % (E: ?? % D: ?? %), 250 / ?? W | 0 / 12189 MB | (Not Supported)
""".splitlines()) # noqa: E501


Expand Down Expand Up @@ -220,8 +232,8 @@ def test_new_query_mocked(self, N, Process, virtual_memory):
fp = StringIO()
gpustats.print_formatted(
fp=fp, no_color=False, show_user=True,
show_cmd=True, show_pid=True, show_power=True, show_fan_speed=True,
show_full_cmd=True
show_cmd=True, show_full_cmd=True, show_pid=True,
show_fan_speed=True, show_codec="enc,dec", show_power=True,
)

result = fp.getvalue()
Expand Down Expand Up @@ -273,6 +285,8 @@ def test_attributes_and_items(self, N, Process, virtual_memory):
g.memory_used, g.memory_total, g.memory_available))
print("temperature : %d" % (g.temperature))
print("utilization : %s" % (g.utilization))
print("utilization_enc : %s" % (g.utilization_enc))
print("utilization_dec : %s" % (g.utilization_dec))

@unittest.skipIf(sys.version_info < (3, 4), "Only in Python 3.4+")
@mock.patch('psutil.virtual_memory')
Expand Down