Skip to content

Commit

Permalink
Merge pull request #80 from ChaoticMind/enc_dec_util
Browse files Browse the repository at this point in the history
Add support for enc/dec gpu utilization (#79)
  • Loading branch information
wookayin committed Mar 24, 2020
2 parents 2468d5b + 59c979a commit b80d8e8
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 15 deletions.
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -30,6 +30,7 @@ Options:
* `-f`, `--show-full-cmd` : Display full command and cpu stats of running process
* `-p`, `--show-pid` : Display PID of the process
* `-F`, `--show-fan` : Display GPU fan speed
* `-e`, `--show-codec` : Display encoder and/or decoder utilization
* `-P`, `--show-power` : Display GPU power usage and/or limit (`draw` or `draw,limit`)
* `-a`, `--show-all` : Display all gpu properties above
* `--watch`, `-i`, `--interval` : Run in watch mode (equivalent to `watch gpustat`) if given. Denotes interval between updates. ([#41][gh-issue-41])
Expand Down
14 changes: 10 additions & 4 deletions gpustat/cli.py
Expand Up @@ -93,15 +93,18 @@ def main(*argv):
help='Display PID of running process')
parser.add_argument('-F', '--show-fan-speed', '--show-fan',
action='store_true', help='Display GPU fan speed')
parser.add_argument('--json', action='store_true', default=False,
help='Print all the information in JSON format')
parser.add_argument('-v', '--version', action='version',
version=('gpustat %s' % __version__))
parser.add_argument(
'-e', '--show-codec', nargs='?', const='enc,dec', default='',
choices=['enc', 'dec', 'enc,dec'],
help='Show encoder/decoder utilization'
)
parser.add_argument(
'-P', '--show-power', nargs='?', const='draw,limit',
choices=['', 'draw', 'limit', 'draw,limit', 'limit,draw'],
help='Show GPU power usage or draw (and/or limit)'
)
parser.add_argument('--json', action='store_true', default=False,
help='Print all the information in JSON format')
parser.add_argument(
'-i', '--interval', '--watch', nargs='?', type=float, default=0,
help='Use watch mode if given; seconds to wait between updates'
Expand All @@ -118,12 +121,15 @@ def main(*argv):
'--debug', action='store_true', default=False,
help='Allow to print additional informations for debugging.'
)
parser.add_argument('-v', '--version', action='version',
version=('gpustat %s' % __version__))
args = parser.parse_args(argv[1:])
if args.show_all:
args.show_cmd = True
args.show_user = True
args.show_pid = True
args.show_fan_speed = True
args.show_codec = 'enc,dec'
args.show_power = 'draw,limit'
del args.show_all

Expand Down
61 changes: 58 additions & 3 deletions gpustat/core.py
Expand Up @@ -132,6 +132,24 @@ def utilization(self):
v = self.entry['utilization.gpu']
return int(v) if v is not None else None

@property
def utilization_enc(self):
    """GPU encoder utilization as an integer percentage.

    Returns None when the driver does not report encoder utilization
    (e.g. the NVML query is unsupported on this device).
    """
    value = self.entry['utilization.enc']
    return None if value is None else int(value)

@property
def utilization_dec(self):
    """GPU decoder utilization as an integer percentage.

    Returns None when the driver does not report decoder utilization
    (e.g. the NVML query is unsupported on this device).
    """
    value = self.entry['utilization.dec']
    return None if value is None else int(value)

@property
def power_draw(self):
"""
Expand Down Expand Up @@ -163,8 +181,9 @@ def print_to(self, fp,
show_full_cmd=False,
show_user=False,
show_pid=False,
show_power=None,
show_fan_speed=None,
show_codec="",
show_power=None,
gpuname_width=16,
term=None,
):
Expand All @@ -181,8 +200,11 @@ def _conditional(cond_fn, true_value, false_value,
except Exception:
return error_value

_ENC_THRESHOLD = 50

colors['C0'] = term.normal
colors['C1'] = term.cyan
colors['CBold'] = term.bold
colors['CName'] = term.blue
colors['CTemp'] = _conditional(lambda: self.temperature < 50,
term.red, term.bold_red)
Expand All @@ -195,6 +217,12 @@ def _conditional(cond_fn, true_value, false_value,
colors['CUser'] = term.bold_black # gray
colors['CUtil'] = _conditional(lambda: self.utilization < 30,
term.green, term.bold_green)
colors['CUtilEnc'] = _conditional(
lambda: self.utilization_enc < _ENC_THRESHOLD,
term.green, term.bold_green)
colors['CUtilDec'] = _conditional(
lambda: self.utilization_dec < _ENC_THRESHOLD,
term.green, term.bold_green)
colors['CCPUUtil'] = term.green
colors['CPowU'] = _conditional(
lambda: float(self.power_draw) / self.power_limit < 0.4,
Expand All @@ -221,6 +249,17 @@ def _repr(v, none_value='??'):
reps += "%(FSpeed)s{entry[fan.speed]:>3} %%%(C0)s, "

reps += "%(CUtil)s{entry[utilization.gpu]:>3} %%%(C0)s"
if show_codec:
codec_info = []
if "enc" in show_codec:
codec_info.append(
"%(CBold)sE: %(C0)s"
"%(CUtilEnc)s{entry[utilization.enc]:>3} %%%(C0)s")
if "dec" in show_codec:
codec_info.append(
"%(CBold)sD: %(C0)s"
"%(CUtilDec)s{entry[utilization.dec]:>3} %%%(C0)s")
reps += " ({})".format(" ".join(codec_info))

if show_power:
reps += ", %(CPowU)s{entry[power.draw]:>3}%(C0)s "
Expand Down Expand Up @@ -386,6 +425,16 @@ def get_process_info(nv_process):
except N.NVMLError:
utilization = None # Not supported

try:
utilization_enc = N.nvmlDeviceGetEncoderUtilization(handle)
except N.NVMLError:
utilization_enc = None # Not supported

try:
utilization_dec = N.nvmlDeviceGetDecoderUtilization(handle)
except N.NVMLError:
utilization_dec = None # Not supported

try:
power = N.nvmlDeviceGetPowerUsage(handle)
except N.NVMLError:
Expand Down Expand Up @@ -437,6 +486,10 @@ def get_process_info(nv_process):
'temperature.gpu': temperature,
'fan.speed': fan_speed,
'utilization.gpu': utilization.gpu if utilization else None,
'utilization.enc':
utilization_enc[0] if utilization_enc else None,
'utilization.dec':
utilization_dec[0] if utilization_dec else None,
'power.draw': power // 1000 if power is not None else None,
'enforced.power.limit': power_limit // 1000
if power_limit is not None else None,
Expand Down Expand Up @@ -486,7 +539,8 @@ def __repr__(self):

def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
show_cmd=False, show_full_cmd=False, show_user=False,
show_pid=False, show_power=None, show_fan_speed=None,
show_pid=False, show_fan_speed=None,
show_codec="", show_power=None,
gpuname_width=16, show_header=True,
eol_char=os.linesep,
):
Expand Down Expand Up @@ -540,8 +594,9 @@ def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
show_full_cmd=show_full_cmd,
show_user=show_user,
show_pid=show_pid,
show_power=show_power,
show_fan_speed=show_fan_speed,
show_codec=show_codec,
show_power=show_power,
gpuname_width=gpuname_width,
term=t_color)
fp.write(eol_char)
Expand Down
30 changes: 22 additions & 8 deletions gpustat/test_gpustat.py
Expand Up @@ -114,6 +114,18 @@ def _decorated(*args, **kwargs):
mock_handles[2]: N.NVMLError_NotSupported(), # Not Supported
}.get(handle, RuntimeError))

N.nvmlDeviceGetEncoderUtilization.side_effect = _raise_ex(lambda handle: {
mock_handles[0]: [88, 167000], # [value, sample_rate]
mock_handles[1]: [0, 167000], # [value, sample_rate]
mock_handles[2]: N.NVMLError_NotSupported(), # Not Supported
}.get(handle, RuntimeError))

N.nvmlDeviceGetDecoderUtilization.side_effect = _raise_ex(lambda handle: {
mock_handles[0]: [67, 167000], # [value, sample_rate]
mock_handles[1]: [0, 167000], # [value, sample_rate]
mock_handles[2]: N.NVMLError_NotSupported(), # Not Supported
}.get(handle, RuntimeError))

# running process information: a bit annoying...
mock_process_t = namedtuple("Process_t", ['pid', 'usedGpuMemory'])

Expand Down Expand Up @@ -167,19 +179,19 @@ def _MockedMem():
""".splitlines()) # noqa: E501

MOCK_EXPECTED_OUTPUT_FULL = os.linesep.join("""\
[0] GeForce GTX TITAN 0 | 80°C, 16 %, 76 %, 125 / 250 W | 8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
[1] GeForce GTX TITAN 1 | 36°C, 53 %, 0 %, ?? / 250 W | 9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
[2] GeForce GTX TITAN 2 | 71°C, 100 %, ?? %, 250 / ?? W | 0 / 12189 MB | (Not Supported)
[0] GeForce GTX TITAN 0 | 80°C, 16 %, 76 % (E: 88 % D: 67 %), 125 / 250 W | 8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
[1] GeForce GTX TITAN 1 | 36°C, 53 %, 0 % (E: 0 % D: 0 %), ?? / 250 W | 9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
[2] GeForce GTX TITAN 2 | 71°C, 100 %, ?? % (E: ?? % D: ?? %), 250 / ?? W | 0 / 12189 MB | (Not Supported)
""".splitlines()) # noqa: E501

MOCK_EXPECTED_OUTPUT_FULL_PROCESS = os.linesep.join("""\
[0] GeForce GTX TITAN 0 | 80°C, 16 %, 76 %, 125 / 250 W | 8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
[0] GeForce GTX TITAN 0 | 80°C, 16 %, 76 % (E: 88 % D: 67 %), 125 / 250 W | 8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
├─ 48448 ( 85%, 257MB): python
└─ 153223 ( 15%, 0B): python
[1] GeForce GTX TITAN 1 | 36°C, 53 %, 0 %, ?? / 250 W | 9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
[1] GeForce GTX TITAN 1 | 36°C, 53 %, 0 % (E: 0 % D: 0 %), ?? / 250 W | 9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
├─ 192453 ( 123%, 59MB): torch
└─ 194826 ( 0%, 1025MB): caffe
[2] GeForce GTX TITAN 2 | 71°C, 100 %, ?? %, 250 / ?? W | 0 / 12189 MB | (Not Supported)
[2] GeForce GTX TITAN 2 | 71°C, 100 %, ?? % (E: ?? % D: ?? %), 250 / ?? W | 0 / 12189 MB | (Not Supported)
""".splitlines()) # noqa: E501


Expand Down Expand Up @@ -220,8 +232,8 @@ def test_new_query_mocked(self, N, Process, virtual_memory):
fp = StringIO()
gpustats.print_formatted(
fp=fp, no_color=False, show_user=True,
show_cmd=True, show_pid=True, show_power=True, show_fan_speed=True,
show_full_cmd=True
show_cmd=True, show_full_cmd=True, show_pid=True,
show_fan_speed=True, show_codec="enc,dec", show_power=True,
)

result = fp.getvalue()
Expand Down Expand Up @@ -273,6 +285,8 @@ def test_attributes_and_items(self, N, Process, virtual_memory):
g.memory_used, g.memory_total, g.memory_available))
print("temperature : %d" % (g.temperature))
print("utilization : %s" % (g.utilization))
print("utilization_enc : %s" % (g.utilization_enc))
print("utilization_dec : %s" % (g.utilization_dec))

@unittest.skipIf(sys.version_info < (3, 4), "Only in Python 3.4+")
@mock.patch('psutil.virtual_memory')
Expand Down

0 comments on commit b80d8e8

Please sign in to comment.