Commit 808b4d9: Merge 09054c3 into b3c1ab1
undertherain committed Sep 28, 2020
2 parents b3c1ab1 + 09054c3
Showing 5 changed files with 84 additions and 55 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -7,7 +7,7 @@ sudo: false
dist: bionic

python:
- '3.6'
- '3.7'

os:
- linux
44 changes: 19 additions & 25 deletions benchmarker/modules/do_pytorch.py
@@ -26,6 +26,7 @@ def __init__(self, params, extra_args=None):
parser.set_defaults(cbm=True)
args, remaining_args = parser.parse_known_args(extra_args)
super().__init__(params, remaining_args)
self.params["channels_first"] = True
params["problem"]["precision"] = args.precision
self.params["backend"] = args.backend
self.params["cudnn_benchmark"] = args.cbm
@@ -35,17 +36,28 @@ def __init__(self, params, extra_args=None):
assert self.params["problem"]["precision"] in {"FP32", "FP16", "mixed"}
else:
assert self.params["problem"]["precision"] == "FP32"
self.params["channels_first"] = True
torch.backends.cudnn.benchmark = self.params["cudnn_benchmark"]
if self.params["backend"] == "DNNL":
torch.backends.mkldnn.enabled = True
else:
if self.params["backend"] == "native":
torch.backends.mkldnn.enabled = False
else:
raise RuntimeError("Unknown backend")
x_train, y_train = self.load_data()
self.device = torch.device("cuda" if self.params["gpus"] else "cpu")
self.x_train = torch.from_numpy(x_train).to(self.device)
self.y_train = torch.from_numpy(y_train).to(self.device)

def train(self, model, device, optimizer, epoch):
def train(self, model, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(zip(self.x_train, self.y_train)):
optimizer.zero_grad()
loss = model(data, target)
loss.mean().backward()
optimizer.step()
progress(epoch, batch_idx, len(self.x_train), loss.mean().item())
if device.type == "cuda":
if self.device.type == "cuda":
torch.cuda.synchronize()
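
Aside: CUDA launches kernels asynchronously, so the synchronize call above is what makes the per-epoch wall-clock timings trustworthy. A minimal sketch of the pattern, with illustrative names (timed_step and step_fn are not part of the repo):

    import torch
    from timeit import default_timer as timer

    def timed_step(step_fn, device):
        # run one training step and return its true wall-clock duration
        start = timer()
        step_fn()
        if device.type == "cuda":
            torch.cuda.synchronize()  # flush queued kernels before reading the clock
        return timer() - start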

def set_random_seed(self, seed):
@@ -75,30 +87,12 @@ def inference(self, model, device):
# 100. * correct / len(test_loader.dataset)))

def run_internal(self):
global torch # THIS IS BEYOND RIDICULOUS THAT IT FAILS W/O THIS LINE!
torch.backends.cudnn.benchmark = self.params["cudnn_benchmark"]
if self.params["backend"] == "DNNL":
import torch.backends.mkldnn
torch.backends.mkldnn.enabled = True
else:
if self.params["backend"] == "native":
torch.backends.mkldnn.enabled = False
else:
raise RuntimeError("Unknown backend")
device = torch.device("cuda" if self.params["gpus"] else "cpu")

x_train, y_train = self.load_data()

# train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.params["batch_size"], shuffle=False)

model = self.net
if len(self.params["gpus"]) > 1:
model = nn.DataParallel(model)
# TODO: make off/on-core optional
self.x_train = torch.from_numpy(x_train).to(device)
self.y_train = torch.from_numpy(y_train).to(device)
model.to(device)

model.to(self.device)
# TODO: args for training hyperparameters
start = timer()
if self.params["problem"]["precision"] == "FP16":
@@ -118,7 +112,7 @@ def run_internal(self):
# TODO: make opt level a parameter
# TODO: convert inputs to FP16 for more aggressive opt levels
for epoch in range(1, self.params["nb_epoch"] + 1):
self.train(model, device, optimizer, epoch)
self.train(model, optimizer, epoch)
# test(args, model, device, test_loader)
else:
assert self.params["problem"]["precision"] in ["FP32", "FP16"]
@@ -127,7 +121,7 @@ def run_internal(self):
if self.params["backend"] == "DNNL":
model = mkldnn_utils.to_mkldnn(model)
for epoch in range(1, self.params["nb_epoch"] + 1):
self.inference(model, device)
self.inference(model, self.device)
end = timer()
self.params["time_total"] = end - start
self.params["time_epoch"] = self.params["time_total"] / self.params["nb_epoch"]
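
Net effect of the do_pytorch.py changes: backend selection, device creation, and host-to-device data transfer now happen once in __init__, so run_internal no longer needs the `global torch` workaround or a per-run device. A minimal sketch of the consolidated setup logic, assuming the same params layout (configure_backend is an illustrative name, not a function in the repo):

    import torch

    def configure_backend(params):
        # cuDNN autotuner plus MKL-DNN (DNNL) on/off, as in the constructor
        torch.backends.cudnn.benchmark = params["cudnn_benchmark"]
        if params["backend"] == "DNNL":
            torch.backends.mkldnn.enabled = True
        elif params["backend"] == "native":
            torch.backends.mkldnn.enabled = False
        else:
            raise RuntimeError("Unknown backend")
        return torch.device("cuda" if params["gpus"] else "cpu")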
36 changes: 11 additions & 25 deletions benchmarker/modules/do_tensorflow.py
@@ -24,6 +24,12 @@ def __init__(self, params, extra_args=None):
super().__init__(params, remaining_args)
self.params["channels_first"] = False
os.environ["KERAS_BACKEND"] = "tensorflow"
x_train, y_train = self.load_data()
# Reshape from (nbatch, bs, ...) to (nbatch * bs, ...)
self.x_train = x_train.reshape((-1,) + x_train.shape[2:])
self.y_train = y_train.reshape((-1,) + y_train.shape[2:])
# preheat
self.net.predict(self.x_train, self.params["batch_size"])

def get_strategy(self):
gpu_count_same = self.params["nb_gpus"] == len(
@@ -71,38 +77,18 @@ def set_random_seed(self, seed):
tf.random.set_seed(seed)

def run_internal(self):

# if params["channels_first"]:
# keras.backend.set_image_data_format("channels_first")
# else:
# keras.backend.set_image_data_format("channels_last")

# todo set image format
x_train, y_train = self.load_data()
# Reshape from (nbatch, bs, ...) to (nbatch * bs, ...)
x_train = x_train.reshape((-1,) + x_train.shape[2:])
y_train = y_train.reshape((-1,) + y_train.shape[2:])

if len(y_train.shape) > 1:
cnt_classes = y_train.shape[1]
else:
cnt_classes = 1
self.params["cnt_classes"] = cnt_classes
model = self.net
nb_epoch = self.params["nb_epoch"]
bs = self.params["batch_size"]
if self.params["mode"] == "training":
print("preheat")
model.fit(x_train, y_train, batch_size=bs, epochs=1)
print("train")
start = timer()
model.fit(x_train, y_train, batch_size=bs, epochs=nb_epoch, verbose=1)
model.fit(self.x_train,
self.y_train,
batch_size=self.params["batch_size"],
epochs=nb_epoch, verbose=1)
else:
# preheat
model.predict(x_train, bs)
start = timer()
for i in range(nb_epoch):
model.predict(x_train, bs, verbose=1)
model.predict(self.x_train, self.params["batch_size"], verbose=1)
end = timer()
self.params["time_total"] = (end - start)
self.params["time_epoch"] = self.params["time_total"] / self.params["nb_epoch"]
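
The reshape moved into do_tensorflow.py's __init__ flattens the leading batch-of-batches axis so Keras sees one long sample axis, and the predict call warms the model up once instead of on every run. A small numpy illustration of the reshape (shapes are made-up examples):

    import numpy as np

    x = np.zeros((10, 32, 28, 28, 1))        # (nbatch, bs, ...) as loaded
    flat = x.reshape((-1,) + x.shape[2:])    # collapse the first two axes
    assert flat.shape == (320, 28, 28, 1)    # (nbatch * bs, ...)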
56 changes: 52 additions & 4 deletions benchmarker/modules/i_neural_net.py
@@ -5,24 +5,28 @@
import random

import numpy
import threading
from time import sleep
import numpy as np
import pyRAPL


class INeuralNet:
"""Interface for all deep learning modules"""

def __init__(self, params, extra_args=None):
self.params = params

parser = argparse.ArgumentParser(description="Benchmark deep learning models")
parser.add_argument("--mode", default="training")
parser.add_argument("--nb_epoch", type=int, default=10)

#
parser.add_argument("--power_sampling_ms", type=int, default=100)
parser.add_argument("--random_seed", default=None)

parsed_args, remaining_args = parser.parse_known_args(extra_args)

params["mode"] = parsed_args.mode
params["power"] = {}
params["power"]["sampling_ms"] = parsed_args.power_sampling_ms
params["nb_epoch"] = parsed_args.nb_epoch
assert params["mode"] in ["training", "inference"]
params["path_out"] = os.path.join(params["path_out"], params["mode"])
@@ -38,10 +42,16 @@ def __init__(self, params, extra_args=None):
self.params["batch_size"] = (
self.params["batch_size_per_device"] * self.params["nb_gpus"]
)
self.params["channels_first"] = True
# self.params["channels_first"] = True
if parsed_args.random_seed is not None:
self.set_random_seed(int(parsed_args.random_seed))
self.get_kernel(params, remaining_args)
self.keep_monitor = True
try:
pyRAPL.setup()
self.rapl_enabled = True
except Exception:  # pyRAPL.setup() failed: RAPL counters unavailable
self.rapl_enabled = False

def get_kernel(self, params, remaining_args):
"""Default function to set `self.net`. The derived do_* classes can
@@ -85,15 +95,53 @@ def set_random_seed(self, seed):
random.seed(seed)

def run(self):
self.params["power"]["joules_total"] = 0
thread_monitor = threading.Thread(target=self.monitor, args=())
thread_monitor.start()  # start GPU power sampling in the background
if self.rapl_enabled:
meter_rapl = pyRAPL.Measurement('bar')
meter_rapl.begin()
results = self.run_internal()
self.keep_monitor = False
if self.rapl_enabled:
meter_rapl.end()
self.params["power"]["joules_CPU"] = sum(meter_rapl.result.pkg) / 1000000.0
self.params["power"]["joules_RAM"] = sum(meter_rapl.result.dram) / 1000000.0
thread_monitor.join()
if self.rapl_enabled:
self.params["power"]["joules_total"] += self.params["power"]["joules_CPU"]
self.params["power"]["joules_total"] += self.params["power"]["joules_RAM"]
results["time_batch"] = (
results["time_epoch"] / results["problem"]["cnt_batches_per_epoch"]
)
results["time_sample"] = results["time_batch"] / results["batch_size"]
results["samples_per_second"] = (
results["problem"]["cnt_samples"] / results["time_epoch"]
)
if "joules_GPU" in results["power"]:
results["samples_per_joule_GPU"] = results["problem"]["cnt_samples"] * results["nb_epoch"] / self.params["power"]["joules_GPU"]
if results["power"]["joules_total"] > 0:
results["samples_per_joule"] = results["problem"]["cnt_samples"] * results["nb_epoch"] / self.params["power"]["joules_total"]
if "flop_estimated" in results["problem"]:
results["flop_per_second_estimated"] = results["problem"]['flop_estimated'] / results["time_total"]
results["gflop_per_second_estimated"] = results["flop_per_second_estimated"] / (1000 * 1000 * 1000)
return results
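
For reference, pyRAPL reads Intel RAPL energy counters; result.pkg and result.dram hold per-socket totals in microjoules, hence the division by 1e6 above. A standalone sketch of the measurement pattern used in run(), where do_work is a placeholder for the benchmarked call:

    import pyRAPL

    pyRAPL.setup()                      # raises if RAPL counters are unavailable
    meter = pyRAPL.Measurement("bench")
    meter.begin()
    do_work()                           # placeholder for run_internal()
    meter.end()
    joules_cpu = sum(meter.result.pkg) / 1e6    # microjoules -> joules
    joules_ram = sum(meter.result.dram) / 1e6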

def monitor(self):
lst_power_gpu = []
# TODO: move this to init
# TODO: query multiple GPUs
# TODO: don't do this if GPU is not used
from py3nvml.py3nvml import nvmlInit, nvmlShutdown # nvmlDeviceGetCount
from py3nvml.py3nvml import nvmlDeviceGetHandleByIndex, nvmlDeviceGetPowerUsage
nvmlInit()
# cnt_gpu = nvmlDeviceGetCount()
handles = [nvmlDeviceGetHandleByIndex(i) for i in self.params["gpus"]]
while self.keep_monitor:
power_gpu = [nvmlDeviceGetPowerUsage(handle) / 1000.0 for handle in handles]
lst_power_gpu.append(sum(power_gpu))
sleep(self.params["power"]["sampling_ms"] / 1000.0)
nvmlShutdown()
self.params["power"]["avg_watt_GPU"] = np.mean(lst_power_gpu)
self.params["power"]["joules_GPU"] = self.params["power"]["avg_watt_GPU"] * self.params["time_total"]
self.params["power"]["joules_total"] += self.params["power"]["joules_GPU"]
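
monitor() estimates GPU energy with a rectangle rule: it polls instantaneous board power, then multiplies the mean wattage by the total runtime to get joules. A minimal polling sketch along the same lines, assuming a single GPU at index 0 (mean_gpu_watts is an illustrative helper; nvmlDeviceGetPowerUsage reports milliwatts):

    import numpy as np
    from time import sleep
    from py3nvml.py3nvml import (nvmlInit, nvmlShutdown,
                                 nvmlDeviceGetHandleByIndex,
                                 nvmlDeviceGetPowerUsage)

    def mean_gpu_watts(duration_s, interval_s=0.1, gpu_index=0):
        # sample power every interval_s; mean(samples) * runtime ~ joules
        nvmlInit()
        handle = nvmlDeviceGetHandleByIndex(gpu_index)
        samples = []
        for _ in range(int(duration_s / interval_s)):
            samples.append(nvmlDeviceGetPowerUsage(handle) / 1000.0)  # mW -> W
            sleep(interval_s)
        nvmlShutdown()
        return float(np.mean(samples))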
1 change: 1 addition & 0 deletions requirements.txt
@@ -3,3 +3,4 @@ py-cpuinfo==5.0.0
system_query[cpu, hdd, ram, swap]
# python_papi
# install system_query[all] for GPU systems
pyRAPL
