## 第7章 计算性能
### 7.1 命令式编程和符号式编程

In [1]:
def add(a, b):
    """Return ``a + b``; works for any operands that support ``+``."""
    total = a + b
    return total
def fancy_func(a, b, c, d):
    """Combine four values pairwise via ``add``: ``add(add(a, b), add(c, d))``."""
    first_pair = add(a, b)
    second_pair = add(c, d)
    return add(first_pair, second_pair)
# Call the function directly; the notebook displays the returned value.
fancy_func(1, 2, 3, 4)

10

In [2]:
def add_str():
    """Return the source code of the ``add`` function as a string."""
    source = (
        '\n'
        'def add(a, b):\n'
        '    return a+b\n'
    )
    return source
def fancy_func_str():
    """Return the source code of the ``fancy_func`` function as a string."""
    source = (
        '\n'
        'def fancy_func(a, b, c, d):\n'
        '    e = add(a, b)\n'
        '    f = add(c, d)\n'
        '    g = add(e, f)\n'
        '    return g\n'
    )
    return source
def evoke_str():
    """Return the whole program text: both function sources plus a print call."""
    call_source = '\nprint(fancy_func(1, 2, 3, 4))\n'
    pieces = (add_str(), fancy_func_str(), call_source)
    return ''.join(pieces)
# Build the program as a single string and print it so the generated source is visible.
prog = evoke_str()
print(prog)
# Compile the string into a code object ('exec' mode accepts a sequence of
# statements), then execute it; the program's own print() emits the result.
y = compile(prog, '', 'exec')
exec(y)


def add(a, b):
    return a+b

def fancy_func(a, b, c, d):
    e = add(a, b)
    f = add(c, d)
    g = add(e, f)
    return g

print(fancy_func(1, 2, 3, 4))

10


### 7.2 自动并行计算

In [3]:
import time
import torch
# The cells below place tensors on 'cuda:0' and 'cuda:1', so stop early
# unless at least two CUDA devices are available.
assert torch.cuda.device_count() >= 2

In [4]:
class Benchmark:
    """Context manager that prints the time spent inside its ``with`` block.

    Args:
        prefix: Optional label printed before the timing. When given and
            non-empty, a single space is appended after it; ``None`` or an
            empty string prints no label.

    On exit it prints ``'<prefix>time: <seconds> sec'`` with four decimals.
    """

    def __init__(self, prefix=None):
        # Label prepended to the printed timing line.
        self.prefix = prefix + ' ' if prefix else ''

    def __enter__(self):
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump if the system clock is adjusted while the block is running.
        self.start = time.perf_counter()
        # Return the instance so ``with Benchmark(...) as b`` binds it.
        return self

    def __exit__(self, *args):
        # Implicitly returns None, so an exception raised inside the block
        # still propagates after the timing has been printed.
        print('%stime: %.4f sec' % (self.prefix, time.perf_counter() - self.start))

In [5]:
def run(x, num_iters=20000):
    """Repeatedly compute ``torch.mm(x, x)`` as a synthetic workload.

    The products are discarded; the function exists only to give the device
    that holds ``x`` something to do while it is being timed.

    Args:
        x: Tensor for which ``torch.mm(x, x)`` is valid.
        num_iters: Number of multiplications to issue. Defaults to 20000,
            the count that was previously hard-coded.

    Returns:
        None.
    """
    for _ in range(num_iters):
        # The result is intentionally not bound to a name; only the work matters.
        torch.mm(x, x)

In [6]:
# One random 100x100 matrix on each of the two GPUs ('cuda:0' and 'cuda:1').
x_gpu1 = torch.rand(size=(100, 100), device='cuda:0')
x_gpu2 = torch.rand(size=(100, 100), device='cuda:1')

In [7]:
# CUDA kernels are launched asynchronously, so each timed block must wait for
# the device to finish before the timer stops. torch.cuda.synchronize() with
# no argument waits only for the *current* device, so the device of each
# tensor is passed explicitly; otherwise the GPU2 timing would not wait for
# the work queued on cuda:1.
with Benchmark('Run on GPU1.'):
    run(x_gpu1)
    torch.cuda.synchronize(x_gpu1.device)
with Benchmark('Run on GPU2.'):
    run(x_gpu2)
    torch.cuda.synchronize(x_gpu2.device)

Run on GPU1. time: 0.8804 sec
Run on GPU2. time: 0.9097 sec


In [8]:
# Queue work on both GPUs back to back, then wait for BOTH devices before the
# timer stops. torch.cuda.synchronize() with no argument waits only for the
# current device, which would leave the work on the other GPU unmeasured.
with Benchmark('Run on both GPU1 and GPU2 in parallel.'):
    run(x_gpu1)
    run(x_gpu2)
    torch.cuda.synchronize(x_gpu1.device)
    torch.cuda.synchronize(x_gpu2.device)

Run on both GPU1 and GPU2 in parallel. time: 0.9803 sec


### 7.3 多GPU计算

In [9]:
!nvidia-smi

Thu Dec 24 15:33:15 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.57       Driver Version: 450.57       CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  TITAN V             Off  | 00000000:02:00.0 Off |                  N/A |
| 32%   47C    P8    28W / 250W |   3750MiB / 12065MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  TITAN V             Off  | 00000000:82:00.0 Off |                  N/A |
| 46%   63C    P2    44W / 250W |   4493MiB / 12066MiB |      0%      Default |
|       

In [10]:
# Create a linear layer with 10 inputs and 1 output and move it to the GPU
# with .cuda(); the trailing expression displays the module.
net = torch.nn.Linear(10, 1).cuda()
net

Linear(in_features=10, out_features=1, bias=True)

In [11]:
# Wrap the model in DataParallel; the original layer becomes the wrapper's
# `module` attribute, as the displayed repr shows.
# NOTE: this rebinds `net`, so re-running the cell wraps the already-wrapped
# model a second time.
net = torch.nn.DataParallel(net)
net

DataParallel(
  (module): Linear(in_features=10, out_features=1, bias=True)
)

In [12]:
# Save the state_dict of the DataParallel wrapper itself; its keys carry a
# "module." prefix (visible in the load error reported further down).
torch.save(net.state_dict(), '7.3_model.pt')

In [13]:
# Loading the wrapper's checkpoint into a plain Linear layer fails: the file
# holds "module.weight"/"module.bias" while Linear expects "weight"/"bias".
# The resulting RuntimeError is what this cell demonstrates.
new_net = torch.nn.Linear(10, 1)
new_net.load_state_dict(torch.load('7.3_model.pt'))

RuntimeError: Error(s) in loading state_dict for Linear:
	Missing key(s) in state_dict: "weight", "bias". 
	Unexpected key(s) in state_dict: "module.weight", "module.bias". 

In [14]:
# Save the inner layer's state_dict (net.module) instead, so the keys have no
# "module." prefix. This overwrites the earlier checkpoint file, which then
# loads into the plain Linear layer with no missing or unexpected keys.
torch.save(net.module.state_dict(), '7.3_model.pt')
new_net.load_state_dict(torch.load('7.3_model.pt'))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])