In [1]:
# Record which GPU the timings below were taken on; `cut` keeps only the text
# before the first '(' on each line of the `nvidia-smi -L` listing.
! nvidia-smi -L | cut -d '(' -f 1

GPU 0: GeForce RTX 2070 SUPER 


In [1]:
import torch
import time
import timeit
import itertools

# Number of forward passes per configuration; used for both the warmup run
# and the timed loop, and as the divisor for the per-call average.
nb = 1000

def main(s: str):
    """Benchmark ``torch.nn.RNN`` forward passes on CUDA over a grid of shapes.

    For every (batch size, sequence length) combination the average time of a
    single forward call is printed and appended to ``s + '.txt'`` as a line of
    the form ``"<seq_length>, <batch_size> $ <seconds>"``.

    Args:
        s: Base name of the result file; ``'.txt'`` is appended and the file
            is opened in write mode (an existing file is overwritten).
    """
    def prof(file, seq_length, n, input_size, hidden_size, num_layers):
        """Time ``nb`` forward passes for one shape and record the per-call average.

        Args:
            file: Writable text file object receiving the result line.
            seq_length: First dimension of the random input tensor.
            n: Second dimension of the random input tensor (reported as the
                batch size in the printed header).
            input_size: Last dimension of the input / ``input_size`` of the RNN.
            hidden_size: ``hidden_size`` of the RNN.
            num_layers: ``num_layers`` of the RNN.
        """
        x = torch.randn(seq_length, n, input_size, dtype=torch.float, device='cuda')

        net = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers).cuda()
        out = net(x)

        # warmup
        timeit.timeit(lambda: net(x), number=nb)
        # Drain the queued warmup work so it is not billed to the timed loop.
        torch.cuda.synchronize()

        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short intervals; time.time() is the adjustable wall clock.
        start = time.perf_counter()
        for _ in range(nb):
            out = net(x)
        # Wait for all queued GPU work to finish before stopping the clock.
        torch.cuda.synchronize()
        end = time.perf_counter()
        tc = (end - start) / nb  # average seconds per forward call

        print(f'{seq_length}, {n}'.ljust(25), f'{tc : .3e}')
        file.write(f'{seq_length}, {n} $ {tc}\n')


    print(torch.__version__)
    print('seq_length, batch_size')

    hidden_size = 1024
    input_size = 128
    num_layers = 1

    with open(s + '.txt', 'w') as file:
        # The first list holds batch sizes (outer loop), the second holds
        # sequence lengths (inner loop).
        for n, seq_length in itertools.product(
            [16, 32, 64, 128, 192, 256],
            [6, 12, 14, 16, 18, 20]
        ):
            prof(file, seq_length, n, input_size, hidden_size, num_layers)

In [2]:
# Run the benchmark sweep and save the timings to 'before.txt'.
main('before')

1.7.0a0+69839ea
seq_length, batch_size
6, 16                      3.255e-04
12, 16                     4.629e-04
14, 16                     5.024e-04
16, 16                     5.445e-04
18, 16                     5.963e-04
20, 16                     6.414e-04
6, 32                      2.986e-04
12, 32                     3.929e-04
14, 32                     5.999e-04
16, 32                     6.798e-04
18, 32                     7.842e-04
20, 32                     8.698e-04
6, 64                      3.462e-04
12, 64                     5.012e-04
14, 64                     5.550e-04
16, 64                     6.911e-04
18, 64                     7.815e-04
20, 64                     1.050e-03
6, 128                     3.877e-04
12, 128                    8.093e-04
14, 128                    9.302e-04
16, 128                    1.062e-03
18, 128                    1.200e-03
20, 128                    1.342e-03
6, 192                     5.491e-04
12, 192                    1.057e-03

In [5]:
# Run the same benchmark sweep again and save the timings to 'after.txt'.
# NOTE(review): presumably executed after switching to the build under test;
# the notebook itself does not show what changed between the two runs.
main('after')

1.7.0a0+69839ea
seq_length, batch_size
6, 16                      1.001e-03
12, 16                     1.604e-03
14, 16                     1.789e-03
16, 16                     1.974e-03
18, 16                     2.219e-03
20, 16                     2.460e-03
6, 32                      1.205e-03
12, 32                     1.779e-03
14, 32                     1.960e-03
16, 32                     2.159e-03
18, 32                     2.335e-03
20, 32                     2.525e-03
6, 64                      1.205e-03
12, 64                     2.140e-03
14, 64                     2.449e-03
16, 64                     2.773e-03
18, 64                     3.074e-03
20, 64                     3.372e-03
6, 128                     2.134e-03
12, 128                    3.959e-03
14, 128                    4.779e-03
16, 128                    5.375e-03
18, 128                    5.793e-03
20, 128                    6.748e-03
6, 192                     2.912e-03
12, 192                    5.681e-03

In [8]:
import re

def readfile(fn):
    """Parse a benchmark result file into a ``{key: value * 1000}`` dict.

    Every non-blank line must look like ``<key>$<number>``. The key is the
    text before the ``$`` exactly as written (only trailing whitespace of the
    whole line is removed, so whitespace directly before the ``$`` stays part
    of the key). The number is converted with ``float`` and multiplied by
    1000; callers in this notebook label the result as milliseconds.

    Args:
        fn: Path of the text file to read.

    Returns:
        dict mapping each key to its scaled float value, in file order. If a
        key occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line does not contain exactly one ``$`` or
            its value part is not a valid float.
    """
    d = {}
    with open(fn, 'r') as f:
        for lineno, raw in enumerate(f, start=1):
            line = raw.rstrip()
            if not line:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            # A plain str.split on the literal '$' replaces the regex '\$',
            # which is an invalid escape sequence in a non-raw string.
            parts = line.split('$')
            if len(parts) != 2:
                raise ValueError(
                    f"{fn}:{lineno}: expected '<key> $ <value>', got {line!r}"
                )
            key, t = parts
            d[key] = float(t) * 1000

    return d

def compare(f, before: str, *afters):
    """Print and write a Markdown table comparing benchmark timings.

    The rows are the keys of the ``before`` file, in file order. The first
    ``afters`` file fills the ``time_after (ms)`` column; each further file
    gets its own ``gpu_time_<name>`` column, where ``<name>`` is the file name
    without a trailing ``.txt``. The same data is echoed to stdout in a plain
    comma-separated layout.

    Args:
        f: Writable text file object that receives the Markdown table.
        before: Path of the baseline result file (read with ``readfile``).
        *afters: One or more paths of result files to compare against it.

    Raises:
        AssertionError: If no ``afters`` file is given.
        KeyError: If an ``afters`` file lacks a key present in ``before``.
    """
    assert len(afters) >= 1, 'provide at least one after data'

    def _label(filename):
        # Remove a literal '.txt' suffix. str.rstrip('.txt') is not a suffix
        # removal: it strips any trailing run of the characters '.', 't', 'x'
        # and would turn e.g. 'test.txt' into 'tes'.
        return filename[:-len('.txt')] if filename.endswith('.txt') else filename

    print('shape'.ljust(55), 'time_before (ms), time_after (ms)', end='')
    f.write('| shape | time_before (ms) | time_after (ms) |')
    for after in afters[1:]:
        label = _label(after)
        print(', gpu_time_' + label, end='')
        f.write(' gpu_time_' + label + ' (ms) |')
    print()
    print('sequence_length, batch_size')
    f.write('\n')
    # One '---' cell per column: shape + before + one per after file.
    f.write('| --- ' * (len(afters) + 2) + '| \n')

    d_b = readfile(before)
    d_as = [readfile(after) for after in afters]

    for key in d_b:
        time_before = d_b[key]
        time_after = d_as[0][key]

        print(f'{key: <55} {time_before: .3f}, {time_after: .3f}, ' + ' '*5, end='')
        f.write(f'| {key} | {time_before: .3f} | {time_after: .3f} | ')
        # Extra columns for the second and later "after" files.
        for d_a in d_as[1:]:
            time_after = d_a[key]
            print(f'{time_after: .3f}, ', end='')
            f.write(f'{time_after: .3f} |')
        print()
        f.write('\n')

# Build the before/after comparison: the Markdown table goes to table.md and a
# plain-text version of the same rows is printed below the cell.
with open('table.md', 'w') as f:
    compare(f, 'before.txt', 'after.txt')

shape                                                   time_before (ms), time_after (ms)
sequence_length, batch_size
6, 16                                                    0.971,  1.001,      
12, 16                                                   1.576,  1.604,      
14, 16                                                   1.735,  1.789,      
16, 16                                                   1.906,  1.974,      
18, 16                                                   2.146,  2.219,      
20, 16                                                   2.371,  2.460,      
6, 32                                                    1.216,  1.205,      
12, 32                                                   1.783,  1.779,      
14, 32                                                   1.970,  1.960,      
16, 32                                                   2.159,  2.159,      
18, 32                                                   2.349,  2.335,      
20, 32                  