In [None]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
#####################################
# File name : main.py
# Create date : 2019-01-10 16:39
# Modified date : 2019-01-24 14:05
# Author : DARREN
# Describe : not set
# Email : lzygzh@126.com
#####################################
from __future__ import division
from __future__ import print_function

import os
import time
import timeit

import matplotlib.pyplot as plt
import numpy as np
import torch

def create_path(path):
    """Create the directory ``path`` (including missing parents) if needed.

    Nothing happens when ``path`` already is a directory.
    """
    if os.path.isdir(path):
        return
    os.makedirs(path)

def get_file_full_name(path, name):
    """Return ``path`` and ``name`` joined by exactly one "/".

    The directory ``path`` is created first (via ``create_path``) so the
    returned file name can be opened for writing right away.
    """
    create_path(path)
    # Only insert a separator when the directory does not already end in one.
    separator = "" if path[-1] == "/" else "/"
    return path + separator + name

def create_file(path, name, open_type='w'):
    """Open the file ``name`` inside directory ``path`` and return the file object.

    ``open_type`` is passed to ``open`` as the mode; the directory is created
    by ``get_file_full_name`` when it does not exist yet. The caller is
    responsible for closing the returned file.
    """
    return open(get_file_full_name(path, name), open_type)

def _plot_record(record, full_path):
    """Render every chart for ``record`` into the directory ``full_path``.

    The raw timing chart is drawn first, the acceleration chart second.
    """
    for draw_chart in (_plot_cpu_gpu_time, _plot_acceleration):
        draw_chart(record, full_path)

def _get_full_path(repeats, size_begin, size_end):
    if not os.path.exists("./output"):
        os.makedirs("./output")
    path_str = "./output/%s_%s_%s" % (repeats, size_begin, size_end)
    return path_str

def _plot_cpu_gpu_time(record, full_path):
    """Plot the six timing curves of ``record`` and save them as cpu_gpu.jpg.

    ``record`` maps a size key to a dict of timings (keys such as
    "float32_numpy" or "float64_torch_gpu"). The x axis is the square of each
    size key, the y axis the stored timing. The image is written to
    ``<full_path>/cpu_gpu.jpg``.
    """
    # Order in which the timings are read out of every per-size dict.
    gather_order = (
        "float32_numpy",
        "float64_numpy",
        "float32_torch_cpu",
        "float64_torch_cpu",
        "float32_torch_gpu",
        "float64_torch_gpu",
    )
    # (record key, legend label) in drawing order; the drawing order decides
    # which default colour every curve receives.
    curves = (
        ("float32_torch_gpu", "float32 torch gpu"),
        ("float64_torch_gpu", "float64 torch gpu"),
        ("float32_torch_cpu", "float32 torch cpu"),
        ("float64_torch_cpu", "float64 torch cpu"),
        ("float32_numpy", "float32 numpy"),
        ("float64_numpy", "float64 numpy"),
    )

    # Every key is wrapped in a one-element list, so the x values end up as a
    # single-column 2-D array once converted.
    wrapped_sizes = sorted([key] for key in record)

    timings = dict((name, []) for name in gather_order)
    for (size,) in wrapped_sizes:
        entry = record[size]
        for name in gather_order:
            timings[name].append(entry[name])
    series = dict((name, np.array(values)) for name, values in timings.items())

    x_values = np.array(wrapped_sizes)
    x_values = x_values * x_values

    handles = []
    for name, _ in curves:
        handle, = plt.plot(x_values, series[name])
        handles.append(handle)
    legend_labels = tuple(label for _, label in curves)

    plt.legend(handles=handles, labels=legend_labels, loc='best')
    plt.savefig("%s/cpu_gpu.jpg" % (full_path))
    plt.close()

def _plot_acceleration(record, full_path):
    """Plot the six speed-up ratios of ``record`` and save them as acceleration.jpg.

    ``record`` maps a size key to a dict that holds the ratios under the
    ``*_acceleration`` keys listed below. The x axis is the square of each
    size key, the y axis the stored ratio. The image is written to
    ``<full_path>/acceleration.jpg``.
    """
    # Each record key sits next to its legend label so that a curve can never
    # be drawn under the label of a different precision. The order is the
    # drawing order, which also fixes the default colour of every curve.
    curves = (
        ("float32_torch_acceleration", "float32 torch acceleration"),
        ("float64_torch_acceleration", "float64 torch acceleration"),
        ("float32_np_torch_cpu_acceleration", "float32 np torch cpu acceleration"),
        ("float64_np_torch_cpu_acceleration", "float64 np torch cpu acceleration"),
        ("float32_np_torch_gpu_acceleration", "float32 np torch gpu acceleration"),
        ("float64_np_torch_gpu_acceleration", "float64 np torch gpu acceleration"),
    )

    sizes = sorted(record)
    x_values = np.array(sizes)
    x_values = x_values * x_values

    # Gather every series before drawing so that a missing key fails before
    # anything has been added to the current figure.
    series = [
        (label, np.array([record[size][key] for size in sizes]))
        for key, label in curves
    ]

    handles = []
    labels = []
    for label, values in series:
        handle, = plt.plot(x_values, values)
        handles.append(handle)
        labels.append(label)

    plt.legend(handles=handles, labels=labels, loc='best')
    plt.savefig("%s/acceleration.jpg" % (full_path))
    plt.close()

def _write_status(file_obj, i, time_lt):
    float32_acceleration = time_lt[1] / time_lt[3]
    float64_acceleration = time_lt[0] / time_lt[2]

    float64_cpu_str = "i:%s float64 cpu:%s" % (i, time_lt[0])
    float32_cpu_str = "i:%s float32 cpu:%s" % (i, time_lt[1])
    float64_gpu_str = "i:%s float64 gpu:%s" % (i, time_lt[2])
    float32_gpu_str = "i:%s float32 gpu:%s" % (i, time_lt[3])
    float64_numpy_str = "i:%s float64 numpy:%s" % (i, time_lt[4])
    float32_numpy_str = "i:%s float32 numpy:%s" % (i, time_lt[5])

    float32_torch_acceleration_str = "float32 torch acceleration:%s" % float32_acceleration
    float64_torch_acceleration_str = "float64 torch acceleration:%s" % float64_acceleration

    file_obj.write("%s\n" % float64_cpu_str)
    file_obj.write("%s\n" % float32_cpu_str)
    file_obj.write("%s\n" % float64_gpu_str)
    file_obj.write("%s\n" % float32_gpu_str)
    file_obj.write("%s\n" % float64_numpy_str)
    file_obj.write("%s\n" % float32_numpy_str)
    file_obj.write("%s\n" % float32_torch_acceleration_str)
    file_obj.write("%s\n" % float64_torch_acceleration_str)

    print(float64_cpu_str)
    print(float32_cpu_str)
    print(float64_gpu_str)
    print(float32_gpu_str)
    print(float64_numpy_str)
    print(float32_numpy_str)
    print(float32_torch_acceleration_str)
    print(float64_torch_acceleration_str)

def _record_status(record, i, time_lt):
    dic = {}
    dic["float64_torch_cpu"] = time_lt[0]
    dic["float32_torch_cpu"] = time_lt[1]
    dic["float64_torch_gpu"] = time_lt[2]
    dic["float32_torch_gpu"] = time_lt[3]
    dic["float64_numpy"] = time_lt[4]
    dic["float32_numpy"] = time_lt[5]

    dic["float64_torch_acceleration"] = time_lt[0] / time_lt[2]
    dic["float32_torch_acceleration"] = time_lt[1] / time_lt[3]

    dic["float64_np_torch_cpu_acceleration"] = time_lt[4] / time_lt[0]
    dic["float32_np_torch_cpu_acceleration"] = time_lt[5] / time_lt[1]

    dic["float64_np_torch_gpu_acceleration"] = time_lt[4] / time_lt[2]
    dic["float32_np_torch_gpu_acceleration"] = time_lt[5] / time_lt[3]

    record[i] = dic

def _get_numpy_take_time(x, y, repeats, data_type):
    x = np.array(x, dtype=data_type)
    y = np.array(y, dtype=data_type)

    t0 = time.time()
    for i in range(repeats):
        z = np.matmul(x, y)
    t1 = time.time()
    v = z.sum()

    all_time = t1 - t0
    avg_time = all_time / repeats
    return avg_time, v

def _get_take_time(x, y, repeats, data_type, dev="cpu"):
    x = torch.from_numpy(x)
    x = x.type(data_type)

    y = torch.from_numpy(y)
    y = y.type(data_type)

    if dev == "gpu":
        device = torch.device("cuda")
        x = x.to(device)
        y = y.to(device)

    t0 = time.time()
    for i in range(repeats):
        z = torch.matmul(x, y)
    t1 = time.time()

    v = z.sum()
    all_time = t1 - t0
    avg_time = all_time / repeats
    return avg_time, v.item()

def test_cpu_gpu(repeats, size_begin, size_end, step=1):
    """Benchmark square matrix products for a range of sizes and plot the result.

    For every size ``s`` in ``range(size_begin, size_end, step)`` two random
    ``s`` x ``s`` matrices are multiplied with torch on the CPU, torch on the
    GPU and NumPy, each in float64 and float32. The timings are written to
    the file ``output`` inside the directory returned by ``_get_full_path``,
    echoed to stdout, and finally plotted into the same directory.

    Args:
        repeats: number of multiplications per measurement.
        size_begin: first matrix size.
        size_end: exclusive upper bound of the matrix sizes.
        step: distance between consecutive sizes.

    The GPU measurements are always taken, so presumably a CUDA device has to
    be available.
    """
    # Measurement order; it defines the index layout of time_lt that
    # _write_status and _record_status rely on.
    torch_runs = (
        (torch.double, "cpu"),
        (torch.float, "cpu"),
        (torch.double, "gpu"),
        (torch.float, "gpu"),
    )
    numpy_runs = (np.float64, np.float32)

    record = {}
    full_path = _get_full_path(repeats, size_begin, size_end)

    # The with-block closes the report file even when a measurement fails.
    with create_file(full_path, "output") as file_obj:
        for s in range(size_begin, size_end, step):
            x = np.random.randn(s, s)
            y = np.random.randn(s, s)

            time_lt = []
            sums = []
            for data_type, dev in torch_runs:
                avg_time, value = _get_take_time(x, y, repeats, data_type, dev)
                time_lt.append(avg_time)
                sums.append(value)
            for data_type in numpy_runs:
                avg_time, value = _get_numpy_take_time(x, y, repeats, data_type)
                time_lt.append(avg_time)
                sums.append(value)

            # The six sums of the product matrix serve as a visual check that
            # all backends computed the same result.
            sums_line = " ".join("%s" % value for value in sums)
            print(sums_line)
            # Terminate the line so it does not run into the first timing line.
            file_obj.write("%s\n" % sums_line)

            _write_status(file_obj, s, time_lt)
            _record_status(record, s, time_lt)

    _plot_record(record, full_path)

def test_matmul(repeats, max_size, step):
    """Benchmark in windows of ``step`` sizes, then once over the whole range.

    Window ``k`` is passed to ``test_cpu_gpu`` as ``(1 + k*step, (k + 1)*step)``
    for ``k`` in ``range(int(max_size / step))``; the last call uses
    ``(1, max_size)``. ``test_cpu_gpu`` iterates with ``range``, so the second
    value of each pair is an exclusive bound.
    """
    window_count = int(max_size / step)
    windows = [(1 + k * step, (k + 1) * step) for k in range(window_count)]
    windows.append((1, max_size))

    for size_begin, size_end in windows:
        test_cpu_gpu(repeats, size_begin, size_end)

def test():
    """Run the complete benchmark suite with its hard-coded settings.

    First the windowed benchmark for sizes below 500, then three sweeps over
    increasingly large sizes with fewer repeats and wider steps.
    """
    test_matmul(repeats=1000, max_size=500, step=100)

    # (repeats, size_begin, size_end, step) for every further sweep.
    sweeps = (
        (5, 500, 3000, 5),
        (1, 1, 10000, 50),
        (1, 10000, 20000, 200),
    )
    for repeats, size_begin, size_end, step in sweeps:
        test_cpu_gpu(repeats, size_begin, size_end, step)

# Run the complete benchmark suite as soon as this script / notebook cell is executed.
test()


-0.4566861553058859 -0.4566861689090729 -0.4566861553058859 -0.4566861689090729 -0.4566861553058859 -0.45668617
i:1 float64 cpu:3.988742828369141e-06
i:1 float32 cpu:4.987001419067383e-06
i:1 float64 gpu:6.382918357849121e-05
i:1 float32 gpu:6.08372688293457e-05
i:1 float64 numpy:9.97304916381836e-07
i:1 float32 numpy:9.975433349609375e-07
float32 torch acceleration:0.08197280244542855
float64 torch acceleration:0.06249089530440499
-1.6041892930321984 -1.6041892766952515 -1.6041892930321984 -1.604189157485962 -1.6041892930321984 -1.6041892
i:2 float64 cpu:2.194046974182129e-05
i:2 float32 cpu:4.988193511962891e-06
i:2 float64 gpu:5.784344673156738e-05
i:2 float32 gpu:6.8817138671875e-05
i:2 float64 numpy:1.0023117065429687e-06
i:2 float32 numpy:1.9905567169189454e-06
float32 torch acceleration:0.07248475609756098
float64 torch acceleration:0.3793077864747561
5.759258625770899 5.759258270263672 5.759258625770901 5.75925874710083 5.759258625770901 5.7592587
i:3 float64 cpu:2.394390106201

float64 torch acceleration:0.3387223640275454
-130.09381568718743 -130.09384155273438 -130.09381568718746 -130.09381103515625 -130.09381568718746 -130.09384
i:20 float64 cpu:1.4947891235351562e-05
i:20 float32 cpu:5.984067916870117e-06
i:20 float64 gpu:5.8831214904785156e-05
i:20 float32 gpu:5.3855419158935546e-05
i:20 float64 numpy:1.995086669921875e-06
i:20 float32 numpy:2.0003318786621095e-06
float32 torch acceleration:0.11111357056214197
float64 torch acceleration:0.2540809544651396
-29.657870172782953 -29.657873153686523 -29.657870172782992 -29.657878875732422 -29.657870172782964 -29.657867
i:21 float64 cpu:2.6925086975097657e-05
i:21 float32 cpu:9.972810745239258e-06
i:21 float64 gpu:9.175443649291992e-05
i:21 float32 gpu:6.984734535217285e-05
i:21 float64 numpy:1.982688903808594e-06
i:21 float32 numpy:1.9948482513427736e-06
float32 torch acceleration:0.14278009700949956
float64 torch acceleration:0.29344724902948194
-21.54287742486059 -21.54288101196289 -21.542877424860578 -21.5

i:38 float32 gpu:6.881546974182128e-05
i:38 float64 numpy:5.983829498291015e-06
i:38 float32 numpy:4.987001419067383e-06
float32 torch acceleration:0.18840880980345284
float64 torch acceleration:0.3417607756272922
320.47749901537327 320.47760009765625 320.4774990153734 320.47747802734375 320.4774990153734 320.47757
i:39 float64 cpu:2.393770217895508e-05
i:39 float32 cpu:1.4957904815673828e-05
i:39 float64 gpu:7.47966766357422e-05
i:39 float32 gpu:6.482625007629395e-05
i:39 float64 numpy:6.981372833251953e-06
i:39 float32 numpy:5.986213684082031e-06
float32 torch acceleration:0.23073839375360883
float64 torch acceleration:0.3200369756470738
-128.18150437166764 -128.18154907226562 -128.18150437166767 -128.1814422607422 -128.1815043716677 -128.18152
i:40 float64 cpu:2.9917001724243163e-05
i:40 float32 cpu:1.1968135833740234e-05
i:40 float64 gpu:7.380223274230957e-05
i:40 float32 gpu:6.482625007629395e-05
i:40 float64 numpy:7.978200912475586e-06
i:40 float32 numpy:4.987001419067383e-06
flo

i:57 float64 cpu:2.892279624938965e-05
i:57 float32 cpu:1.7954587936401368e-05
i:57 float64 gpu:7.978343963623047e-05
i:57 float32 gpu:5.385589599609375e-05
i:57 float64 numpy:1.296544075012207e-05
i:57 float32 numpy:9.974241256713867e-06
float32 torch acceleration:0.33338203003258254
float64 torch acceleration:0.36251628635293276
593.0470090114565 593.046875 593.0470090114567 593.0469970703125 593.0470090114566 593.0469
i:58 float64 cpu:4.0889978408813475e-05
i:58 float32 cpu:1.595759391784668e-05
i:58 float64 gpu:7.379984855651855e-05
i:58 float32 gpu:5.3856134414672854e-05
i:58 float64 numpy:1.3962507247924805e-05
i:58 float32 numpy:9.973526000976563e-06
float32 torch acceleration:0.29630039532690833
float64 torch acceleration:0.5540658850742556
478.34295207501157 478.34271240234375 478.34295207501174 478.3429260253906 478.34295207501185 478.34286
i:59 float64 cpu:4.088950157165527e-05
i:59 float32 cpu:1.795196533203125e-05
i:59 float64 gpu:7.079386711120605e-05
i:59 float32 gpu:6.3

-41.23682756220151 -41.2369384765625 -41.2368275622014 -41.236846923828125 -41.2368275622016 -41.236908
i:76 float64 cpu:2.892303466796875e-05
i:76 float32 cpu:1.1968612670898438e-05
i:76 float64 gpu:7.479906082153321e-05
i:76 float32 gpu:5.884575843811035e-05
i:76 float64 numpy:4.2882680892944336e-05
i:76 float32 numpy:3.291177749633789e-05
float32 torch acceleration:0.20338955582476087
float64 torch acceleration:0.38667644152615305
-287.6236577046858 -287.6233215332031 -287.6236577046865 -287.6236267089844 -287.62365770468637 -287.62354
i:77 float64 cpu:3.7897348403930665e-05
i:77 float32 cpu:2.094435691833496e-05
i:77 float64 gpu:8.477425575256348e-05
i:77 float32 gpu:9.974265098571777e-05
i:77 float64 numpy:6.482887268066406e-05
i:77 float32 numpy:5.6835412979125974e-05
float32 torch acceleration:0.20998396083671367
float64 torch acceleration:0.44703840886016494
-187.0235534493222 -187.0235137939453 -187.02355344932226 -187.0238037109375 -187.02355344932238 -187.02374
i:78 float64 

float64 torch acceleration:0.6029284069536482
-1730.4635191825084 -1730.4638671875 -1730.4635191825084 -1730.463623046875 -1730.4635191825078 -1730.4637
i:95 float64 cpu:4.487848281860352e-05
i:95 float32 cpu:2.393674850463867e-05
i:95 float64 gpu:7.081055641174316e-05
i:95 float32 gpu:5.884242057800293e-05
i:95 float64 numpy:5.485296249389648e-05
i:95 float32 numpy:3.989315032958985e-05
float32 torch acceleration:0.4067940827299506
float64 torch acceleration:0.6337823778371118
-531.414947794673 -531.4151611328125 -531.4149477946722 -531.4150390625 -531.4149477946719 -531.4149
i:96 float64 cpu:4.888129234313965e-05
i:96 float32 cpu:2.0932912826538088e-05
i:96 float64 gpu:5.285930633544922e-05
i:96 float32 gpu:6.98091983795166e-05
i:96 float64 numpy:5.584907531738281e-05
i:96 float32 numpy:4.188799858093262e-05
float32 torch acceleration:0.2998589485691648
float64 torch acceleration:0.9247433561260757
-281.97634232855 -281.976318359375 -281.9763423285508 -281.9764709472656 -281.97634232

i:114 float64 numpy:6.582450866699219e-05
i:114 float32 numpy:4.4879674911499024e-05
float32 torch acceleration:0.4905753140434361
float64 torch acceleration:0.8709586621991047
-3476.994789474473 -3476.995361328125 -3476.9947894744737 -3476.99462890625 -3476.9947894744737 -3476.9946
i:115 float64 cpu:5.584979057312012e-05
i:115 float32 cpu:2.991986274719238e-05
i:115 float64 gpu:5.78453540802002e-05
i:115 float32 gpu:6.083798408508301e-05
i:115 float64 numpy:7.081031799316406e-05
i:115 float32 numpy:4.388284683227539e-05
float32 torch acceleration:0.49179576209081677
float64 torch acceleration:0.9655017496424464
-1268.4411210620435 -1268.441162109375 -1268.4411210620456 -1268.441162109375 -1268.441121062046 -1268.441
i:116 float64 cpu:5.185985565185547e-05
i:116 float32 cpu:2.49330997467041e-05
i:116 float64 gpu:5.5850982666015625e-05
i:116 float32 gpu:6.08375072479248e-05
i:116 float64 numpy:8.976411819458008e-05
i:116 float32 numpy:7.778811454772949e-05
float32 torch acceleration:0.4

i:133 float64 cpu:7.978606224060058e-05
i:133 float32 cpu:5.086374282836914e-05
i:133 float64 gpu:5.285787582397461e-05
i:133 float32 gpu:5.884170532226563e-05
i:133 float64 numpy:0.00010272622108459473
i:133 float32 numpy:6.682085990905762e-05
float32 torch acceleration:0.864416531604538
float64 torch acceleration:1.5094451110048623
167.25639749535972 167.25714111328125 167.25639749535856 167.25643920898438 167.2563974953581 167.25659
i:134 float64 cpu:7.679390907287597e-05
i:134 float32 cpu:3.690218925476074e-05
i:134 float64 gpu:5.186152458190918e-05
i:134 float32 gpu:5.784678459167481e-05
i:134 float64 numpy:9.873414039611816e-05
i:134 float32 numpy:7.180857658386231e-05
float32 torch acceleration:0.63792982644141
float64 torch acceleration:1.4807491621575648
5154.4666074405395 5154.46728515625 5154.46660744054 5154.46630859375 5154.46660744054 5154.467
i:135 float64 cpu:9.97312068939209e-05
i:135 float32 cpu:4.388284683227539e-05
i:135 float64 gpu:7.18080997467041e-05
i:135 float3