In [1]:
import torchvision
import torch
import pandas as pd
import torch.fx as fx
import os

from model_static_graph import extract_graph, draw_graph
from pytorch_tracing import py_tracing_forward, py_tracing_backward, py_tracing_optimize
from op_kernel_dict import get_op_kernel
from convert_json_to_csv import convert_json_to_csv


# Pandas display settings: lift the column and row limits, leave the display
# width unset (None), and allow up to 1000 characters per cell.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.width = None
pd.options.display.max_colwidth = 1000




In [3]:
# Model: ResNet-18 on the GPU, an SGD optimizer over its parameters, and a
# random input tensor of shape (5, 3, 224, 224).
model_name = 'resnet18'
model = torchvision.models.resnet18().to('cuda')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
input_data = torch.randn(5, 3, 224, 224).to('cuda')

# Static graph extraction / drawing (currently disabled)
# static_graph,name_module,adj=extract_graph(model)
# draw_graph(adj,name_module, model_name=model_name)
#draw_graph(adj,name_module, model_name=model_name,t=1)

# PyTorch tracing: run the forward, backward and optimize tracing helpers
# in that order for this model.
py_tracing_forward(model, input_data, model_name=model_name)
py_tracing_backward(model, input_data, model_name=model_name)
py_tracing_optimize(model, input_data, optimizer, model_name=model_name)

  warn("Profiler won't be using warmup, this can skew profiler results")


Profiling completed. Trace log saved to './log/resnet18_forward.json'
CSV files saved to 'log_csv'
Profiling completed. Trace log saved to './log/resnet18_backward.json'
CSV files saved to 'log_csv'
Profiling completed. Trace log saved to './log/resnet18_optimize.json'
CSV files saved to 'log_csv'


In [2]:
from VGG import VGG16

# Trace VGG16 on the GPU with a random input tensor of shape (1, 3, 224, 224)
# and an SGD optimizer over the model parameters.
model_name = 'VGG16'
model = VGG16().to('cuda')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
input_data = torch.randn(1, 3, 224, 224).to('cuda')

# Forward, backward and optimize tracing, in that order.
py_tracing_forward(model, input_data, model_name=model_name)
py_tracing_backward(model, input_data, model_name=model_name)
py_tracing_optimize(model, input_data, optimizer, model_name=model_name)

  warn("Profiler won't be using warmup, this can skew profiler results")


Profiling completed. Trace log saved to './log/VGG16_forward.json'
CSV files saved to 'log_csv'
Profiling completed. Trace log saved to './log/VGG16_backward.json'
CSV files saved to 'log_csv'
Profiling completed. Trace log saved to './log/VGG16_optimize.json'
CSV files saved to 'log_csv'


In [4]:
# Decoder-only transformer
from gpt import GPT

model_name = 'GPT'

# Inputs: random integer ids in [0, 5000) with shape (5, 30), plus an
# all-zero tensor of the same shape passed as the model's second argument.
x = torch.randint(low=0, high=5000, size=(5, 30)).cuda()
padding = torch.zeros(5, 30).cuda()

# GPT model: build it on the GPU and run a single forward pass.
model = GPT(
    d_model=64,
    nhead=2,
    feedforward=128,
    vocab_size=5000,
    seq_max_len=50,
).cuda()
y = model(x, padding)



In [2]:
import torch
from transformer import Transformer

# Hyperparameters
vocab_size = 10000
d_model = 512
nhead = 8
num_encoder_layers = 6
num_decoder_layers = 6
dim_feedforward = 2048
max_seq_length = 512
batch_size = 1
src_seq_length = 50
tgt_seq_length = 50

# Input data: random token ids for the source and target sequences.
src = torch.randint(low=0, high=vocab_size, size=(batch_size, src_seq_length)).cuda()  # Source input tokens
tgt = torch.randint(low=0, high=vocab_size, size=(batch_size, tgt_seq_length)).cuda()  # Target input tokens

# Model and optimizer.
model = Transformer(
    d_model=d_model,
    nhead=nhead,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
    dim_feedforward=dim_feedforward,
    vocab_size=vocab_size,
    max_seq_length=max_seq_length,
).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Forward, backward and optimize tracing; the target tokens are supplied
# through the input_data_2 keyword.
py_tracing_forward(model, src, model_name='transformer', input_data_2=tgt)
py_tracing_backward(model, src, model_name='transformer', input_data_2=tgt)
py_tracing_optimize(model, src, optimizer, model_name='transformer', input_data_2=tgt)


  warn("Profiler won't be using warmup, this can skew profiler results")


Profiling completed. Trace log saved to './log/transformer_forward.json'
CSV files saved to 'log_csv'
Profiling completed. Trace log saved to './log/transformer_backward.json'
CSV files saved to 'log_csv'
Profiling completed. Trace log saved to './log/transformer_optimize.json'
CSV files saved to 'log_csv'


In [8]:
# main.py
from LSTM import SimpleLSTM

# Parameter settings
input_size = 10
hidden_size = 50
num_layers = 10
num_classes = 15
sequence_length = 1000
batch_size = 300

# Model on the GPU, a random input tensor of shape
# (batch_size, sequence_length, input_size), and an SGD optimizer.
model_name = 'LSTM'
model = SimpleLSTM(input_size, hidden_size, num_layers, num_classes).to('cuda')
input_data = torch.randn(batch_size, sequence_length, input_size).to('cuda')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Forward, backward and optimize tracing, in that order.
py_tracing_forward(model, input_data, model_name=model_name)
py_tracing_backward(model, input_data, model_name=model_name)
py_tracing_optimize(model, input_data, optimizer, model_name=model_name)

  warn("Profiler won't be using warmup, this can skew profiler results")


Profiling completed. Trace log saved to './log/LSTM_forward.json'
CSV files saved to 'log_csv'
Profiling completed. Trace log saved to './log/LSTM_backward.json'
CSV files saved to 'log_csv'
Profiling completed. Trace log saved to './log/LSTM_optimize.json'
CSV files saved to 'log_csv'
所有文件转换完成。


In [3]:
# Convert the trace logs to CSV: the output directory is passed first,
# the log directory second.
log_directory, output_directory = 'log', 'log_csv'
convert_json_to_csv(output_directory, log_directory)

所有文件转换完成。


In [18]:
from op_kernel_dict import get_op_kernel


# Profiling phases and the models whose operator -> kernel mappings are collected.
Type = ['forward','backward','optimize']
model_list=['resnet18','VGG16','LSTM']

# op_kernel[model_name][op] accumulates the kernels seen for that operator
# across all phases.
# NOTE(review): assumes get_op_kernel(phase, model_name) returns a dict mapping
# operator name -> set of kernel names, as the displayed output suggests - confirm.
op_kernel = {model_name: {} for model_name in model_list}

# The loop variable is deliberately not called `model`, so the nn.Module bound
# to `model` by the earlier cells is not overwritten with a string.
for profiled_model in model_list:
    for tp in Type:
        for op, kernels in get_op_kernel(tp, profiled_model).items():
            # Union the kernel sets: dict.update() would replace the kernels
            # recorded for this operator in an earlier phase with those of
            # the later phase.
            op_kernel[profiled_model].setdefault(op, set()).update(kernels)

op_kernel['LSTM']

所有文件转换完成。


{'aten::copy_': {'void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl_nocast<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#3}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#3}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl_nocast<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#3}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#3}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1} const&)::{lambda(int)#1})'},
 'aten::_cudnn_rnn': {'ampere_sgemm_32x128_tn',
  'void RNN_blockPersist_fp_LSTM<float, float, float, 64>(float const*, float*, float const*, float*, float const*, 

In [19]:
# Check whether the sets stored under different keys of op_kernel intersect
# for key1 in op_kernel:
#     for key2 in op_kernel:
#         if key1!=key2:
#             if len(op_kernel[key1]&op_kernel[key2])>0:
#                 print(key1,key2)
#                 print(op_kernel[key1]-op_kernel[key2])
