### Use the line_profiler to profile the GCN learning 

In [8]:
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


### Planetoid PubMed data:

In [15]:
%%writefile profile_pubMed.py
# below is the code to be profiled:

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
    

from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='~/tmp/Planetoid/PubMed', name='PubMed')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print(data)
    
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

    
# main part of executing the train and test of GCN 
def simulate(steps):

    model.train()
    for epoch in range(steps):
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

    model.eval()
    _, pred = model(data).max(dim=1)
    correct = float (pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
    acc = correct / data.test_mask.sum().item()
    print('Accuracy: {:.4f}'.format(acc))

Overwriting profile_pubMed.py


In [10]:
from profile_pubMed import simulate
%lprun -T profile_pubMed_res.txt -f simulate simulate(200)
print(open('profile_pubMed_res.txt', 'r').read())

1 3 500
Data(edge_index=[2, 88648], test_mask=[19717], train_mask=[19717], val_mask=[19717], x=[19717, 500], y=[19717])








Accuracy: 0.7850

*** Profile printout saved to text file 'profile_pubMed_res.txt'. 
Timer unit: 1e-06 s

Total time: 0.609469 s
File: /home/xiangli/projects/pytorch_tutorial/profile_pubMed.py
Function: simulate at line 37

Line #      Hits         Time  Per Hit   % Time  Line Contents
    37                                           def simulate(steps):
    38                                           
    39         1         30.0     30.0      0.0      model.train()
    40       201        197.0      1.0      0.0      for epoch in range(steps):
    41       200       4353.0     21.8      0.7          optimizer.zero_grad()
    42       200     365456.0   1827.3     60.0          out = model(data)
    43       200      62351.0    311.8     10.2          loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    44       200     112083.0    560.4     18.4          loss.backward()
    45       200      62415.0    312.1     10.2          optimizer.step()
    46                 



In [11]:
!nvidia-smi
!pwd


Sat Sep 14 09:41:04 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.00    Driver Version: 418.87.00    CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 166...  On   | 00000000:01:00.0  On |                  N/A |
| 43%   34C    P8     6W / 120W |   1256MiB /  5909MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  

In [12]:
# The overall performance including the to function
!python -m torch.utils.bottleneck /home/xiangli/projects/pytorch_tutorial/profile_pubMed.py > bottleneck_profile_pubMed_res.txt

In [13]:
!nvidia-smi

Sat Sep 14 09:41:58 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.00    Driver Version: 418.87.00    CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 166...  On   | 00000000:01:00.0  On |                  N/A |
| 43%   35C    P2    23W / 120W |   1264MiB /  5909MiB |     12%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [5]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
    
torch.cuda.empty_cache()

from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='~/tmp/Planetoid/PubMed', name='PubMed')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print(data)
    
# main part of executing the train and test of GCN 
with torch.autograd.profiler.profile(use_cuda=True) as prof:
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net().to(device)
    data = dataset[0].to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    
    steps = 200
    model.train()
    for epoch in range(steps):
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

    model.eval()
    _, pred = model(data).max(dim=1)
    correct = float (pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
    acc = correct / data.test_mask.sum().item()
    print('Accuracy: {:.4f}'.format(acc))

res = prof.key_averages().table(sort_by="cuda_time_total")
print(res)
# file1 = open("Profiler_Planetoid_Pubmed_res.txt","w") 
file1 = open("toDevice_Profiler_Planetoid_Pubmed_res.txt","w") 
# \n is placed to indicate EOL (End of Line) 
file1.write(str(res) ) 
file1.close() #to change file access modes 

1 3 500
Data(edge_index=[2, 88648], test_mask=[19717], train_mask=[19717], val_mask=[19717], x=[19717, 500], y=[19717])










Accuracy: 0.7870
-------------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  
Name                                   Self CPU total %  Self CPU total   CPU total %      CPU total        CPU time avg     CUDA total %     CUDA total       CUDA time avg    Number of Calls  
-------------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  
mm                                     2.81%            14.652ms         2.81%            14.652ms         14.622us         14.93%           134.704ms        134.435us        1002             
index                                  36.58%           190.660ms        36.58%           190.660ms        79.046us         14.22%           128.292ms        53.189us         2412             
MmBackward       

### Planetoid core data:

In [16]:
%%writefile profile_Cora.py
# below is the code to be profiled:

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
    
from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='~/tmp/Planetoid/Cora', name='Cora')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print(data)
    
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

    
# main part of executing the train and test of GCN 
def simulate(steps):

    model.train()
    for epoch in range(steps):
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

    model.eval()
    _, pred = model(data).max(dim=1)
    correct = float (pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
    acc = correct / data.test_mask.sum().item()
    print('Accuracy: {:.4f}'.format(acc))

Writing profile_Cora.py


In [17]:
from profile_Cora import simulate
%lprun -T profile_Cora_res.txt -f simulate simulate(200)
print(open('profile_Cora_res.txt', 'r').read())

1 7 1433
Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])






Accuracy: 0.8150

*** Profile printout saved to text file 'profile_Cora_res.txt'. 
Timer unit: 1e-06 s

Total time: 0.525296 s
File: /home/xiangli/projects/pytorch_tutorial/profile_Cora.py
Function: simulate at line 36

Line #      Hits         Time  Per Hit   % Time  Line Contents
    36                                           def simulate(steps):
    37                                           
    38         1         28.0     28.0      0.0      model.train()
    39       201        216.0      1.1      0.0      for epoch in range(steps):
    40       200       4426.0     22.1      0.8          optimizer.zero_grad()
    41       200     278133.0   1390.7     52.9          out = model(data)
    42       200      63160.0    315.8     12.0          loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    43       200     113502.0    567.5     21.6          loss.backward()
    44       200      64161.0    320.8     12.2          optimizer.step()
    45                     



In [18]:
# use aotograd profiler to take care of the overall including : copy the data to the device
# !python -m torch.utils.bottleneck /home/xiangli/projects/pytorch_tutorial/profile_Cora.py > bottleneck_profile_Cora_res.txt

In [6]:

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
    
from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='~/tmp/Planetoid/Cora', name='Cora')
print(len(dataset), dataset.num_classes, dataset.num_node_features)
data = dataset[0]
print(data)

    
# main part of executing the train and test of GCN 
with torch.autograd.profiler.profile(use_cuda=True) as prof:
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('The currently used device is: ', device)
    model = Net().to(device)
    data = dataset[0].to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    
    steps = 200
    model.train()
    for epoch in range(steps):
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

    model.eval()
    _, pred = model(data).max(dim=1)
    correct = float (pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
    acc = correct / data.test_mask.sum().item()
    print('Accuracy: {:.4f}'.format(acc))

    
res = prof.key_averages().table(sort_by="cuda_time_total")
print(res)
# file1 = open("Profiler_Planetoid_Pubmed_res.txt","w") 
file1 = open("toDevice_Profiler_Planetoid_Cora_res.txt","w") 
# \n is placed to indicate EOL (End of Line) 
file1.write(str(res) ) 
file1.close() #to change file access modes     


1 7 1433
Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
The currently used device is:  cuda










Accuracy: 0.7910
-----------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  
Name                                 Self CPU total %  Self CPU total   CPU total %      CPU total        CPU time avg     CUDA total %     CUDA total       CUDA time avg    Number of Calls  
-----------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  ---------------  
index                                26.35%           120.560ms        26.35%           120.560ms        49.984us         17.36%           115.381ms        47.836us         2412             
mm                                   3.07%            14.053ms         3.07%            14.053ms         14.025us         9.43%            62.636ms         62.511us         1002             
IndexBackward              