In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Parameters and DataLoaders
input_size = 5
output_size = 2
mid_size = 4

batch_size = 30
data_size = 100

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
USE_CUDA = torch.cuda.is_available()


In [3]:
class RandomDataset(Dataset):

    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return self.len

rand_loader = DataLoader(dataset=RandomDataset(input_size, data_size),
                         batch_size=batch_size, shuffle=True)

In [14]:
class Model(nn.Module):
    # Our model

    def __init__(self, input_size, output_size):
        super(Model, self).__init__()
        self.in_size = input_size
        self.out_size = output_size
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, input):
        output = self.fc(input)
        print("\tIn Model: input size", input.size(),
              "output size", output.size())

        return output
    
    def save(self):
        print("hell year")

In [15]:
model = Model(input_size=3, output_size=1)

In [16]:
model = nn.DataParallel(model)

In [17]:
model.module.save()

hell year


In [5]:
class ComplexModel(nn.Module):
    
    def __init__(self, block=[]):
        super().__init__()
        self.blocks = nn.ModuleList(block)
    
    def forward(self, x):
        for model in self.blocks:
            x = model(x)
        return x

In [6]:
class Transfer(nn.Module):
    
    def __init__(self, model):
        super().__init__()
        self.model = model
    
    def to_parallel(self):
        self.model = self.model.to(DEVICE)
        if USE_CUDA:
            self.model = nn.DataParallel(self.model)
    
    def __call__(self, x):
        self.to_parallel()
        x = self.model(x)
        return x

In [7]:
class Graph(nn.Module):
    
    def __init__(self, in_size, mid_size, out_size):
        super().__init__()
        model1 = Model(in_size, mid_size)
        model2 = Model(mid_size, out_size)
        model3 = Model(mid_size, out_size)
        self.edge_out = {str((in_size, mid_size)): model1}
        self.edge_in = {str((mid_size, out_size))+"1": model2, str((mid_size, out_size))+"2": model3}
        complex_model1 = ComplexModel(block=[self.edge_out[str((in_size, mid_size))], self.edge_in[str((mid_size, out_size))+"1"]]) 
        complex_model2 = ComplexModel(block=[self.edge_out[str((in_size, mid_size))], self.edge_in[str((mid_size, out_size))+"2"]])
        
#         self.params = {}
#         self.params.update(self.edge_in)
#         self.params.update(self.edge_out)
        self.params = {"1":complex_model1, "2":complex_model2}
        self.params = nn.ModuleDict(self.params)
        print(self.params.keys())
        #self.params = nn.ModuleDict(self.params)
        #print(id(self.params[str((in_size, mid_size))]))
        transfer1 = Transfer(complex_model1)
        transfer2 = Transfer(complex_model2)
        self.models = {
            1: transfer1,
            2: transfer2,
        }

    
    def compute_path(self, data, paths=[1]):
        for path in paths:
            pred = self.models[path](data)
        return pred
        

In [124]:
new_model = Graph(5, 3, 1)
i = 0
for param in new_model.parameters():
    i+=1
print(i)

odict_keys(['1', '2'])
6


In [118]:
new_model = Graph(5, 3, 1)
i = 0
for param in new_model.parameters():
    i+=1
print(i)

odict_keys(['1', '2'])
Parameter containing:
tensor([[-0.0025,  0.4084, -0.0397,  0.1268, -0.1185],
        [ 0.2268,  0.3659,  0.1232,  0.1255, -0.0987],
        [-0.2927, -0.1250, -0.0806,  0.3706, -0.3717]], requires_grad=True)
Parameter containing:
tensor([-0.0472,  0.2329,  0.3608], requires_grad=True)
Parameter containing:
tensor([[-0.4971,  0.0874,  0.0073]], requires_grad=True)
Parameter containing:
tensor([-0.0401], requires_grad=True)
Parameter containing:
tensor([[ 0.2731, -0.1283, -0.2387]], requires_grad=True)
Parameter containing:
tensor([-0.1481], requires_grad=True)


In [72]:
new_model = Graph(5, 3, 1)

print(new_model.state_dict())

139955367158544
139955367158544
OrderedDict([('params.(3, 1)1.fc.weight', tensor([[ 0.2868,  0.4138, -0.3961]])), ('params.(3, 1)1.fc.bias', tensor([-0.2932])), ('params.(3, 1)2.fc.weight', tensor([[0.0177, 0.4977, 0.1249]])), ('params.(3, 1)2.fc.bias', tensor([-0.2039])), ('params.(5, 3).fc.weight', tensor([[ 0.4354,  0.4455, -0.3568, -0.1309,  0.1331],
        [-0.2123,  0.0450,  0.2694,  0.1094, -0.0351],
        [-0.3255, -0.4168,  0.2886,  0.0686,  0.4170]])), ('params.(5, 3).fc.bias', tensor([ 0.2080, -0.0991, -0.0649]))])


In [91]:
for data in rand_loader:
    input = data.to(DEVICE)
    output = new_model.compute_path(input, paths=[1, 2])
    loss = output.mean().backward()
    target_weights = list(new_model.parameters())
    print(target_weights[0].grad)
    print("Outside: input size", input.size(),
          "output_size", output.size())

	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 3])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 3])
	In Model: input size	In Model: input size torch.Size([15, 3]) output size torch.Size([15, 1])
 torch.Size([15, 3]) output size torch.Size([15, 1])
	In Model: input size 	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 3])
torch.Size([15, 5]) output size torch.Size([15, 3])
	In Model: input size torch.Size([15, 3]) output size torch.Size([15, 1])
	In Model: input size torch.Size([15, 3]) output size torch.Size([15, 1])
tensor([[ 0.0142, -0.0033, -0.0102,  0.0031,  0.0053],
        [ 0.0134, -0.0031, -0.0097,  0.0030,  0.0050],
        [ 0.1034, -0.0237, -0.0744,  0.0228,  0.0383]], device='cuda:0')
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 1])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 3])
	In Model: input size torch.Size([8, 5]) output size torch.Size([8, 3])
	In Model

In [48]:
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    new_model = nn.DataParallel(complex_model1)
    

new_model.to(device)

Let's use 2 GPUs!


DataParallel(
  (module): ComplexModel(
    (blocks): ModuleList(
      (0): Model(
        (fc): Linear(in_features=5, out_features=4, bias=True)
      )
      (1): Model(
        (fc): Linear(in_features=4, out_features=2, bias=True)
      )
    )
  )
)

In [49]:
for data in rand_loader:
    input = data.to(device)
    output = new_model(input)
    print("Outside: input size", input.size(),
          "output_size", output.size())


	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
 torch.Size([15, 4]) output size torch.Size([15, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
 torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
 torch.Size([15, 4]) 

In [25]:
model = Model(mid_size, output_size)
model_common1 = Model(input_size, mid_size)
model_common2 = Model(input_size, mid_size)
complex_model1 = ComplexModel(block=[model_common1, model])
complex_model2 = ComplexModel(block=[model_common2, model])
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model1 = nn.DataParallel(complex_model1)
    model2 = nn.DataParallel(complex_model2)

model1.to(device)
model2.to(device)

Let's use 2 GPUs!


DataParallel(
  (module): ComplexModel(
    (blocks): ModuleList(
      (0): Model(
        (fc): Linear(in_features=5, out_features=4, bias=True)
      )
      (1): Model(
        (fc): Linear(in_features=4, out_features=2, bias=True)
      )
    )
  )
)

In [26]:
for data in rand_loader:
    input = data.to(device)
    output = model1(input)
    print("Outside: input size", input.size(),
          "output_size", output.size())

	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 4])	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
 output size torch.Size([15, 2])
	In Model: input size torch.Size([15, 4]) 

In [27]:
for data in rand_loader:
    input = data.to(device)
    output = model2(input)
    print("Outside: input size", input.size(),
          "output_size", output.size())

	In Model: input size torch.Size([15, 5]) 	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size torch.Size([15, 5]) output size 	In Model: input sizetorch.Size([15, 4])
 torch.Size([15, 4]) output size torch.Size([15, 2])
	In Model: input size torch.Size([15, 4]) output size torch.Size([15, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4])
	In Model: input size	In Model: input size torch.Size([15, 5]) output size torch.Size([15, 4]) torch.Size([15, 4]) output size torch.Size([15, 2])

	In Model: input size torch.Size([15, 4]) 

In [28]:
device

device(type='cuda')

In [80]:
a = [1, 2, 3, 4, 5, 6]
b = 1
print(b in a)

True
