In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import syft as sy

hook = sy.TorchHook(torch)

BATCH_SIZE=64

In [2]:
# MovieLense DataSet Input

rs_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
user_ratings_base = pd.read_csv('Data/ua.base', sep='\t', names=rs_cols, encoding='latin-1')
user_ratings_test = pd.read_csv('Data/ua.test', sep='\t', names=rs_cols, encoding='latin-1')

In [3]:
user_ratings_test.describe()

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
count,9430.0,9430.0,9430.0,9430.0
mean,472.0,400.800954,3.587805,883735400.0
std,272.234934,306.859789,1.12024,5360562.0
min,1.0,1.0,1.0,874724700.0
25%,236.0,182.0,3.0,879451500.0
50%,472.0,303.0,4.0,883390400.0
75%,708.0,566.0,4.0,888637800.0
max,943.0,1664.0,5.0,893286600.0


In [5]:
#Create user-item matrix

n_users_base = user_ratings_base['user_id'].unique().max()
n_items_base = user_ratings_base['movie_id'].unique().max()

train_matrix = np.zeros((n_users_base, n_items_base))
for line in user_ratings_base.itertuples():
    train_matrix[line[1]-1,line[2]-1] = line[3]

    
n_users_test = user_ratings_test['user_id'].unique().max()
n_items_test = user_ratings_test['movie_id'].unique().max()

test_matrix = np.zeros((n_users_base, n_items_base))
for line in user_ratings_test.itertuples():
    test_matrix[line[1]-1,line[2]-1] = line[3]    

movies_ids = torch.tensor(list(range(1,n_items_base)))

In [6]:
#Create virtual workers

workers_nodes = []

#training
workers_train_data_pointers = []

for idx, train_ratings in enumerate(train_matrix):
    worker = sy.VirtualWorker(hook, id="user_"+str(idx))
    data_pointer = sy.BaseDataset(movies_ids, torch.tensor(train_ratings)).send(worker)
    workers_nodes.append(worker)
    workers_train_data_pointers.append(data_pointer)

    
#testing
workers_test_data_pointers = []

for idx, test_ratings in enumerate(test_matrix):
    data_pointer = sy.BaseDataset(movies_ids, torch.tensor(test_ratings)).send(workers_nodes[idx])
    workers_test_data_pointers.append(data_pointer)

In [7]:
# Create federated training datasets
federated_train_dataset = sy.FederatedDataset(workers_train_data_pointers)
# Create federated training dataloaders
federated_train_loader = sy.FederatedDataLoader(federated_train_dataset, shuffle=True, batch_size=BATCH_SIZE)

# Create federated testing datasets
federated_test_dataset = sy.FederatedDataset(workers_test_data_pointers)
# Create federated testing dataloaders
federated_test_loader = sy.FederatedDataLoader(federated_test_dataset, shuffle=True, batch_size=BATCH_SIZE)

In [14]:
print(federated_train_dataset)

FederatedDataset
    Distributed accross: user_0, user_1, user_2, user_3, user_4, user_5, user_6, user_7, user_8, user_9, user_10, user_11, user_12, user_13, user_14, user_15, user_16, user_17, user_18, user_19, user_20, user_21, user_22, user_23, user_24, user_25, user_26, user_27, user_28, user_29, user_30, user_31, user_32, user_33, user_34, user_35, user_36, user_37, user_38, user_39, user_40, user_41, user_42, user_43, user_44, user_45, user_46, user_47, user_48, user_49, user_50, user_51, user_52, user_53, user_54, user_55, user_56, user_57, user_58, user_59, user_60, user_61, user_62, user_63, user_64, user_65, user_66, user_67, user_68, user_69, user_70, user_71, user_72, user_73, user_74, user_75, user_76, user_77, user_78, user_79, user_80, user_81, user_82, user_83, user_84, user_85, user_86, user_87, user_88, user_89, user_90, user_91, user_92, user_93, user_94, user_95, user_96, user_97, user_98, user_99, user_100, user_101, user_102, user_103, user_104, user_105, user_106

In [15]:
#Create model

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        
        # item embedding layers
        embedding_dim = 50
        self.item_embedding = nn.Embedding(n_items_base, embedding_dim)
        
        # list of weight matrices
        self.fc_layers = nn.ModuleList()
        # hidden dense layers
        self.fc_layers.append(nn.Linear(50, 50))
        # final prediction layer
        self.output_layer = nn.Linear(50, 5)

    def forward(self, x):
        self.item_embedding(movies_ids)
        
        for idx, _ in enumerate(range(len(self.fc_layers))):
            x = self.fc_layers[idx](x)
            x = F.dropout(x, p=0.2)
            x = F.batch_norm(x)
            x = F.relu(x)
        
        logit = self.output_layer(x)
        rating = torch.sigmoid(logit)
        return rating

model = Model()
optimizer = optim.SGD(model.parameters(), lr=0.01)
print(model)

Model(
  (item_embedding): Embedding(1682, 50)
  (fc_layers): ModuleList(
    (0): Linear(in_features=50, out_features=50, bias=True)
  )
  (output_layer): Linear(in_features=50, out_features=5, bias=True)
)


In [213]:
#Train model

def testing():
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in federated_test_loader:
            output = model(data)
            test_loss += F.nll_loss(
                output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(federated_test_loader)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss,
        correct,
        len(federated_test_loader),
        100. * correct / len(federated_test_loader)))

def training(is_global, epoch_count):
    for epoch in range(0, epoch_count):
        model.train()
        for batch_idx, (data, target) in enumerate(federated_train_loader):
            # NEW) send model to correct worker
            model.send(data.location)
            
            #Call the optimizer for the worker using get_optim
            opt = optims.get_optim(data.location.id)
            #print(data.location.id)

            # 1) erase previous gradients (if they exist)
            opt.zero_grad()

            # 2) make a prediction
            pred = model(data)

            # 3) calculate how much we missed
            loss = ((pred - target)**2).sum()

            # 4) figure out which weights caused us to miss
            loss.backward()

            # 5) change those weights
            opt.step()
            
            # NEW) get model (with gradients)
            model.get()

            # 6) print our progress
            print(loss.get()) # NEW) slight edit... need to call .get() on loss\
    
def learning_algo():
    #Algorithm for training in Federated way
    for cycle in range(0,10):
        # global training
        training(True, 10)
        
        #local training
        training(False, 100)
        

In [211]:
learning_algo()

RuntimeError: index out of range: Tried to access index 1 out of table with -1 rows. at /pytorch/aten/src/TH/generic/THTensorEvenMoreMath.cpp:418