<a href="https://colab.research.google.com/github/tushar-semwal/fedperf/blob/main/Santiago/1_Baseline_Notebook_Exp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# FedPerf - Baseline notebook for experiments

In [1]:
!pip install torchsummaryX

Collecting torchsummaryX
  Downloading https://files.pythonhosted.org/packages/36/23/87eeaaf70daa61aa21495ece0969c50c446b8fd42c4b8905af264b40fe7f/torchsummaryX-1.3.0-py3-none-any.whl
Installing collected packages: torchsummaryX
Successfully installed torchsummaryX-1.3.0


In [13]:
%load_ext tensorboard

import copy
from functools import reduce
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import Sampler
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
from torchsummaryX import summary as summaryx
from torchvision import transforms, utils, datasets
from tqdm.notebook import tqdm

%matplotlib inline

# Check assigned GPU
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

# One seed shared by every random number generator the notebook relies on.
RANDOM_SEED = 42

# Seed Python's `random`, NumPy and PyTorch (CPU and CUDA) with the same value.
for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_rng(RANDOM_SEED)

# GPU-specific: request deterministic cuDNN kernels and turn off cuDNN's
# benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
Fri Mar 26 15:07:38 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.56       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                              

## Loading Datasets

In [3]:
# Create the local data directory (no error if it already exists). Done in
# Python rather than through the shell so the cell does not depend on `mkdir -p`.
os.makedirs('data', exist_ok=True)

### MNIST

In [5]:
# MNIST preprocessing: convert to a tensor, then normalise the single channel.
# NOTE(review): 0.1307 / 0.3081 look like the usual MNIST mean / std — confirm.
transforms_mnist = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [None]:
# Disabled: the direct torchvision download currently fails with `HTTP Error 503: Service Unavailable`.
# mnist_data_train = datasets.MNIST('./data/mnist/', train=True, download=True, transform=transforms_mnist)
# mnist_data_test = datasets.MNIST('./data/mnist/', train=False, download=True, transform=transforms_mnist)

In [8]:
# Run this if the above cell is still throwing a 503 error
!wget www.di.ens.fr/~lelarge/MNIST.tar.gz -O data/MNIST.tar.gz
!tar -zxvf ./data/MNIST.tar.gz -C data/

--2021-03-26 14:51:37--  http://www.di.ens.fr/~lelarge/MNIST.tar.gz
Resolving www.di.ens.fr (www.di.ens.fr)... 129.199.99.14
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.di.ens.fr/~lelarge/MNIST.tar.gz [following]
--2021-03-26 14:51:38--  https://www.di.ens.fr/~lelarge/MNIST.tar.gz
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/x-gzip]
Saving to: ‘data/MNIST.tar.gz’

data/MNIST.tar.gz       [            <=>     ]  33.20M  6.47MB/s    in 5.4s    

2021-03-26 14:51:44 (6.17 MB/s) - ‘data/MNIST.tar.gz’ saved [34813078]

MNIST/
MNIST/raw/
MNIST/raw/train-labels-idx1-ubyte
MNIST/raw/t10k-labels-idx1-ubyte.gz
MNIST/raw/t10k-labels-idx1-ubyte
MNIST/raw/t10k-images-idx3-ubyte.gz
MNIST/raw/train-images-idx3-ubyte
MNIST/raw/train-labels-idx1-ubyte.gz
MNIST/raw/t10k-images-idx3-ubyte
MNI

In [9]:
# Build the MNIST train and test datasets from files already present under
# ./data (download=False, so nothing is fetched here).
mnist_data_train, mnist_data_test = (
    datasets.MNIST(root='./data', train=is_train, download=False, transform=transforms_mnist)
    for is_train in (True, False)
)

In [23]:
# Label ids exposed by the MNIST train and test splits.
classes = np.array(list(mnist_data_train.class_to_idx.values()))
classes_test = np.array(list(mnist_data_test.class_to_idx.values()))
num_classes = len(classes_test)
print("Classes: {} \tType: {}".format(classes, type(classes)))
# Report the type of the test array itself; this line used to print
# type(classes), which would hide a mismatch between the two arrays.
print("Classes Test: {} \tType: {}".format(classes_test, type(classes_test)))

Classes: [0 1 2 3 4 5 6 7 8 9] 	Type: <class 'numpy.ndarray'>
Classes Test: [0 1 2 3 4 5 6 7 8 9] 	Type: <class 'numpy.ndarray'>


In [24]:
# Shape of the first training image as stored in the dataset's `.data` attribute.
mnist_image_shape = mnist_data_train.data[0].size()
print("Image Shape: {}".format(mnist_image_shape))

Image Shape: torch.Size([28, 28])


### CIFAR

In [10]:
# CIFAR-10 preprocessing: convert to a tensor, then normalise each of the
# three channels with mean 0.5 and std 0.5.
transforms_cifar10 = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Build the train and test datasets under ./data (download=True).
cifar10_data_train, cifar10_data_test = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transforms_cifar10)
    for is_train in (True, False)
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [25]:
# Label ids exposed by the CIFAR-10 train and test splits.
# Note: this rebinds `classes`, `classes_test` and `num_classes`.
classes = np.array(list(cifar10_data_train.class_to_idx.values()))
classes_test = np.array(list(cifar10_data_test.class_to_idx.values()))
num_classes = len(classes_test)
print("Classes: {} \tType: {}".format(classes, type(classes)))
# Report the type of the test array itself; this line used to print
# type(classes), which would hide a mismatch between the two arrays.
print("Classes Test: {} \tType: {}".format(classes_test, type(classes_test)))

Classes: [0 1 2 3 4 5 6 7 8 9] 	Type: <class 'numpy.ndarray'>
Classes Test: [0 1 2 3 4 5 6 7 8 9] 	Type: <class 'numpy.ndarray'>


In [31]:
# Shape of the first training image as stored in the dataset's `.data` attribute.
cifar10_image_shape = cifar10_data_train.data[0].shape
print("Image Shape: {}".format(cifar10_image_shape))

Image Shape: (32, 32, 3)


### Shakespeare

In [11]:
!wget https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt -O data/shakespeare.txt

--2021-03-26 14:56:22--  https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt
Resolving ocw.mit.edu (ocw.mit.edu)... 151.101.2.133, 151.101.66.133, 151.101.130.133, ...
Connecting to ocw.mit.edu (ocw.mit.edu)|151.101.2.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5458199 (5.2M) [text/plain]
Saving to: ‘data/shakespeare.txt’


2021-03-26 14:56:22 (24.4 MB/s) - ‘data/shakespeare.txt’ saved [5458199/5458199]



In [21]:
class RandomSequentialSampler(Sampler):
    """Sampler that yields indices in contiguous runs, with the runs shuffled.

    The index space of ``datasource`` is cut into consecutive runs of
    ``seq_count`` indices. Run start offsets come from
    ``range(0, len(datasource) - seq_count, seq_count)``, so no run reaches
    past the end of the source. The order of the runs is shuffled with the
    global ``random`` module; indices inside a run stay in ascending order.

    Args:
        datasource: Sized collection the indices refer to.
        seq_count: Number of consecutive indices per run; must be positive.

    Raises:
        ValueError: If ``seq_count`` is not positive.
    """

    def __init__(self, datasource, seq_count):
        if seq_count <= 0:
            raise ValueError(f'seq_count must be positive, got {seq_count}')
        self.source = datasource
        self.seq_count = seq_count

    def _run_starts(self):
        """Return the start offset of every run as a ``range``.

        The range is empty when the source is not longer than one run.
        """
        return range(0, len(self.source) - self.seq_count, self.seq_count)

    def __iter__(self):
        """Return an iterator over all indices, run by run, in shuffled run order."""
        runs = [range(start, start + self.seq_count) for start in self._run_starts()]
        random.shuffle(runs)
        # Flatten lazily instead of materialising one large list of indices.
        return (index for run in runs for index in run)

    def __len__(self):
        """Return the exact number of indices produced by ``__iter__``."""
        # Every run is complete, so the count is runs * run length. The
        # earlier `len(source) - seq_count` could disagree with what
        # __iter__ yields and went negative for short sources.
        return len(self._run_starts()) * self.seq_count


class ShakespeareDataset(Dataset):
    """Character-level next-character dataset built from a plain-text file.

    Item ``i`` pairs the one-hot encoding of character ``i`` with the label
    (vocabulary index) of character ``i + 1``.

    Attributes:
        data: List of every character of the file, in order.
        corpus: Sorted list of the distinct characters; a character's
            position in this list is its label.
    """

    def __init__(self, fname='./data/shakespeare.txt'):
        """Read ``fname`` and build the character vocabulary.

        Args:
            fname: Path of the text file to load.
        """
        super(ShakespeareDataset, self).__init__()

        with open(fname, 'r') as f:
            self.data = list(f.read())
        # Sort the vocabulary: iteration order of a set of strings is not
        # stable across interpreter runs, so an unsorted vocabulary would give
        # a different char -> label mapping on every run.
        self.corpus = sorted(set(self.data))

    def __len__(self):
        """Return the number of (character, next character) pairs."""
        # The last character has no successor, hence one fewer than len(data).
        return len(self.data) - 1

    def __repr__(self):
        return f'{self.__class__} - (length: {self.__len__()})'

    def __getitem__(self, i):
        """Return ``[input, target]`` for position ``i``.

        Returns:
            A two-element list: a ``(1, vocab_size)`` float tensor holding the
            one-hot encoding of character ``i``, and a ``(1, 1)`` float tensor
            holding the label of character ``i + 1``.
        """
        start, end = self.data[i], self.data[i + 1]
        vocab_size = len(self.corpus)
        input_value = ShakespeareDataset.one_hot(self.get_label_from_char(start), vocab_size).unsqueeze(0)
        target_value = torch.Tensor([[self.get_label_from_char(end)]])
        return [input_value, target_value]

    @staticmethod
    def one_hot(l, classes):
        """Return a length-``classes`` zero vector with a 1 at index ``l``."""
        x = torch.zeros(classes)
        x[l] = 1
        return x

    def sequential_sampler(self, batch_size, seq_count=800, num_workers=8):
        """Build a ``DataLoader`` over this dataset using ``RandomSequentialSampler``.

        Args:
            batch_size: Number of items per batch.
            seq_count: Length of each contiguous index run handed to the
                sampler (default 800, the previously hard-coded value).
            num_workers: Number of loader worker processes (default 8, the
                previously hard-coded value).

        Returns:
            The configured ``DataLoader``.
        """
        return DataLoader(self,
                          sampler=RandomSequentialSampler(self, seq_count),
                          batch_size=batch_size,
                          num_workers=num_workers)

    def get_label_from_char(self, c):
        """Return the label (index in ``corpus``) of character ``c``.

        Raises:
            ValueError: If ``c`` is not in the vocabulary.
        """
        return self.corpus.index(c)

    def get_char_from_label(self, l):
        """Return the character whose label is ``l``."""
        return self.corpus[l]

In [22]:
# Load the downloaded Shakespeare text; the bare name below shows its repr.
shakespeare_ds = ShakespeareDataset(fname='./data/shakespeare.txt')
shakespeare_ds

<class '__main__.ShakespeareDataset'> - (length: 5458198)

## Partitioning & Data Loaders

## Models

## Algorithms

### FedAvg

### FedProx

## Experiments

### IID

### Non-IID