In [1]:
import os
import omegaconf
from shell.utils.experiment_utils import *
from shell.utils.metric import *
import matplotlib.pyplot as plt
from shell.fleet.network import TopologyGenerator
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import pandas as pd
from shell.fleet.utils.fleet_utils import *

In [2]:
# Notebook-level configuration consumed by the cells below.
# `num_tasks` and `num_init_tasks` are read as globals inside `get_cfg`,
# which sets net_cfg['num_tasks'] = num_tasks - num_init_tasks.
num_tasks = 10
num_init_tasks = 4
# Algorithm variant to inspect; passed to `get_cfg` and checked against
# "modular" before printing `net.structure`. Toggle by (un)commenting.
# algo = "monolithic"
algo = "modular"
# Root directory of the experiment run; passed to `get_cfg` as `save_root_dir`.
experiment = "experiment_results/fedavg_transfer_also_projection"

In [3]:
def get_cfg(save_root_dir = "experiment_results/toy_fedprox",
            dataset = "mnist",
            algo = "monolithic",
            num_train = 64,
            seed = 0,
            use_contrastive = True,):
    """Load a saved Hydra config for one run and rebuild its experiment objects.

    The run directory is
    ``<save_root_dir>/<job_name>/<dataset>/<algo>/seed_<seed>`` where
    ``job_name`` is ``"<dataset>_<algo>_numtrain_<num_train>"`` with a
    ``"_contrastive"`` suffix when ``use_contrastive`` is truthy. The config is
    read from ``hydra_out/.hydra/config.yaml`` inside that directory and handed
    to ``setup_experiment``.

    NOTE: ``net_cfg['num_tasks']`` is overwritten using the notebook-level
    globals ``num_tasks`` and ``num_init_tasks`` (not values from the loaded
    config), so those must be defined before this function is called.

    Returns:
        A 9-tuple ``(graph, datasets, NetCls, LearnerCls, net_cfg, agent_cfg,
        train_cfg, fleet_additional_cfg, cfg)``: the eight values produced by
        ``setup_experiment`` followed by the loaded config itself.
    """
    suffix = "_contrastive" if use_contrastive else ""
    job_name = f"{dataset}_{algo}_numtrain_{num_train}{suffix}"

    # Locate and read the Hydra config that was saved alongside the run.
    run_dir = os.path.join(save_root_dir, job_name, dataset, algo, f"seed_{seed}")
    cfg = omegaconf.OmegaConf.load(
        os.path.join(run_dir, "hydra_out", ".hydra", "config.yaml")
    )

    (graph, datasets, NetCls, LearnerCls,
     net_cfg, agent_cfg, train_cfg, fleet_additional_cfg) = setup_experiment(cfg)

    # Override the task count with the number of tasks after the initial ones,
    # taken from the notebook globals.
    net_cfg['num_tasks'] = num_tasks - num_init_tasks

    return (graph, datasets, NetCls, LearnerCls,
            net_cfg, agent_cfg, train_cfg, fleet_additional_cfg, cfg)


In [4]:
# Rebuild the experiment objects for the run selected in the config cell.
(graph, datasets, NetCls, LearnerCls,
 net_cfg, agent_cfg, train_cfg, fleet_additional_cfg, cfg) = get_cfg(
    save_root_dir=experiment,
    algo=algo,
)

{'train': {'component_update_freq': 100, 'num_epochs': 100, 'init_component_update_freq': 100, 'init_num_epochs': 100, 'save_freq': 1}, 'dataset': {'dataset_name': 'mnist', 'num_tasks': 10, 'num_classes_per_task': 2, 'with_replacement': True, 'num_trains_per_class': 64, 'num_vals_per_class': 50, 'remap_labels': True}, 'net': {'name': 'mlp', 'depth': 4, 'layer_size': 64, 'dropout': 0.0}, 'sharing_strategy': {'name': 'gradient', 'num_coms_per_round': 1, 'comm_freq': 10, 'mu': 1.0}, 'seed': 0, 'algo': 'modular', 'job_name': 'mnist_modular_numtrain_64_contrastive', 'num_agents': 8, 'root_save_dir': 'experiment_results/fedavg_transfer_also_projection/', 'parallel': True, 'num_init_tasks': 4, 'agent': {'save_dir': '${root_save_dir}/${job_name}/${dataset.dataset_name}/${algo}/seed_${seed}', 'batch_size': 64, 'memory_size': 32, 'improvement_threshold': 0.05, 'use_contrastive': True}}
i_size 28
num_classes 2
net_cfg {'name': 'mlp', 'depth': 4, 'layer_size': 64, 'dropout': 0.0, 'i_size': 28, 'nu

In [5]:
# AgentCls = get_agent_cls(cfg.sharing_strategy, cfg.algo, cfg.parallel)
# FleetCls = get_fleet(cfg.sharing_strategy, cfg.parallel)

# fleet = FleetCls(graph, cfg.seed, datasets, cfg.sharing_strategy, AgentCls, NetCls=NetCls,
#                      LearnerCls=LearnerCls, net_kwargs=net_cfg, agent_kwargs=agent_cfg,
#                      train_kwargs=train_cfg, **fleet_additional_cfg)

In [6]:
# Checkpoint to inspect: which agent, after which task.
agent_id = 69420
task_id = 3
num_added_components = None

net = load_net(
    cfg,
    NetCls,
    net_cfg,
    agent_id=agent_id,
    task_id=task_id,
    num_added_components=num_added_components,
)

# Agent id 69420 is special-cased: it is evaluated on the fleet-level
# 'fake_dataset' instead of one of the per-agent datasets.
dataset = fleet_additional_cfg['fake_dataset'] if agent_id == 69420 else datasets[agent_id]

# One test loader per task, for tasks 0..task_id inclusive.
testloaders = {
    task: torch.utils.data.DataLoader(
        testset,
        batch_size=256,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
    for task, testset in enumerate(dataset.testset[:task_id + 1])
}

# Per-task evaluation results, then the projection weights and components.
print()
print(eval_net(net, testloaders))
print('\n\n')
print(net.random_linear_projection.weight)
print(net.components)
print('\n\n')

# Only the modular network exposes a per-task structure.
if algo == "modular":
    print(net.structure)
    for t in range(task_id + 1):
        print(net.structure[t])

save_dir experiment_results/fedavg_transfer_also_projection//mnist_modular_numtrain_64_contrastive/mnist/modular/seed_0/agent_69420/task_3
net_cfg {'depth': 4, 'layer_size': 64, 'dropout': 0.0, 'i_size': 28, 'num_classes': 2, 'num_tasks': 10, 'num_init_tasks': 4, 'use_contrastive': True}

{0: 0.9905378486055777, 1: 0.9780163599182005, 2: 0.9430443548387096, 3: 0.9752352649826647, 'avg': 0.9717084570862882}



Parameter containing:
tensor([[ 2.3161e-02, -1.1078e-02,  2.0928e-03,  ..., -6.8462e-03,
         -2.5744e-02, -3.1589e-02],
        [-5.6778e-03, -6.7004e-03, -3.5386e-02,  ...,  9.3553e-03,
         -1.0013e-02,  1.5819e-02],
        [-6.9292e-03, -7.4016e-05,  3.2825e-02,  ..., -3.5247e-02,
          2.2576e-02,  1.0087e-02],
        ...,
        [-5.2874e-03, -1.5547e-02,  6.1646e-03,  ..., -2.0338e-02,
          2.0868e-02, -3.3560e-02],
        [ 6.7798e-03,  6.0861e-03, -4.3917e-03,  ..., -2.9448e-02,
          1.9017e-02, -2.7786e-02],
        [-3.2708e-02, -1.6300e-02,  2

In [7]:
# Checkpoint to inspect: which agent, after which task.
agent_id = 0
task_id = 5
# num_added_components = 1
num_added_components = None

net = load_net(
    cfg,
    NetCls,
    net_cfg,
    agent_id=agent_id,
    task_id=task_id,
    num_added_components=num_added_components,
)

# Agent id 69420 is special-cased: it is evaluated on the fleet-level
# 'fake_dataset' instead of one of the per-agent datasets.
dataset = fleet_additional_cfg['fake_dataset'] if agent_id == 69420 else datasets[agent_id]

# One test loader per task, for tasks 0..task_id inclusive.
testloaders = {
    task: torch.utils.data.DataLoader(
        testset,
        batch_size=256,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
    for task, testset in enumerate(dataset.testset[:task_id + 1])
}

# Per-task evaluation results, then the projection weights and components.
print()
print(eval_net(net, testloaders))
print('\n\n')
print(net.random_linear_projection.weight)
print(net.components)
print('\n\n')

# Only the modular network exposes a per-task structure.
if algo == "modular":
    print(net.structure)
    for t in range(task_id + 1):
        print(net.structure[t])

save_dir experiment_results/fedavg_transfer_also_projection//mnist_modular_numtrain_64_contrastive/mnist/modular/seed_0/agent_0/task_5

{0: 0.7035256410256411, 1: 0.9407630522088354, 2: 0.9335051546391753, 3: 0.9573293172690763, 4: 0.9012282497441146, 5: 0.983451536643026, 'avg': 0.9033004919216449}



Parameter containing:
tensor([[ 2.3161e-02, -1.1078e-02,  2.0928e-03,  ..., -6.8462e-03,
         -2.5744e-02, -3.1589e-02],
        [-5.6778e-03, -6.7004e-03, -3.5386e-02,  ...,  9.3553e-03,
         -1.0013e-02,  1.5819e-02],
        [-6.9292e-03, -7.4016e-05,  3.2825e-02,  ..., -3.5247e-02,
          2.2576e-02,  1.0087e-02],
        ...,
        [-5.2874e-03, -1.5547e-02,  6.1646e-03,  ..., -2.0338e-02,
          2.0868e-02, -3.3560e-02],
        [ 6.7798e-03,  6.0861e-03, -4.3917e-03,  ..., -2.9448e-02,
          1.9017e-02, -2.7786e-02],
        [-3.2708e-02, -1.6300e-02,  2.9779e-02,  ...,  1.0043e-02,
          2.2422e-02, -3.5540e-03]], device='cuda:0')
ModuleList(
  (0-3): 4 x 

In [9]:
# Checkpoint to inspect: which agent, after which task, and which
# added-components variant of the saved network.
agent_id = 1
task_id = 5
# num_added_components = None
num_added_components = 3

net = load_net(
    cfg,
    NetCls,
    net_cfg,
    agent_id=agent_id,
    task_id=task_id,
    num_added_components=num_added_components,
)

# Agent id 69420 is special-cased: it is evaluated on the fleet-level
# 'fake_dataset' instead of one of the per-agent datasets.
dataset = fleet_additional_cfg['fake_dataset'] if agent_id == 69420 else datasets[agent_id]

# One test loader per task, for tasks 0..task_id inclusive.
testloaders = {
    task: torch.utils.data.DataLoader(
        testset,
        batch_size=256,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
    for task, testset in enumerate(dataset.testset[:task_id + 1])
}

# Per-task evaluation results, then the projection weights and components.
print()
print(eval_net(net, testloaders))
print('\n\n')
print(net.random_linear_projection.weight)
print(net.components)
print('\n\n')

# Only the modular network exposes a per-task structure.
if algo == "modular":
    print(net.structure)
    for t in range(task_id + 1):
        print(net.structure[t])


save_dir experiment_results/fedavg_transfer_also_projection//mnist_modular_numtrain_64_contrastive/mnist/modular/seed_0/agent_1/task_5



{0: 0.9442786069651742, 1: 0.8567708333333334, 2: 0.8245614035087719, 3: 0.8899497487437186, 4: 0.8760683760683761, 5: 0.9059959349593496, 'avg': 0.8829374839297873}



Parameter containing:
tensor([[ 2.3161e-02, -1.1078e-02,  2.0928e-03,  ..., -6.8462e-03,
         -2.5744e-02, -3.1589e-02],
        [-5.6778e-03, -6.7004e-03, -3.5386e-02,  ...,  9.3553e-03,
         -1.0013e-02,  1.5819e-02],
        [-6.9292e-03, -7.4016e-05,  3.2825e-02,  ..., -3.5247e-02,
          2.2576e-02,  1.0087e-02],
        ...,
        [-5.2874e-03, -1.5547e-02,  6.1646e-03,  ..., -2.0338e-02,
          2.0868e-02, -3.3560e-02],
        [ 6.7798e-03,  6.0861e-03, -4.3917e-03,  ..., -2.9448e-02,
          1.9017e-02, -2.7786e-02],
        [-3.2708e-02, -1.6300e-02,  2.9779e-02,  ...,  1.0043e-02,
          2.2422e-02, -3.5540e-03]], device='cuda:0')
ModuleList(
  (0-6): 7 x Linear(in_features=64, out_features=64, bias=True)
)



ParameterList(
    (0): Parameter containing: [torch.float32 of size 7x4 (GPU 0

**NOTE** that the random linear projection differs between the joint agent and the local agents, which explains the lack of transfer, since we very rarely update the modules. While this is also true for the monolithic models, it is not an issue there because they are allowed to adapt their layers to those changes.