# Example for:
Clustering Experiment from all other classes of FashionMNIST to shoes (Sandal, Sneaker, Ankle boot)

## Setup and utils

In [1]:
import os
import sys
import warnings

sys.path.append('/mnt/c/Users/Arnisa/Desktop/MP/mp-tl-study')
from functions.utils import *
from functions.clustering_utils import *
from functions.visualization_utils import *

warnings.simplefilter(action='ignore', category=FutureWarning)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

### Load the pretrained model as a feature extractor

In [2]:
# Layer indices 0..6. Not referenced in the cells shown here; presumably the
# cut points used by other parts of the study -- TODO confirm.
cuts = list(range(7))

# Single configuration dictionary handed to the data wrapper and the model.
# Keys are grouped by concern; the key names are consumed by project code and
# must not be renamed.
params = dict(
    # MODEL ARCHITECTURE PARAMS
    depth=6,
    num_channels=64,
    activation_function=nn.ReLU,
    kernel_size=3,
    # TRAINING PARAMS
    device=device,
    lr_pretrain=0.001,
    lr_fine_tune=0.001,
    num_train=40,
    early_stop_patience=6,
    save_best=False,
    save_checkpoints=False,
    is_cnn=True,
    is_debug=False,
    classification_report_flag=False,
    batch_size=64,
    # DATASET PARAMS
    # FashionMNIST labels 5, 7 and 9 are the shoe classes (Sandal, Sneaker,
    # Ankle boot); every other label is used for pretraining.
    pre_train_classes=[0, 1, 2, 3, 4, 6, 8],
    fine_tune_classes=[5, 7, 9],
    val_split=0.1,
    num_workers=0,
    generate_dataset_seed=42,
    # EXPERIMENT SETTING PARAMS
    use_pooling=True,
    # Add pooling after every n layers specified here. For only one pooling
    # after all the CNN layers, this equals params['depth'].
    pooling_every_n_layers=2,
    pooling_stride=2,
    freeze=True,     # VARIABLE
    reinit=True,     # VARIABLE
    truncate=False,  # VARIABLE
)

In [3]:
# Directory passed to the dataset constructors; change it to point at your
# own data location.
root_dir = './data'

# Convert images to tensors, then normalise the single channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# FashionMNIST is supplied for both dataset arguments of the wrapper; the
# class split between the two phases comes from `params`
# ('pre_train_classes' / 'fine_tune_classes') -- presumably applied inside
# TransferLearningWrapper, TODO confirm.
dataloader_wrapped = TransferLearningWrapper(
    params,
    datasets.FashionMNIST,
    datasets.FashionMNIST,
    root_dir,
    transform=transform,
)

In [4]:
# Build the network with the output dimension reported by the data wrapper
# and place it on the selected device.
feature_extractor = CNNFeatureExtractor(params, dataloader_wrapped.output_dim)
feature_extractor = feature_extractor.to(device)

# Read the pretrained weights from disk, remapping tensors onto `device`.
pretrained_weights = torch.load('pretrained_models/regular_classes.pth', map_location=device)

# Left as the final expression so the notebook displays the key-matching report.
feature_extractor.load_state_dict(pretrained_weights)

<All keys matched successfully>

### Experiments

In [6]:
# Accumulates one record per (percentage, repeat, layer) for the fine-tuning classes.
scores_all = []
dataloader_wrapped.update_phase('finetune')

# Fractions of the fine-tuning data to sample. Each fraction is listed once:
# listing 0.001 twice re-ran the identical seeds (0..24) and stored duplicate
# records, which wastes compute and double-counts that setting downstream.
percentages = [0.001, 0.01, 0.1, 0.5, 1]

# subset the dataset (same as the empirical experiments)
for sampled_percentage in percentages:
    # Number of repeats depends on the sampled fraction: more repeats for the
    # smaller subsets, fewer for the large ones.
    if sampled_percentage <= 0.01:
        repeats = 25
    elif sampled_percentage < 0.5:
        repeats = 20
    else:
        repeats = 5
    print(f"Percentage:{sampled_percentage}")

    for repeat in tqdm(range(repeats)):
        # The repeat index is passed as the sampling seed for both splits.
        reduced_dataset_train = reduce_dataset(dataloader_wrapped.train_loader, sampled_percentage, seed=repeat)
        reduced_dataset_test = reduce_dataset(dataloader_wrapped.test_loader, sampled_percentage, seed=repeat)
        scores = get_PPR_scores(feature_extractor, reduced_dataset_train, reduced_dataset_test, device)

        # NOTE(review): "Split" is recorded as "Train" although get_PPR_scores
        # receives both the train and the test subset -- confirm this label.
        scores_all.extend(
            {"Percentage": sampled_percentage, "Repeat": repeat, "Layer": layer,
             "PPR": ppr, "Dataset": "Finetune", "Split": "Train"}
            for layer, ppr in scores.items()
        )

Percentage:0.001


100%|██████████| 25/25 [05:18<00:00, 12.74s/it]


In [7]:
# save results
# Create the output directory first: without it, open() raises
# FileNotFoundError and the save fails only after the long experiment loop.
results_path = 'results/regular_cluster.json'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
with open(results_path, 'w') as f:
    json.dump(scores_all, f)