In [1]:
# prevent random files being included in dataset
!rm -rf `find -type d -name .ipynb_checkpoints`

In [2]:
import papermill as pm
import mlflow
import torch
from utils import md5_dir, set_seed
from torch.utils.data import DataLoader
from tqdm import tqdm

from loss_functions import kd_loss
from datasets import TrainImageNetDataset

import torch
import torch.nn.functional as F

# if using pretrained model
from torchvision.models import ResNet50_Weights

  from .autonotebook import tqdm as notebook_tqdm
  warn(


In [3]:
set_seed(42)
torch.cuda.empty_cache()

In [4]:
# Default Parameters
run_id = "5363b9a6f6954354b7bb68d535b5ea88"
train_data_path = "../data/ImageNet/ILSVRC/Data/CLS-LOC/train/"
# test_data_path = "data/ImageNet/ILSVRC/Data/CLS-LOC/val/"

train_data_labels_path = "../data/ImageNet/LOC_train_solution.csv"
# test_data_labels_path = "data/ImageNet/LOC_val_solution.csv"

label_mapping_path = "../data/ImageNet/LOC_synset_mapping.txt"

resnet50_weights = ResNet50_Weights.DEFAULT

preprocess = resnet50_weights.transforms()

device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
train_dataset = TrainImageNetDataset(train_data_path, train_data_labels_path, label_mapping_path, preprocess)

# test_dataset = TestImageNetDataset(test_data_path, test_data_labels_path, label_mapping_path, preprocess)

train_dataloader = DataLoader(train_dataset, batch_size=80, shuffle=True)
# test_dataloader = DataLoader(test_dataset, batch_size=80, shuffle=False)

../data/ImageNet/LOC_synset_mapping.txt


In [6]:
# if using pretrained model
from torchvision.models import resnet50, ResNet50_Weights, resnet18

resnet50_pretrained_weights = ResNet50_Weights.DEFAULT

teacher = resnet50(weights=resnet50_pretrained_weights)

student = resnet18(weights=None)
independent_student = resnet18(weights=None)

In [8]:
def train_student(student, teacher, train_dataloader, criterion, optimizer, epochs, device):
    """
    - student: The smaller, untrained model that uses the teacher's output as an additional label
    - teacher: The pretrained model used to help the student model learn
    - train_dataloader: Dataloader for training data
    - criterion: The loss function
    - optimizer: The optimization algorithm
    - epochs: Number of training epochs
    - device: Device to run training
    """
    teacher.eval()
    teacher.to(device)
    student.train()
    student.to(device)
    epoch_print = 4000
    
    for epoch in range(epochs):
        running_loss = 0.0
        loss_cntr = 0
        previous_loss = 0.0

        for inputs, labels in tqdm(train_dataloader, position = 0, leave = True):
            inputs, labels = inputs.to(device), labels.to(device)
            labels = F.one_hot(labels, num_classes=1000).float()

            # Zero the gradients 
            optimizer.zero_grad()

            teacher_predictions = teacher(inputs)
            student_predictions = student(inputs)

            loss = criterion(student_predictions, labels, teacher_predictions, 0.5, 0.5)

            loss.backward()

            optimizer.step()

            previous_loss += loss
            if loss_cntr > 0 and loss_cntr % epoch_print == 0:
                print("Current loss:", loss)
                print("Running loss:", running_loss)
                print("Loss delta:", previous_loss - loss)
                print("Avg loss per epoch", previous_loss/epoch_print)
                previous_loss = 0.0
            loss_cntr += 1
            
            running_loss += loss.item()

        average_loss = running_loss / len(train_dataloader)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {average_loss:.4f}')

        # # save training loss in mlflow
        # with mlflow.start_run(run_id=run_id) as run:
        #     mlflow.log_metric("student_training_loss", average_loss)

    # with mlflow.start_run(run_id=run_id) as run:
    #     mlflow.pytorch.log_model(
    #         pytorch_model=teacher.to("cpu"),
    #         artifact_path="teacher",
    #     )

    #     mlflow.pytorch.log_model(
    #         pytorch_model=student.to("cpu"),
    #         artifact_path="student"
    #     )
        
        


optimizer = torch.optim.Adam(student.parameters(), lr = 0.0001)
epochs = 6
train_student(student, teacher, train_dataloader, kd_loss, optimizer, epochs, device)

 25%|████████▉                           | 4000/16015 [39:02<2:00:09,  1.67it/s]

Current loss: tensor(3.2187, device='cuda:0', grad_fn=<AddBackward0>)
Running loss: 13855.61013007164
Loss delta: tensor(13855.6123, device='cuda:0', grad_fn=<SubBackward0>)
Avg loss per epoch tensor(3.4647, device='cuda:0', grad_fn=<DivBackward0>)


 50%|████████████████▉                 | 8000/16015 [1:18:02<1:20:19,  1.66it/s]

Current loss: tensor(2.5929, device='cuda:0', grad_fn=<AddBackward0>)
Running loss: 25140.529168605804
Loss delta: tensor(11281.6904, device='cuda:0', grad_fn=<SubBackward0>)
Avg loss per epoch tensor(2.8211, device='cuda:0', grad_fn=<DivBackward0>)


 75%|██████████████████████████▏        | 12000/16015 [1:57:06<38:32,  1.74it/s]

Current loss: tensor(2.0410, device='cuda:0', grad_fn=<AddBackward0>)
Running loss: 35089.33047389984
Loss delta: tensor(9946.2217, device='cuda:0', grad_fn=<SubBackward0>)
Avg loss per epoch tensor(2.4871, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████████████████████████████▉| 16000/16015 [2:36:06<00:08,  1.76it/s]

Current loss: tensor(2.0747, device='cuda:0', grad_fn=<AddBackward0>)
Running loss: 44155.25842308998
Loss delta: tensor(9063.8809, device='cuda:0', grad_fn=<SubBackward0>)
Avg loss per epoch tensor(2.2665, device='cuda:0', grad_fn=<DivBackward0>)


100%|███████████████████████████████████| 16015/16015 [2:36:15<00:00,  1.71it/s]

Epoch [1/6], Loss: 2.7592





MlflowException: Run '5363b9a6f6954354b7bb68d535b5ea88' not found

In [None]:
def train_independent_student(independent_student, train_dataloader, criterion, optimizer, epochs, device):
    """
    - teacher: The pretrained model used to help the student model learn
    - student: The smaller, untrained model that uses the teacher's output as an additional label
    - criterion: The loss function
    - optimizer: The optimization algorithm
    - epochs: Number of training epochs
    - device: Device to run training
    """
    independent_student.train()
    independent_student.to(device)
    
    for epoch in tqdm.tqdm(range(epochs)):
        running_loss = 0.0

        for inputs, labels in tqdm.tqdm(train_dataloader):
            inputs, labels = inputs.to(device), labels.to(device)
            labels = F.one_hot(labels, num_classes=1000).float()

            # Zero the gradients 
            optimizer.zero_grad()

            independent_student_predictions = independent_student(inputs)

            loss = criterion(independent_student_predictions, labels)

            loss.backward()

            optimizer.step()

            running_loss += loss.item()
            break

        average_loss = running_loss / len(train_dataloader)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {average_loss:.4f}')


        # save training loss in mlflow
        with mlflow.start_run(run_id=run_id) as run:
            mlflow.log_metric("independent_student_training_loss", average_loss)

    with mlflow.start_run(run_id=run_id) as run:
        mlflow.pytorch.log_model(
            pytorch_model=independent_student.to("cpu"),
            artifact_path="independent_student"
        )

optimizer = torch.optim.Adam(independent_student.parameters(), lr = 0.0001)
epochs = 1
criterion = torch.nn.CrossEntropyLoss()
train_independent_student(independent_student, train_dataloader, criterion, optimizer, epochs, device)