In [1]:
# prevent random files being included in dataset
!rm -rf `find -type d -name .ipynb_checkpoints`

In [2]:
import papermill as pm
import mlflow
import torch
from utils import md5_dir, set_seed
from torch.utils.data import DataLoader
from functools import partial
from tqdm import tqdm
# tqdm = partial(tqdm, position = 0, leave = True)

from loss_functions import kd_loss
from datasets import TrainImageNetDataset

import torch
import torch.nn.functional as F

from utils import *

# if using pretrained model
from torchvision.models import ResNet50_Weights

  from .autonotebook import tqdm as notebook_tqdm
  warn(


In [3]:
set_seed(42)
torch.cuda.empty_cache()

In [4]:
# Default Parameters
run_id = "5363b9a6f6954354b7bb68d535b5ea88"
train_data_path = "../data/ImageNet/ILSVRC/Data/CLS-LOC/train/"
# test_data_path = "data/ImageNet/ILSVRC/Data/CLS-LOC/val/"

train_data_labels_path = "../data/ImageNet/LOC_train_solution.csv"
# test_data_labels_path = "data/ImageNet/LOC_val_solution.csv"

label_mapping_path = "../data/ImageNet/LOC_synset_mapping.txt"

resnet50_weights = ResNet50_Weights.DEFAULT

preprocess = resnet50_weights.transforms()

device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
train_dataset = TrainImageNetDataset(train_data_path, train_data_labels_path, label_mapping_path, preprocess, data_reduction = 0)

# test_dataset = TestImageNetDataset(test_data_path, test_data_labels_path, label_mapping_path, preprocess)

train_dataloader = DataLoader(train_dataset, batch_size=80, shuffle=True)

In [6]:
# if using pretrained model
from torchvision.models import resnet50, ResNet50_Weights, resnet18

resnet50_pretrained_weights = ResNet50_Weights.DEFAULT

epochs = 3
lr = 0.0001

In [None]:
teacher = resnet50(weights=resnet50_pretrained_weights)
student = resnet18(weights=None)
independent_student = resnet18(weights=None)


ind_optimizer = torch.optim.Adam(student.parameters(), lr)
optimizer = torch.optim.Adam(independent_student.parameters(), lr)

ind_criterion = torch.nn.CrossEntropyLoss()
criterion = kd_loss

ind_name = "independent_student"
std_name = "student"

components = {
    ind_name: {"model": independent_student, 
               "opt": ind_optimizer, 
               "criterion": ind_criterion, 
               "running_loss": 0,
               "previous_loss": 0.0
              },
    std_name: {"model": student, 
               "opt": optimizer, 
               "criterion": criterion,
               "running_loss": 0,
               "previous_loss": 0.0
              }
}

train_models(components, teacher, train_dataloader, epochs, device, run_id)
# train_student(student, teacher, train_dataloader, kd_loss, optimizer, epochs, device)
# train_independent_student(independent_student, train_dataloader, criterion, optimizer, epochs, device)

  1%|▏                                    | 101/16015 [00:56<2:27:22,  1.80it/s]

student
Current loss: tensor(4.4005, device='cuda:0', grad_fn=<AddBackward0>)
Loss delta: tensor(4.4005, device='cuda:0', grad_fn=<SubBackward0>)
Avg loss per epoch 0.0


  1%|▍                                    | 201/16015 [01:54<2:37:07,  1.68it/s]

student
Current loss: tensor(4.3817, device='cuda:0', grad_fn=<AddBackward0>)
Loss delta: tensor(-0.0188, device='cuda:0', grad_fn=<SubBackward0>)
Avg loss per epoch tensor(0.0440, device='cuda:0', grad_fn=<DivBackward0>)


  2%|▋                                    | 301/16015 [02:52<2:31:52,  1.72it/s]

student
Current loss: tensor(4.4289, device='cuda:0', grad_fn=<AddBackward0>)
Loss delta: tensor(0.0472, device='cuda:0', grad_fn=<SubBackward0>)
Avg loss per epoch tensor(0.0438, device='cuda:0', grad_fn=<DivBackward0>)


  2%|▊                                    | 375/16015 [03:35<2:35:45,  1.67it/s]

In [1]:
def train_student(student, teacher, train_dataloader, criterion, optimizer, epochs, device):
    """
    - student: The smaller, untrained model that uses the teacher's output as an additional label
    - teacher: The pretrained model used to help the student model learn
    - train_dataloader: Dataloader for training data
    - criterion: The loss function
    - optimizer: The optimization algorithm
    - epochs: Number of training epochs
    - device: Device to run training
    """
    teacher.eval()
    teacher.to(device)
    student.train()
    student.to(device)

    # with tqdm as epoch:
        # for i in tqdm(range(epochs)):
    # for epoch in tqdm(range(epochs), leave = True, position = 0):
    # in tqdm(train_dataloader, leave = True, position = 0):
    for epoch in tqdm(range(epochs), leave = True, position = 0):
        running_loss = 0.0
        sampleNum = 0
        currentLoss = 0

        for inputs, labels in tqdm(train_dataloader, leave = True, position = 0):
        # for inputs, labels in train_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            labels = F.one_hot(labels, num_classes=1000).float()

            # Zero the gradients 
            optimizer.zero_grad()

            teacher_predictions = teacher(inputs)
            student_predictions = student(inputs)

            loss = criterion(student_predictions, labels, teacher_predictions, 0.5, 0.5)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            currentLoss += loss.item()

            # if sampleNum % 100 == 0 and sampleNum > 0: # write this to a file somewhere else so tqdm doesnt mess up
            #     print("loss: ", str(currentLoss/100))
            #     print("total loss: ", str(running_loss/sampleNum))
            #     currentLoss = 0
            sampleNum += 1
            # print(str(running_loss) + " ", end = '')
            # break

        average_loss = running_loss / len(train_dataloader)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {average_loss:.4f}')

        # save training loss in mlflow
        with mlflow.start_run(run_id=run_id) as run:
            mlflow.log_metric("student_training_loss", average_loss)

    with mlflow.start_run(run_id=run_id) as run:
        mlflow.pytorch.log_model(
            pytorch_model=teacher.to("cpu"),
            artifact_path="teacher",)

        mlflow.pytorch.log_model(
            pytorch_model=student.to("cpu"),
            artifact_path="student")
        

optimizer = torch.optim.Adam(student.parameters(), lr)
train_student(student, teacher, train_dataloader, kd_loss, optimizer, epochs, device)

NameError: name 'torch' is not defined

In [8]:
def train_independent_student(independent_student, train_dataloader, criterion, optimizer, epochs, device):
    """
    - teacher: The pretrained model used to help the student model learn
    - student: The smaller, untrained model that uses the teacher's output as an additional label
    - criterion: The loss function
    - optimizer: The optimization algorithm
    - epochs: Number of training epochs
    - device: Device to run training
    """
    independent_student.train()
    independent_student.to(device)
    
    for epoch in tqdm.tqdm(range(epochs)):
        running_loss = 0.0

        for inputs, labels in tqdm(train_dataloader, leave = True, position = 0):
            inputs, labels = inputs.to(device), labels.to(device)
            labels = F.one_hot(labels, num_classes=1000).float()

            # Zero the gradients 
            optimizer.zero_grad()

            independent_student_predictions = independent_student(inputs)

            loss = criterion(independent_student_predictions, labels)

            loss.backward()

            optimizer.step()

            running_loss += loss.item()
            break

        average_loss = running_loss / len(train_dataloader)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {average_loss:.4f}')


        # save training loss in mlflow
        with mlflow.start_run(run_id=run_id) as run:
            mlflow.log_metric("independent_student_training_loss", average_loss)

    with mlflow.start_run(run_id=run_id) as run:
        mlflow.pytorch.log_model(
            pytorch_model=independent_student.to("cpu"),
            artifact_path="independent_student"
        )

optimizer = torch.optim.Adam(independent_student.parameters(), lr)
criterion = torch.nn.CrossEntropyLoss()
train_independent_student(independent_student, train_dataloader, criterion, optimizer, epochs, device)

AttributeError: type object 'tqdm' has no attribute 'tqdm'