In [1]:
# prevent random files being included in dataset
!rm -rf `find -type d -name .ipynb_checkpoints`

In [2]:
import papermill as pm
import mlflow
import torch
from utils import md5_dir, set_seed
from torch.utils.data import DataLoader
import tqdm

from loss_functions import kd_loss
from datasets import TrainImageNetDataset

import torch
import torch.nn.functional as F

# if using pretrained model
from torchvision.models import ResNet50_Weights

  from .autonotebook import tqdm as notebook_tqdm


  warn(


In [3]:
set_seed(42)
torch.cuda.empty_cache()

In [4]:
# Default Parameters
run_id = "f13020b4e2304bd7837e10d17c6ea8ea"
train_data_path = "../data/ImageNet/ILSVRC/Data/CLS-LOC/train/"
# test_data_path = "data/ImageNet/ILSVRC/Data/CLS-LOC/val/"

train_data_labels_path = "../data/ImageNet/LOC_train_solution.csv"
# test_data_labels_path = "data/ImageNet/LOC_val_solution.csv"

label_mapping_path = "../data/ImageNet/LOC_synset_mapping.txt"

resnet50_weights = ResNet50_Weights.DEFAULT

preprocess = resnet50_weights.transforms()

device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
# Parameters
run_id = "9b0844e061f440b69f900308457aef36"
train_data_path = "../data/ImageNet/ILSVRC/Data/CLS-LOC/train/"
train_data_labels_path = "../data/ImageNet/LOC_train_solution.csv"
label_mapping_path = "../data/ImageNet/LOC_synset_mapping.txt"


In [6]:
train_dataset = TrainImageNetDataset(train_data_path, train_data_labels_path, label_mapping_path, preprocess)

# test_dataset = TestImageNetDataset(test_data_path, test_data_labels_path, label_mapping_path, preprocess)

train_dataloader = DataLoader(train_dataset, batch_size=72, shuffle=True)
# test_dataloader = DataLoader(test_dataset, batch_size=80, shuffle=False)

../data/ImageNet/LOC_synset_mapping.txt


In [7]:
# if using pretrained model
from torchvision.models import resnet50, ResNet50_Weights, resnet18

resnet50_pretrained_weights = ResNet50_Weights.DEFAULT

teacher = resnet50(weights=resnet50_pretrained_weights)

student = resnet18(weights=None)
independent_student = resnet18(weights=None)

epochs = 100
lr = 0.001

In [8]:
def train_student(student, teacher, train_dataloader, criterion, optimizer, epochs, device):
    """
    - student: The smaller, untrained model that uses the teacher's output as an additional label
    - teacher: The pretrained model used to help the student model learn
    - train_dataloader: Dataloader for training data
    - criterion: The loss function
    - optimizer: The optimization algorithm
    - epochs: Number of training epochs
    - device: Device to run training
    """
    teacher.eval()
    teacher.to(device)
    student.train()
    student.to(device)
    
    for epoch in tqdm.tqdm(range(epochs)):
        running_loss = 0.0

        for inputs, labels in tqdm.tqdm(train_dataloader):
            inputs, labels = inputs.to(device), labels.to(device)
            labels = F.one_hot(labels, num_classes=1000).float()

            # Zero the gradients 
            optimizer.zero_grad()

            teacher_predictions = teacher(inputs)
            student_predictions = student(inputs)

            loss = criterion(student_predictions, labels, teacher_predictions, 0.5, 0.5)

            loss.backward()

            optimizer.step()

            running_loss += loss.item()
            break

        average_loss = running_loss / len(train_dataloader)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {average_loss:.4f}')

        # save training loss in mlflow
        with mlflow.start_run(run_id=run_id) as run:
            mlflow.log_metric("student_training_loss", average_loss)

    with mlflow.start_run(run_id=run_id) as run:
        mlflow.pytorch.log_model(
            pytorch_model=teacher.to("cpu"),
            artifact_path="teacher",
        )

        mlflow.pytorch.log_model(
            pytorch_model=student.to("cpu"),
            artifact_path="student"
        )
        
        


optimizer = torch.optim.Adam(student.parameters(), lr)
train_student(student, teacher, train_dataloader, kd_loss, optimizer, epochs, device)

  0%|          | 0/100 [00:00<?, ?it/s]




  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:01<?, ?it/s]


  1%|          | 1/100 [00:01<01:48,  1.09s/it]

Epoch [1/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  2%|▏         | 2/100 [00:01<01:15,  1.30it/s]

Epoch [2/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  3%|▎         | 3/100 [00:02<01:05,  1.48it/s]

Epoch [3/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  4%|▍         | 4/100 [00:02<00:58,  1.63it/s]

Epoch [4/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  5%|▌         | 5/100 [00:03<00:55,  1.72it/s]

Epoch [5/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  6%|▌         | 6/100 [00:03<00:52,  1.77it/s]

Epoch [6/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  7%|▋         | 7/100 [00:04<00:51,  1.80it/s]

Epoch [7/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  8%|▊         | 8/100 [00:04<00:49,  1.84it/s]

Epoch [8/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  9%|▉         | 9/100 [00:05<00:49,  1.83it/s]

Epoch [9/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 10%|█         | 10/100 [00:05<00:50,  1.78it/s]

Epoch [10/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 11%|█         | 11/100 [00:06<00:51,  1.72it/s]

Epoch [11/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 12%|█▏        | 12/100 [00:07<00:54,  1.62it/s]

Epoch [12/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 13%|█▎        | 13/100 [00:07<00:51,  1.67it/s]

Epoch [13/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 14%|█▍        | 14/100 [00:08<00:49,  1.73it/s]

Epoch [14/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 15%|█▌        | 15/100 [00:09<00:50,  1.67it/s]

Epoch [15/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 16%|█▌        | 16/100 [00:09<00:48,  1.75it/s]

Epoch [16/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 17%|█▋        | 17/100 [00:10<00:46,  1.80it/s]

Epoch [17/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 18%|█▊        | 18/100 [00:10<00:45,  1.80it/s]

Epoch [18/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 19%|█▉        | 19/100 [00:11<00:44,  1.80it/s]

Epoch [19/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 20%|██        | 20/100 [00:11<00:43,  1.83it/s]

Epoch [20/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 21%|██        | 21/100 [00:12<00:42,  1.86it/s]

Epoch [21/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 22%|██▏       | 22/100 [00:12<00:41,  1.88it/s]

Epoch [22/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 23%|██▎       | 23/100 [00:13<00:40,  1.92it/s]

Epoch [23/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 24%|██▍       | 24/100 [00:13<00:39,  1.95it/s]

Epoch [24/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 25%|██▌       | 25/100 [00:14<00:38,  1.95it/s]

Epoch [25/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 26%|██▌       | 26/100 [00:14<00:38,  1.92it/s]

Epoch [26/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 27%|██▋       | 27/100 [00:15<00:37,  1.92it/s]

Epoch [27/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 28%|██▊       | 28/100 [00:15<00:37,  1.93it/s]

Epoch [28/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 29%|██▉       | 29/100 [00:16<00:37,  1.91it/s]

Epoch [29/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 30%|███       | 30/100 [00:16<00:36,  1.91it/s]

Epoch [30/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 31%|███       | 31/100 [00:17<00:39,  1.77it/s]

Epoch [31/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 32%|███▏      | 32/100 [00:18<00:37,  1.81it/s]

Epoch [32/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 33%|███▎      | 33/100 [00:18<00:36,  1.81it/s]

Epoch [33/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 34%|███▍      | 34/100 [00:19<00:36,  1.81it/s]

Epoch [34/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 35%|███▌      | 35/100 [00:19<00:36,  1.78it/s]

Epoch [35/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 36%|███▌      | 36/100 [00:20<00:35,  1.82it/s]

Epoch [36/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 37%|███▋      | 37/100 [00:20<00:34,  1.81it/s]

Epoch [37/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 38%|███▊      | 38/100 [00:21<00:34,  1.81it/s]

Epoch [38/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 39%|███▉      | 39/100 [00:21<00:33,  1.80it/s]

Epoch [39/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 40%|████      | 40/100 [00:22<00:35,  1.70it/s]

Epoch [40/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 41%|████      | 41/100 [00:23<00:32,  1.79it/s]

Epoch [41/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 42%|████▏     | 42/100 [00:23<00:32,  1.81it/s]

Epoch [42/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 43%|████▎     | 43/100 [00:24<00:31,  1.83it/s]

Epoch [43/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 44%|████▍     | 44/100 [00:24<00:30,  1.81it/s]

Epoch [44/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 45%|████▌     | 45/100 [00:25<00:29,  1.85it/s]

Epoch [45/100], Loss: 0.0003





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 46%|████▌     | 46/100 [00:25<00:31,  1.71it/s]

Epoch [46/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 47%|████▋     | 47/100 [00:26<00:29,  1.77it/s]

Epoch [47/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 48%|████▊     | 48/100 [00:26<00:28,  1.80it/s]

Epoch [48/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 49%|████▉     | 49/100 [00:27<00:28,  1.78it/s]

Epoch [49/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 50%|█████     | 50/100 [00:28<00:27,  1.84it/s]

Epoch [50/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 51%|█████     | 51/100 [00:28<00:26,  1.86it/s]

Epoch [51/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 52%|█████▏    | 52/100 [00:29<00:25,  1.85it/s]

Epoch [52/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 53%|█████▎    | 53/100 [00:29<00:25,  1.84it/s]

Epoch [53/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 54%|█████▍    | 54/100 [00:30<00:25,  1.81it/s]

Epoch [54/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 55%|█████▌    | 55/100 [00:30<00:24,  1.86it/s]

Epoch [55/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 56%|█████▌    | 56/100 [00:31<00:24,  1.83it/s]

Epoch [56/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 57%|█████▋    | 57/100 [00:31<00:23,  1.83it/s]

Epoch [57/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 58%|█████▊    | 58/100 [00:32<00:23,  1.82it/s]

Epoch [58/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 59%|█████▉    | 59/100 [00:32<00:22,  1.82it/s]

Epoch [59/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 60%|██████    | 60/100 [00:33<00:21,  1.83it/s]

Epoch [60/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 61%|██████    | 61/100 [00:34<00:21,  1.82it/s]

Epoch [61/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 62%|██████▏   | 62/100 [00:34<00:21,  1.76it/s]

Epoch [62/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 63%|██████▎   | 63/100 [00:35<00:20,  1.80it/s]

Epoch [63/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 64%|██████▍   | 64/100 [00:35<00:19,  1.82it/s]

Epoch [64/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 65%|██████▌   | 65/100 [00:36<00:19,  1.80it/s]

Epoch [65/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 66%|██████▌   | 66/100 [00:36<00:18,  1.84it/s]

Epoch [66/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 67%|██████▋   | 67/100 [00:37<00:18,  1.83it/s]

Epoch [67/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 68%|██████▊   | 68/100 [00:37<00:17,  1.84it/s]

Epoch [68/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 69%|██████▉   | 69/100 [00:38<00:16,  1.84it/s]

Epoch [69/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 70%|███████   | 70/100 [00:39<00:16,  1.83it/s]

Epoch [70/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 71%|███████   | 71/100 [00:39<00:16,  1.80it/s]

Epoch [71/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 72%|███████▏  | 72/100 [00:40<00:15,  1.83it/s]

Epoch [72/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 73%|███████▎  | 73/100 [00:40<00:14,  1.86it/s]

Epoch [73/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 74%|███████▍  | 74/100 [00:41<00:13,  1.86it/s]

Epoch [74/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 75%|███████▌  | 75/100 [00:41<00:13,  1.85it/s]

Epoch [75/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 76%|███████▌  | 76/100 [00:42<00:13,  1.82it/s]

Epoch [76/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 77%|███████▋  | 77/100 [00:42<00:12,  1.82it/s]

Epoch [77/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 78%|███████▊  | 78/100 [00:43<00:11,  1.88it/s]

Epoch [78/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 79%|███████▉  | 79/100 [00:43<00:11,  1.88it/s]

Epoch [79/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 80%|████████  | 80/100 [00:44<00:10,  1.87it/s]

Epoch [80/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 81%|████████  | 81/100 [00:44<00:10,  1.81it/s]

Epoch [81/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 82%|████████▏ | 82/100 [00:45<00:09,  1.85it/s]

Epoch [82/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 83%|████████▎ | 83/100 [00:46<00:09,  1.88it/s]

Epoch [83/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 84%|████████▍ | 84/100 [00:46<00:08,  1.90it/s]

Epoch [84/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 85%|████████▌ | 85/100 [00:47<00:07,  1.89it/s]

Epoch [85/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 86%|████████▌ | 86/100 [00:47<00:08,  1.75it/s]

Epoch [86/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 87%|████████▋ | 87/100 [00:48<00:07,  1.79it/s]

Epoch [87/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 88%|████████▊ | 88/100 [00:48<00:06,  1.84it/s]

Epoch [88/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 89%|████████▉ | 89/100 [00:49<00:05,  1.86it/s]

Epoch [89/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 90%|█████████ | 90/100 [00:49<00:05,  1.91it/s]

Epoch [90/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 91%|█████████ | 91/100 [00:50<00:04,  1.92it/s]

Epoch [91/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 92%|█████████▏| 92/100 [00:50<00:04,  1.91it/s]

Epoch [92/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 93%|█████████▎| 93/100 [00:51<00:03,  1.90it/s]

Epoch [93/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 94%|█████████▍| 94/100 [00:51<00:03,  1.80it/s]

Epoch [94/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 95%|█████████▌| 95/100 [00:52<00:02,  1.82it/s]

Epoch [95/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 96%|█████████▌| 96/100 [00:53<00:02,  1.82it/s]

Epoch [96/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 97%|█████████▋| 97/100 [00:53<00:01,  1.85it/s]

Epoch [97/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 98%|█████████▊| 98/100 [00:54<00:01,  1.88it/s]

Epoch [98/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 99%|█████████▉| 99/100 [00:54<00:00,  1.83it/s]

Epoch [99/100], Loss: 0.0002





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


100%|██████████| 100/100 [00:55<00:00,  1.80it/s]

100%|██████████| 100/100 [00:55<00:00,  1.81it/s]




Epoch [100/100], Loss: 0.0002




In [9]:
def train_independent_student(independent_student, train_dataloader, criterion, optimizer, epochs, device):
    """
    - teacher: The pretrained model used to help the student model learn
    - student: The smaller, untrained model that uses the teacher's output as an additional label
    - criterion: The loss function
    - optimizer: The optimization algorithm
    - epochs: Number of training epochs
    - device: Device to run training
    """
    independent_student.train()
    independent_student.to(device)
    
    for epoch in tqdm.tqdm(range(epochs)):
        running_loss = 0.0

        for inputs, labels in tqdm.tqdm(train_dataloader):
            inputs, labels = inputs.to(device), labels.to(device)
            labels = F.one_hot(labels, num_classes=1000).float()

            # Zero the gradients 
            optimizer.zero_grad()

            independent_student_predictions = independent_student(inputs)

            loss = criterion(independent_student_predictions, labels)

            loss.backward()

            optimizer.step()

            running_loss += loss.item()
            break

        average_loss = running_loss / len(train_dataloader)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {average_loss:.4f}')


        # save training loss in mlflow
        with mlflow.start_run(run_id=run_id) as run:
            mlflow.log_metric("independent_student_training_loss", average_loss)

    with mlflow.start_run(run_id=run_id) as run:
        mlflow.pytorch.log_model(
            pytorch_model=independent_student.to("cpu"),
            artifact_path="independent_student"
        )

optimizer = torch.optim.Adam(independent_student.parameters(), lr)
criterion = torch.nn.CrossEntropyLoss()
train_independent_student(independent_student, train_dataloader, criterion, optimizer, epochs, device)

  0%|          | 0/100 [00:00<?, ?it/s]




  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  1%|          | 1/100 [00:00<00:40,  2.45it/s]

Epoch [1/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  2%|▏         | 2/100 [00:00<00:42,  2.30it/s]

Epoch [2/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  3%|▎         | 3/100 [00:01<00:42,  2.30it/s]

Epoch [3/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  4%|▍         | 4/100 [00:01<00:39,  2.45it/s]

Epoch [4/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  5%|▌         | 5/100 [00:02<00:37,  2.51it/s]

Epoch [5/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  6%|▌         | 6/100 [00:02<00:41,  2.26it/s]

Epoch [6/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  7%|▋         | 7/100 [00:03<00:45,  2.03it/s]

Epoch [7/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  8%|▊         | 8/100 [00:03<00:42,  2.17it/s]

Epoch [8/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


  9%|▉         | 9/100 [00:03<00:39,  2.30it/s]

Epoch [9/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 10%|█         | 10/100 [00:04<00:38,  2.36it/s]

Epoch [10/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 11%|█         | 11/100 [00:04<00:36,  2.42it/s]

Epoch [11/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 12%|█▏        | 12/100 [00:05<00:36,  2.40it/s]

Epoch [12/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 13%|█▎        | 13/100 [00:05<00:35,  2.48it/s]

Epoch [13/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 14%|█▍        | 14/100 [00:05<00:35,  2.44it/s]

Epoch [14/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 15%|█▌        | 15/100 [00:06<00:34,  2.46it/s]

Epoch [15/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 16%|█▌        | 16/100 [00:06<00:34,  2.43it/s]

Epoch [16/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 17%|█▋        | 17/100 [00:07<00:33,  2.46it/s]

Epoch [17/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 18%|█▊        | 18/100 [00:07<00:32,  2.53it/s]

Epoch [18/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 19%|█▉        | 19/100 [00:07<00:31,  2.55it/s]

Epoch [19/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 20%|██        | 20/100 [00:08<00:32,  2.46it/s]

Epoch [20/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 21%|██        | 21/100 [00:08<00:32,  2.44it/s]

Epoch [21/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 22%|██▏       | 22/100 [00:09<00:32,  2.41it/s]

Epoch [22/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 23%|██▎       | 23/100 [00:09<00:31,  2.44it/s]

Epoch [23/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 24%|██▍       | 24/100 [00:09<00:30,  2.46it/s]

Epoch [24/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 25%|██▌       | 25/100 [00:10<00:31,  2.36it/s]

Epoch [25/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 26%|██▌       | 26/100 [00:10<00:30,  2.42it/s]

Epoch [26/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 27%|██▋       | 27/100 [00:11<00:29,  2.47it/s]

Epoch [27/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 28%|██▊       | 28/100 [00:11<00:28,  2.49it/s]

Epoch [28/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 29%|██▉       | 29/100 [00:11<00:27,  2.56it/s]

Epoch [29/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 30%|███       | 30/100 [00:12<00:28,  2.49it/s]

Epoch [30/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 31%|███       | 31/100 [00:12<00:29,  2.36it/s]

Epoch [31/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 32%|███▏      | 32/100 [00:13<00:28,  2.38it/s]

Epoch [32/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 33%|███▎      | 33/100 [00:13<00:27,  2.44it/s]

Epoch [33/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 34%|███▍      | 34/100 [00:14<00:26,  2.49it/s]

Epoch [34/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 35%|███▌      | 35/100 [00:14<00:27,  2.39it/s]

Epoch [35/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 36%|███▌      | 36/100 [00:14<00:26,  2.40it/s]

Epoch [36/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 37%|███▋      | 37/100 [00:15<00:25,  2.48it/s]

Epoch [37/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 38%|███▊      | 38/100 [00:15<00:25,  2.45it/s]

Epoch [38/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 39%|███▉      | 39/100 [00:16<00:24,  2.52it/s]

Epoch [39/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 40%|████      | 40/100 [00:16<00:24,  2.44it/s]

Epoch [40/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 41%|████      | 41/100 [00:16<00:24,  2.41it/s]

Epoch [41/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 42%|████▏     | 42/100 [00:17<00:26,  2.23it/s]

Epoch [42/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 43%|████▎     | 43/100 [00:17<00:24,  2.35it/s]

Epoch [43/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 44%|████▍     | 44/100 [00:18<00:23,  2.37it/s]

Epoch [44/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 45%|████▌     | 45/100 [00:18<00:22,  2.40it/s]

Epoch [45/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 46%|████▌     | 46/100 [00:19<00:21,  2.47it/s]

Epoch [46/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 47%|████▋     | 47/100 [00:19<00:21,  2.52it/s]

Epoch [47/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 48%|████▊     | 48/100 [00:19<00:20,  2.49it/s]

Epoch [48/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 49%|████▉     | 49/100 [00:20<00:22,  2.31it/s]

Epoch [49/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 50%|█████     | 50/100 [00:20<00:21,  2.32it/s]

Epoch [50/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 51%|█████     | 51/100 [00:21<00:20,  2.39it/s]

Epoch [51/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 52%|█████▏    | 52/100 [00:21<00:19,  2.51it/s]

Epoch [52/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 53%|█████▎    | 53/100 [00:21<00:18,  2.55it/s]

Epoch [53/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 54%|█████▍    | 54/100 [00:22<00:18,  2.53it/s]

Epoch [54/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 55%|█████▌    | 55/100 [00:22<00:17,  2.53it/s]

Epoch [55/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 56%|█████▌    | 56/100 [00:23<00:17,  2.51it/s]

Epoch [56/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 57%|█████▋    | 57/100 [00:23<00:16,  2.56it/s]

Epoch [57/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 58%|█████▊    | 58/100 [00:23<00:17,  2.45it/s]

Epoch [58/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 59%|█████▉    | 59/100 [00:24<00:17,  2.40it/s]

Epoch [59/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 60%|██████    | 60/100 [00:24<00:17,  2.30it/s]

Epoch [60/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 61%|██████    | 61/100 [00:25<00:16,  2.39it/s]

Epoch [61/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 62%|██████▏   | 62/100 [00:25<00:15,  2.43it/s]

Epoch [62/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 63%|██████▎   | 63/100 [00:26<00:14,  2.47it/s]

Epoch [63/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 64%|██████▍   | 64/100 [00:26<00:14,  2.53it/s]

Epoch [64/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 65%|██████▌   | 65/100 [00:26<00:13,  2.58it/s]

Epoch [65/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 66%|██████▌   | 66/100 [00:27<00:13,  2.50it/s]

Epoch [66/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 67%|██████▋   | 67/100 [00:27<00:13,  2.36it/s]

Epoch [67/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 68%|██████▊   | 68/100 [00:28<00:13,  2.43it/s]

Epoch [68/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 69%|██████▉   | 69/100 [00:28<00:12,  2.43it/s]

Epoch [69/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 70%|███████   | 70/100 [00:28<00:12,  2.43it/s]

Epoch [70/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 71%|███████   | 71/100 [00:29<00:11,  2.48it/s]

Epoch [71/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 72%|███████▏  | 72/100 [00:29<00:12,  2.32it/s]

Epoch [72/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 73%|███████▎  | 73/100 [00:30<00:12,  2.13it/s]

Epoch [73/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 74%|███████▍  | 74/100 [00:30<00:12,  2.11it/s]

Epoch [74/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 75%|███████▌  | 75/100 [00:31<00:11,  2.20it/s]

Epoch [75/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 76%|███████▌  | 76/100 [00:31<00:10,  2.25it/s]

Epoch [76/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 77%|███████▋  | 77/100 [00:31<00:09,  2.36it/s]

Epoch [77/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 78%|███████▊  | 78/100 [00:32<00:09,  2.41it/s]

Epoch [78/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 79%|███████▉  | 79/100 [00:32<00:08,  2.38it/s]

Epoch [79/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 80%|████████  | 80/100 [00:33<00:08,  2.40it/s]

Epoch [80/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 81%|████████  | 81/100 [00:33<00:08,  2.34it/s]

Epoch [81/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 82%|████████▏ | 82/100 [00:34<00:07,  2.38it/s]

Epoch [82/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 83%|████████▎ | 83/100 [00:34<00:06,  2.47it/s]

Epoch [83/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 84%|████████▍ | 84/100 [00:34<00:06,  2.39it/s]

Epoch [84/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 85%|████████▌ | 85/100 [00:35<00:06,  2.46it/s]

Epoch [85/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 86%|████████▌ | 86/100 [00:35<00:05,  2.45it/s]

Epoch [86/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 87%|████████▋ | 87/100 [00:36<00:05,  2.24it/s]

Epoch [87/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 88%|████████▊ | 88/100 [00:36<00:05,  2.30it/s]

Epoch [88/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 89%|████████▉ | 89/100 [00:37<00:04,  2.29it/s]

Epoch [89/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 90%|█████████ | 90/100 [00:37<00:04,  2.31it/s]

Epoch [90/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 91%|█████████ | 91/100 [00:37<00:03,  2.41it/s]

Epoch [91/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 92%|█████████▏| 92/100 [00:38<00:03,  2.40it/s]

Epoch [92/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 93%|█████████▎| 93/100 [00:38<00:02,  2.48it/s]

Epoch [93/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 94%|█████████▍| 94/100 [00:39<00:02,  2.50it/s]

Epoch [94/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 95%|█████████▌| 95/100 [00:39<00:02,  2.50it/s]

Epoch [95/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 96%|█████████▌| 96/100 [00:39<00:01,  2.42it/s]

Epoch [96/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 97%|█████████▋| 97/100 [00:40<00:01,  2.38it/s]

Epoch [97/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 98%|█████████▊| 98/100 [00:40<00:00,  2.47it/s]

Epoch [98/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


 99%|█████████▉| 99/100 [00:41<00:00,  2.48it/s]

Epoch [99/100], Loss: 0.0004





  0%|          | 0/17794 [00:00<?, ?it/s]

[A

  0%|          | 0/17794 [00:00<?, ?it/s]


100%|██████████| 100/100 [00:41<00:00,  2.51it/s]

100%|██████████| 100/100 [00:41<00:00,  2.41it/s]




Epoch [100/100], Loss: 0.0004
