In [1]:
# Necessary imports
import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torchinfo import summary
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
import os
import pandas as pd
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split

In [2]:
# For reproducibility
# torch.manual_seed(0)
# torch.backends.cudnn.benchmark = True

In [3]:
print(torch.cuda.device_count())
print(torch.cuda.is_available())

1
True


In [4]:
# os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [5]:
def get_df(img_path):
    image, label = [], []

    i = 0

    for get_img_name in os.listdir(img_path):
        image += [os.path.join(img_path, get_img_name)]
        label += [get_img_name.split('.')[0]]
        
        i = i+1

    PathDF = pd.DataFrame({'image': image, 'label': label})
    print(i)
    PathDF.head()
    return PathDF

In [6]:
transform = transforms.Compose([
            transforms.Resize([224,224]),
            transforms.ToTensor()
            ])

In [7]:
class mod_Dataset(Dataset):
    def __init__(self, path_df, transform=None):
        self.path_df = path_df
        self.transform = transform

    def __len__(self):
        return self.path_df.shape[0]
    
    def __getitem__(self, idx):
        if self.transform is not None:
            image = self.transform(Image.open(self.path_df.iloc[idx]['image']))

            if self.path_df.iloc[idx]['label'] == 'cat':
                label = torch.tensor([1, 0], dtype=torch.float32)
            else:
                label = torch.tensor([0, 1], dtype=torch.float32)

        return image, label

In [8]:
ROOT_PATH = '/home/yasaisen/Desktop/13_research/research_main/lab_02'

In [9]:
train_df = get_df(os.path.join(ROOT_PATH, 'dogs-vs-cats/train'))

validation_fraction = 0.15
test_fraction = 0.10

train2rest = validation_fraction + test_fraction
test2valid = validation_fraction / train2rest


train_df, rest = train_test_split(train_df, random_state=42,
                                 test_size = train2rest)

test_df, valid_df = train_test_split(rest, random_state=42,
                                    test_size = test2valid)

train_data = mod_Dataset(train_df, transform)
valid_data = mod_Dataset(valid_df, transform)
test_data = mod_Dataset(test_df, transform)

train_loader = DataLoader(train_data, batch_size=8, shuffle=True , num_workers=0, pin_memory=True, drop_last=True)
valid_loader = DataLoader(valid_data, batch_size=8, shuffle=False, num_workers=0)
test_loader = DataLoader(test_data, batch_size=8, shuffle=False, num_workers=0)

25000


In [10]:
class resnet18(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.base_model = torchvision.models.resnet18(weights=None)
        self.fc1 = nn.Linear(1000, num_classes)

    def forward(self, input):
        output = self.base_model(input)
        output = self.fc1(output)
        return output

In [11]:
class resnet34(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.base_model = torchvision.models.resnet34(weights=None)
        self.fc1 = nn.Linear(1000, num_classes)

    def forward(self, input):
        output = self.base_model(input)
        output = self.fc1(output)
        return output

In [12]:
# model = StudentModel(2)
# # print(model)
# t = torch.randn((32, 3, 224, 224))
# print(t.shape)
# get = model(t)
# print(get.shape)

In [13]:
def check_accuracy(loader, model, device):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)

            scores = model(x)
            _, predictions = scores.max(1)
            _, y = y.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    model.train()
    return (num_correct/num_samples).item()
  

def train(epochs, model):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


    for epoch in range(epochs):
        model.train()
        losses = []

        pbar = tqdm(train_loader, total=len(train_loader), position=0, leave=True, desc=f"Epoch {epoch}")
        for data, targets in pbar:
            data = data.to(device)
            targets = targets.to(device)

            # forward
            scores = model(data)
            loss = criterion(scores, targets)
            losses.append(loss.item())
            # backward
            optimizer.zero_grad()
            loss.backward()

            optimizer.step()
        
        avg_loss = sum(losses) / len(losses)
        acc = check_accuracy(test_loader, model, device)
        print(f"Loss:{avg_loss:.8f}\tAccuracy:{acc:.8f}")

    return model

In [14]:
def train_step(
    teacher_model,
    student_model,
    optimizer,
    student_loss_fn,
    divergence_loss_fn,
    temp,
    alpha,
    epoch,
    device
):
    losses = []
    pbar = tqdm(train_loader, total=len(train_loader), position=0, leave=True, desc=f"Epoch {epoch}")
    for data, targets in pbar:
        # Get data to cuda if possible
        data = data.to(device)
        targets = targets.to(device)

        # forward
        with torch.no_grad():
            teacher_preds = teacher_model(data)

        student_preds = student_model(data)
        student_loss = student_loss_fn(student_preds, targets)
        
        ditillation_loss = divergence_loss_fn(
            F.log_softmax(student_preds / temp, dim=1),
            F.softmax(teacher_preds / temp, dim=1)
        )
        loss = alpha * student_loss + (1 - alpha) * ditillation_loss
        losses.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
    
    avg_loss = sum(losses) / len(losses)
    return avg_loss
  
def main(epochs, teacher, student, temp=7, alpha=0.3):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    teacher = teacher.to(device)
    student = student.to(device)
    student_loss_fn = nn.CrossEntropyLoss()
    divergence_loss_fn = nn.KLDivLoss(reduction="batchmean")
    optimizer = torch.optim.Adam(student.parameters(), lr=1e-4)

    teacher.eval()
    student.train()
    for epoch in range(epochs):
        loss = train_step(
            teacher,
            student,
            optimizer,
            student_loss_fn,
            divergence_loss_fn,
            temp,
            alpha,
            epoch,
            device
        )
        acc = check_accuracy(test_loader, student, device)
        print(f"Loss:{loss:.8f}\tAccuracy:{acc:.8f}")

In [15]:
# resnet18_model = resnet18(2)
# trained_resnet18_model = train(3, resnet18_model)
# torch.save({'state_dict': trained_resnet18_model.state_dict(), 'model' : trained_resnet18_model,}, os.path.join(ROOT_PATH, 'resnet18.pth'))

In [16]:
# resnet34_model = resnet34(2)
# trained_resnet34_model = train(3, resnet34_model)
# torch.save({'state_dict': trained_resnet34_model.state_dict(), 'model' : trained_resnet34_model,}, os.path.join(ROOT_PATH, 'resnet34.pth'))

In [17]:
# trained_resnet18_model = torch.load(os.path.join(ROOT_PATH, 'resnet18.pth'))['model']
# trained_resnet34_model = torch.load(os.path.join(ROOT_PATH, 'resnet34.pth'))['model']

In [18]:
# TE=34, ST=18

trained_resnet34_model = torch.load(os.path.join(ROOT_PATH, 'resnet34.pth'))['model']
trained_resnet34_model = trained_resnet34_model.cuda()

student_model = resnet18(2)
student_model = student_model.cuda()

main(3, trained_resnet34_model, student_model, alpha=0)

Epoch 0: 100%|██████████| 2343/2343 [01:27<00:00, 26.68it/s]


Loss:0.03578717	Accuracy:0.77919996


Epoch 1: 100%|██████████| 2343/2343 [01:27<00:00, 26.75it/s]


Loss:0.00920615	Accuracy:0.88799995


Epoch 2: 100%|██████████| 2343/2343 [01:28<00:00, 26.51it/s]


Loss:0.00402304	Accuracy:0.90719998


In [19]:
# TE=18, ST=34

trained_resnet18_model = torch.load(os.path.join(ROOT_PATH, 'resnet18.pth'))['model'].cuda()

student_model = resnet34(2).cuda()
main(3, trained_resnet18_model, student_model, alpha=0)

Epoch 0: 100%|██████████| 2343/2343 [01:56<00:00, 20.10it/s]


Loss:0.03634158	Accuracy:0.83279997


Epoch 1: 100%|██████████| 2343/2343 [01:56<00:00, 20.09it/s]


Loss:0.01008052	Accuracy:0.87599999


Epoch 2: 100%|██████████| 2343/2343 [02:19<00:00, 16.77it/s]


Loss:0.00559674	Accuracy:0.87119997


In [20]:
# TE=34, ST=34

trained_resnet34_model = torch.load(os.path.join(ROOT_PATH, 'resnet34.pth'))['model'].cuda()

student_model = resnet34(2).cuda()
main(3, trained_resnet34_model, student_model, alpha=0)

Epoch 0: 100%|██████████| 2343/2343 [02:19<00:00, 16.75it/s]


Loss:0.03364192	Accuracy:0.81680000


Epoch 1: 100%|██████████| 2343/2343 [02:06<00:00, 18.49it/s]


Loss:0.00835406	Accuracy:0.89719999


Epoch 2: 100%|██████████| 2343/2343 [02:06<00:00, 18.47it/s]


Loss:0.00450736	Accuracy:0.89039999


In [21]:
# TE=18, ST=18

trained_resnet18_model = torch.load(os.path.join(ROOT_PATH, 'resnet18.pth'))['model'].cuda()

student_model = resnet18(2).cuda()
main(3, trained_resnet18_model, student_model, alpha=0)

Epoch 0: 100%|██████████| 2343/2343 [02:04<00:00, 18.80it/s]


Loss:0.03665120	Accuracy:0.82400000


Epoch 1: 100%|██████████| 2343/2343 [01:20<00:00, 29.15it/s]


Loss:0.01121680	Accuracy:0.85039997


Epoch 2: 100%|██████████| 2343/2343 [02:20<00:00, 16.70it/s]


Loss:0.00483111	Accuracy:0.88559997
