In [1]:
import os
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import torch

  from .autonotebook import tqdm as notebook_tqdm
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import pandas as pd
data_df = pd.read_csv('data/Training_set.csv')

# Distinct label names in alphabetical order; a label's position in this
# list is used as its integer class id.
labels = sorted(data_df.label.unique())
class_labels = {label_name: class_id for class_id, label_name in enumerate(labels)}
class_labels

{'calling': 0,
 'clapping': 1,
 'cycling': 2,
 'dancing': 3,
 'drinking': 4,
 'eating': 5,
 'fighting': 6,
 'hugging': 7,
 'laughing': 8,
 'listening_to_music': 9,
 'running': 10,
 'sitting': 11,
 'sleeping': 12,
 'texting': 13,
 'using_laptop': 14}

{'calling': 0,
 'clapping': 1,
 'cycling': 2,
 'dancing': 3,
 'drinking': 4,
 'eating': 5,
 'fighting': 6,
 'hugging': 7,
 'laughing': 8,
 'listening_to_music': 9,
 'running': 10,
 'sitting': 11,
 'sleeping': 12,
 'texting': 13,
 'using_laptop': 14}

In [4]:
# Number of target classes. Derived from the label -> id mapping instead of
# the `labels` list so the value does not depend on a short, generic name that
# is easy to rebind elsewhere in the notebook (e.g. to a batch of targets).
num_classes = len(class_labels)
num_classes

15

15

In [5]:
# Positional (row-order) split of the annotation table:
# rows [0, 11000) train, rows [11000, 11500) validation, the remainder test.
TRAIN_END = 11000
VAL_END = 11500

train_csv = data_df.iloc[:TRAIN_END]
val_csv = data_df.iloc[TRAIN_END:VAL_END]
test_csv = data_df.iloc[VAL_END:]

In [6]:
# Per-class sample counts in the training split (checks class balance).
train_csv['label'].value_counts()

sleeping              747
laughing              744
sitting               742
hugging               742
dancing               740
using_laptop          739
clapping              739
listening_to_music    736
cycling               734
drinking              732
texting               727
eating                725
running               724
calling               723
fighting              706
Name: label, dtype: int64

sleeping              747
laughing              744
sitting               742
hugging               742
dancing               740
using_laptop          739
clapping              739
listening_to_music    736
cycling               734
drinking              732
texting               727
eating                725
running               724
calling               723
fighting              706
Name: label, dtype: int64

In [7]:
# Per-class sample counts in the validation split.
val_csv['label'].value_counts()

hugging               41
eating                36
fighting              36
calling               35
sleeping              35
running               34
texting               34
dancing               33
sitting               33
using_laptop          33
drinking              32
listening_to_music    32
cycling               30
laughing              28
clapping              28
Name: label, dtype: int64

hugging               41
eating                36
fighting              36
calling               35
sleeping              35
running               34
texting               34
dancing               33
sitting               33
using_laptop          33
drinking              32
listening_to_music    32
cycling               30
laughing              28
clapping              28
Name: label, dtype: int64

In [8]:
# Per-class sample counts in the held-out test split.
test_csv['label'].value_counts()

fighting              98
running               82
calling               82
texting               79
eating                79
drinking              76
cycling               76
clapping              73
listening_to_music    72
using_laptop          68
laughing              68
dancing               67
sitting               65
sleeping              58
hugging               57
Name: label, dtype: int64

fighting              98
running               82
calling               82
texting               79
eating                79
drinking              76
cycling               76
clapping              73
listening_to_music    72
using_laptop          68
laughing              68
dancing               67
sitting               65
sleeping              58
hugging               57
Name: label, dtype: int64

In [9]:


class CustomImageDataset(Dataset):
    """Image classification dataset backed by an annotation table.

    Rows of ``annotations_csv`` are read positionally: column 0 holds the
    image file name (joined onto ``img_dir``) and column 1 holds the label
    name, which is translated to a class id through ``class_labels``.

    Args:
        annotations_csv: Table supporting ``len()`` and ``.iloc[row, col]``
            (a pandas DataFrame slice in this notebook).
        img_dir: Directory containing the image files.
        class_labels: Mapping from label name to class id.
        transform: Optional callable applied to the loaded RGB image.
        target_transform: Optional callable applied to the class id.
    """

    def __init__(self, annotations_csv, img_dir, class_labels,transform=None, target_transform=None):
        self.img_labels = annotations_csv
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.class_labels = class_labels

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class_id)``.

        Raises:
            KeyError: if ``class_labels`` is a dict and the row's label name
                is not one of its keys.
        """
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        # Always hand three channels to the transform: grayscale, palette or
        # RGBA files would otherwise not match a 3-channel Normalize / a
        # network expecting RGB input. The context manager releases the file
        # handle; convert() returns an independent, fully loaded image.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        label = self.img_labels.iloc[idx, 1]
        label_cls = self.class_labels[label]

        if self.transform:
            image = self.transform(image)
        # Previously stored but never used; applied only when provided, so
        # existing callers that pass no target_transform see no change.
        if self.target_transform:
            label_cls = self.target_transform(label_cls)
        return image, label_cls


In [10]:
import torchvision.transforms as T

# Per-channel mean / std used for normalisation (presumably the ImageNet
# statistics that torchvision's pretrained models expect -- confirm).
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Image pipeline: resize, centre-crop to 224, random horizontal flip,
# convert to tensor, then normalise.
_image_steps = [
    T.Resize(255),
    T.CenterCrop(224),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(_norm_mean, _norm_std),
]
transform = T.Compose(_image_steps)

# Label-side pipeline; defined here but not passed to any dataset in the
# visible cells.
target_transform = T.Compose([T.ToTensor()])


In [11]:

# Training split. The image directory is assembled with os.path.join so it
# resolves on every OS; a literal backslash is only a separator on Windows.
train_set = CustomImageDataset(train_csv, os.path.join('data', 'train'), class_labels, transform=transform)

# Shuffled mini-batches for SGD.
train_dataloader = DataLoader(train_set, batch_size=512, shuffle=True)

In [12]:
# Deterministic preprocessing for validation: same resize / crop /
# normalisation as the training pipeline but without the random flip, so the
# validation loss (which drives checkpoint selection) is repeatable.
val_transform = T.Compose([
    T.Resize(255),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Validation rows come from the same annotation file, so the images live in
# the same directory as the training images.
val_set = CustomImageDataset(val_csv, os.path.join('data', 'train'), class_labels, transform=val_transform)

# Sample order does not affect evaluation, so no shuffling.
val_dataloader = DataLoader(val_set, batch_size=256, shuffle=False)

In [13]:
# Deterministic preprocessing for the held-out test split: resize, centre
# crop, tensor conversion and normalisation only -- no random augmentation,
# so the reported accuracy is repeatable.
test_transform = T.Compose([
    T.Resize(255),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Test rows come from the same annotation file, so the images live in the
# same directory as the training images.
test_set = CustomImageDataset(test_csv, os.path.join('data', 'train'), class_labels, transform=test_transform)

# Sample order does not affect evaluation, so no shuffling.
test_dataloader = DataLoader(test_set, batch_size=256, shuffle=False)

In [14]:
from torchvision.models import resnet18

# Input width of the final fully connected layer being replaced.
FC_IN_FEATURES = 512

# Start from pretrained weights, then swap the classification head for a
# fresh linear layer that emits one logit per class of this task.
model = resnet18(pretrained=True)
model.fc = torch.nn.Linear(FC_IN_FEATURES, num_classes, bias=True)

In [15]:
# Pick the compute device. The second GPU stays the preferred target, but
# fall back to the first GPU, then to the CPU, so the notebook also runs on
# machines that do not have two CUDA devices.
if torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=1)

device(type='cuda', index=1)

In [16]:
# Move the model's parameters and buffers onto the selected device.
model = model.to(device)

In [17]:
import torch.optim as optim
from torch import nn

# Optimiser hyper-parameters.
LEARNING_RATE = 0.001
MOMENTUM = 0.9

# Cross-entropy over the model's raw class scores, minimised by SGD with
# momentum over every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

In [None]:
# Fine-tune the model. Each epoch runs one optimisation pass over the training
# loader and one evaluation pass over the validation loader, records the mean
# per-batch loss of both, and checkpoints the weights whenever the validation
# loss improves.
NUM_EPOCHS = 100

# Checkpoint location, kept verbatim so it matches the path the loading cell
# reads back.
PATH = 'models\\best_val_loss.pth'
checkpoint_dir = os.path.dirname(PATH)
if checkpoint_dir:
    # torch.save does not create missing parent directories.
    os.makedirs(checkpoint_dir, exist_ok=True)

train_loss_history = []
val_loss_history = []
# +inf guarantees the first epoch is checkpointed (the previous sentinel
# `10*5` evaluated to 50, not to a large number).
best_val_loss = float('inf')
for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times

    # ---- training pass ----
    model.train()
    train_loss = 0
    # Batch targets are named `targets` so the notebook-level `labels` list
    # is not overwritten by a tensor.
    for inputs, targets in train_dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    train_loss = train_loss / len(train_dataloader)
    train_loss_history.append(train_loss)

    # ---- validation pass ----
    model.eval()
    val_loss = 0
    total = 0
    correct = 0
    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time.
    with torch.no_grad():
        for inputs, targets in val_dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()

            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    val_loss = val_loss / len(val_dataloader)
    val_loss_history.append(val_loss)

    # Keep only the weights with the lowest validation loss seen so far.
    if best_val_loss > val_loss:
        torch.save(model.state_dict(), PATH)
        best_val_loss = val_loss
    print(f'Epoch: {epoch+1}')
    print(f'Train Loss: {train_loss}; Val Loss :{val_loss}')

    # This figure is computed on the validation split, so label it as such.
    print(f'Accuracy of the network on the {len(val_set)} validation images: {100 * correct // total} %')

print('Finished Training')


Epoch: 1
Train Loss: 2.647872556339611; Val Loss :2.3952138423919678
Accuracy of the network on the 500 test images: 26 %
Epoch: 1
Train Loss: 2.647872556339611; Val Loss :2.3952138423919678
Accuracy of the network on the 500 test images: 26 %
Epoch: 2
Train Loss: 2.195085883140564; Val Loss :1.962400197982788
Accuracy of the network on the 500 test images: 45 %
Epoch: 2
Train Loss: 2.195085883140564; Val Loss :1.962400197982788
Accuracy of the network on the 500 test images: 45 %


In [None]:
# Restore the weights with the lowest validation loss.
PATH = 'models\\best_val_loss.pth'
# map_location places the stored tensors on the current `device`, so a
# checkpoint written on one GPU also loads on a different GPU or on the CPU.
model.load_state_dict(torch.load(PATH, map_location=device))
# Switch to inference behaviour (last expression, so the model is displayed).
model.eval()

In [None]:
import torch

# Top-1 accuracy of the current model on the held-out test split.
model.eval()
total = 0
correct = 0
# Inference only: disable autograd to avoid building graphs for every batch.
with torch.no_grad():
    # Batch targets are named `targets` so the notebook-level `labels` list
    # is not overwritten by a tensor.
    for images, targets in test_dataloader:
        images = images.to(device)
        targets = targets.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
print(f'Accuracy of the network on the {len(test_set)} test images: {100 * correct // total} %')
