In [None]:
from google.colab import drive

drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import os
import shutil
import torch
import torchvision
import numpy as np
import pandas as pd
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import zipfile
import cv2
from google.colab import drive


In [None]:
# Path to the zip file in Google Drive
zip_file_path = '/content/drive/My Drive/clothing-dataset-master.zip'

# Destination directory to extract the zip file
extract_dir = '/content/extracted_contents'

# Create a directory to extract contents if not exists
os.makedirs(extract_dir, exist_ok=True)

# Unzip the file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

# List the contents of the extracted directory
extracted_files = os.listdir(extract_dir)
print("Contents of the extracted directory:")
print(extracted_files)

Contents of the extracted directory:
['clothing-dataset-master']


In [None]:
import pandas as pd

# Path to the images.csv file
csv_file_path = '/content/extracted_contents/clothing-dataset-master/images.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Display the first few rows of the DataFrame
print("First few rows of the DataFrame:")
print(df.head())


First few rows of the DataFrame:
                                  image  sender_id     label   kids
0  4285fab0-751a-4b74-8e9b-43af05deee22        124  Not sure  False
1  ea7b6656-3f84-4eb3-9099-23e623fc1018        148   T-Shirt  False
2  00627a3f-0477-401c-95eb-92642cbe078d         94  Not sure  False
3  ea2ffd4d-9b25-4ca8-9dc2-bd27f1cc59fa         43   T-Shirt  False
4  3b86d877-2b9e-4c8b-a6a2-1d87513309d0        189     Shoes  False


In [None]:
class_labels = np.unique(df['label'])

In [None]:
class_labels = {val: index for index, val in enumerate(class_labels)}

In [None]:
class_labels

{'Blazer': 0,
 'Blouse': 1,
 'Body': 2,
 'Dress': 3,
 'Hat': 4,
 'Hoodie': 5,
 'Longsleeve': 6,
 'Not sure': 7,
 'Other': 8,
 'Outwear': 9,
 'Pants': 10,
 'Polo': 11,
 'Shirt': 12,
 'Shoes': 13,
 'Shorts': 14,
 'Skip': 15,
 'Skirt': 16,
 'T-Shirt': 17,
 'Top': 18,
 'Undershirt': 19}

In [None]:
#create a custom dataset class
class CustomDataset(Dataset):
  def __init__(self, transform=None):
    self.transform = transform

  def readimg(self, index):
    self.image_folder = '/content/extracted_contents/clothing-dataset-master/images/'
    image_id = df.iloc[index]['image'] + '.jpg'
    # print("image_id: ", image_id)
    image_path = os.path.join(image_folder, image_id.strip())
    image = cv2.imread(image_path)
    return image

  def __len__(self):
    return 5398

  def __getitem__(self, index, plot=False):
    counter = 0
    image = self.readimg(index)
    while image is None:
      if index + counter > self.__len__():
        image = self.readimg(0)
        label = df.iloc[0]['label']
        return self.transform(image), torch.tensor(class_labels[label])
      image = self.readimg(index + counter)
      counter += 1
    label = df.iloc[index+counter]['label']
    if (self.transform is not None) and (plot==False):
      image = self.transform(image)
      label = class_labels[label]
    return image, torch.tensor(label)

# create data loaders
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224,224)),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [None]:
dataset = CustomDataset(transform)

In [None]:
dataset.__len__()

5398

In [None]:
df.iloc[0]['image']

'4285fab0-751a-4b74-8e9b-43af05deee22'

In [None]:
image_folder = '/content/extracted_contents/clothing-dataset-master/images/'
image_id = df.iloc[0]['image'] + '.jpg'
print("image_id: ", image_id)
image_path = os.path.join(image_folder, image_id.strip())
image = cv2.imread(image_path)
label = df.iloc[0]['label']



image_id:  4285fab0-751a-4b74-8e9b-43af05deee22.jpg


In [None]:
#create dataloaders
batch_size = 32
train_loader = DataLoader(dataset, batch_size, num_workers=2)
val_loader = DataLoader(dataset, batch_size, num_workers=2)
test_loader = DataLoader(dataset, batch_size, num_workers=2)

In [None]:
# resnet
from torchvision.models import resnet50, ResNet50_Weights
from torch import nn

In [None]:
# model
model = resnet50(weights = ResNet50_Weights)
loss_criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)


In [None]:
image, label = dataset[0]

In [None]:
image = image.unsqueeze(dim=0)

In [None]:
label = label.unsqueeze(dim=0)

In [None]:
# training
for e in range(1000):
  optimizer.zero_grad()
  outputs = model(image)
  loss = loss_criterion(outputs, label)
  loss.requires_grad_ = True
  loss.backward()
  optimizer.step()
  print("Loss: ", loss.item())

Loss:  6.597686290740967
Loss:  4.059597969055176
Loss:  0.8577477931976318
Loss:  0.03454846516251564
Loss:  0.004760005045682192
Loss:  0.0010193157941102982
Loss:  0.0002882065309677273


KeyboardInterrupt: 

In [None]:
from tqdm import tqdm

In [None]:
for epoch in range(10):
  #training
  model.train()
  training_loss = 0.0
  correct = 0
  total = 0
  batch_loss = 0.0
  for i, data in tqdm(enumerate(train_loader), total=len(train_loader), desc=f'Epoch: {epoch}'):
    img, labels = data
    img, labels = img.to('cuda'), labels.to('cuda')
    optimizer.zero_grad()

    outputs = model(img)
    loss = loss_criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    training_loss += loss.item()
    batch_loss += loss.item()
    if i % 100 == 99:
      print(f'[Epoch: {epoch + 1}, {i + 1:5d}] loss: {batch_loss / 100}')
      # wandb.log({'batch loss ': batch_loss})
      batch_loss = 0.0
    total += labels.size(0)
    _, predicted = torch.max(outputs.data, 1)
    correct += (predicted == labels).sum().item()
  train_accuracy = 100 * correct / total

  model.eval()
  #Validation
  validation_loss = 0.0
  correct = 0
  total = 0
  batch_loss = 0.0
  for i, data in tqdm(enumerate(val_loader), total=len(val_loader), desc=f'Epoch: {epoch}'):
    img, labels = data
    img, labels = img.to('cpu'), labels.to('cpu')
    optimizer.zero_grad()

    outputs = model(img)
    loss = loss_criterion(outputs, labels)

    validation_loss += loss.item()
    batch_loss += loss.item()
    if i % 100 == 99:
      print(f'[Epoch: {epoch + 1}, {i + 1:5d}] loss: {batch_loss / 100}')
      # wandb.log({'batch loss ': batch_loss})
      batch_loss = 0.0
    total += labels.size(0)
    _, predicted = torch.max(outputs.data, 1)
    correct += (predicted == labels).sum().item()
  val_accuracy = 100 * correct / total

  #log training loss and accuracy to WandB
  print("Training Loss: {:.4f}, Training Accuracy: {:.4f}".format(training_loss / len(train_loader), train_accuracy))
  # wandb.log({'Training Loss' : training_loss / len(train_loader), 'Training Accuracy': train_accuracy})

  #log validation loss and accuracy to WandB
  print("Validation Loss: {:.4f}, Validation Accuracy: {:.4f}".format(validation_loss / len(val_loader), val_accuracy))
  # wandb.log({'Validation Loss' : validation_loss / len(val_loader), 'Validation Accuracy': val_accuracy})


print("Finished Training")


Epoch: 0:  59%|█████▉    | 100/169 [33:18<23:00, 20.01s/it]

[Epoch: 1,   100] loss: 2.827764196395874


Epoch: 0: 100%|██████████| 169/169 [55:56<00:00, 19.86s/it]
Epoch: 0:  59%|█████▉    | 100/169 [10:42<07:14,  6.30s/it]

[Epoch: 1,   100] loss: 2.0833922839164734


Epoch: 0: 100%|██████████| 169/169 [17:51<00:00,  6.34s/it]

Training Loss: 2.3899, Training Accuracy: 35.0315
Validation Loss: 1.8822, Validation Accuracy: 47.9066



Epoch: 1:   2%|▏         | 3/169 [02:36<2:24:00, 52.05s/it]


RuntimeError: DataLoader worker (pid 19510) is killed by signal: Killed. 