In [1]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
print("pyTorch ver : ", torch.__version__)

pyTorch ver :  1.12.1+cu116


## device acustic code

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
!nvidia-smi

Mon Oct 10 19:29:33 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 517.48       Driver Version: 517.48       CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0  On |                  N/A |
| N/A   61C    P0    27W /  N/A |    945MiB /  6144MiB |      6%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Classification of images whether the image is of cat and dog


## Steps
1. Create data set of 300+ images of cat and dog
2. Load images as batch of 32
3. Create a ml model to predict
4. Predict

# clean the data and create a csv

In [4]:
import pandas as pd
import os

In [5]:
data = None
dir = 'data'

if (not os.path.isfile(os.path.join(dir, 'data.csv'))):

  print("creating csv")

  subDir = os.listdir(dir)

  dataDict = {}

  for classDir in subDir:
    if os.path.isdir(os.path.join(dir, classDir)):
      dataDict[classDir] = os.listdir(os.path.join(dir, classDir))
    else:
      print(f"not dir : {classDir}")
  
  data = pd.DataFrame({
    'fileName': [],
    'class': []
  })

  for class_ in dataDict:
    for fn in dataDict[class_]:
      data.loc[len(data.index)] = [fn, class_]

  csv = data.to_csv(os.path.join(dir, 'data.csv'), index=False)
  
else:

  print("loading csv")

  data = pd.read_csv(os.path.join(dir, 'data.csv'))

data

loading csv


Unnamed: 0,fileName,class
0,0b1.jpg,cat
1,1080p_cat_images.jpg,cat
2,2261526.jpg,cat
3,235613.jpg,cat
4,249470.jpg,cat
...,...,...
1313,_120248530_gettyimages-157037529.jpg,dog
1314,_120373298_gettyimages-1300362661.jpg,dog
1315,_124800860_gettyimages-1287712627.jpg,dog
1316,_124800861_gettyimages-469360172.jpg,dog


# Create custom dataset loader

In [6]:
from torch.utils.data import Dataset, random_split, DataLoader
from torch import nn
import torchvision
import torchvision.transforms as trans

In [7]:

class MyDataset(Dataset):
  def __init__(self, df, transform=None):
    super(MyDataset, self).__init__()
    self.df = df
    self.transform = transform

  def __len__(self):
    return len(self.df)

  def __getitem__(self, index):
    path = os.path.join(dir, self.df.iloc[index, 1], self.df.iloc[index, 0])
    
    img = torchvision.io.read_image(path).to(device = device)

    y_label = torch.tensor(0 if (self.df.iloc[index, 1] == 'cat') else 1).to(device = device)
    
    if self.transform:
      img = self.transform(img)
    
    # # color_channel, width, height -> width, height, color_channel
    # img = img.permute(1, 2, 0)
    # divide by 255 so that max value will be 1, so that its easier for gpu to work with
    img = img / torch.tensor(255).to(device = device)

    return(img, y_label)

# split the dataset into training, validation and testing sets

In [8]:
# transform for the dataset
transform = trans.Resize(size = (224, 224))

# Create the dataset
ds = MyDataset(data, transform)


In [9]:
trainCount = int(.7 * len(ds))
validationCount = int(.2 * len(ds))
testCount = len(ds) - trainCount - validationCount

print(f'trainSet count : {trainCount},\n validationSet count : {validationCount},\n testSet count : {testCount}')

trainSet count : 922,
 validationSet count : 263,
 testSet count : 133


In [10]:
batchSize = 32
numWorkers = 4

trainDataSet, validationDataSet, testingDataSet = random_split(ds, (trainCount, validationCount, testCount))

print(f'trainSet : {trainDataSet},\n validationSet : {validationDataSet},\n testSet : {testingDataSet}')

trainSet : <torch.utils.data.dataset.Subset object at 0x0000017D170F40D0>,
 validationSet : <torch.utils.data.dataset.Subset object at 0x0000017D170F4220>,
 testSet : <torch.utils.data.dataset.Subset object at 0x0000017D170F4130>


In [11]:
trainDataLoder = DataLoader(
  trainDataSet,
  batch_size=batchSize,
  shuffle=True,
  num_workers=0
)

validationDataLoder = DataLoader(
  validationDataSet,
  batch_size=batchSize,
  shuffle=True,
  num_workers=0
)

testDataLoder = DataLoader(
  testingDataSet,
  batch_size=batchSize,
  shuffle=True,
  num_workers=0
)

dataLoaders = {
  'train': trainDataLoder,
  'validation': validationDataLoder,
  'test': testDataLoder
}

dataLoaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x17d170f4970>,
 'validation': <torch.utils.data.dataloader.DataLoader at 0x17d170f4f70>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x17d170f4e50>}

In [12]:
class myModle (nn.Module):
  def __init__ (self):
    super(myModle, self).__init__()
    #((w - f + 2p) / s) + 1 -> ((150 - 3 + 2*1) / 1) + 1 -> 224

    #in shape = (32, 3, 224, 224)
    self.l1 = nn.Conv2d(
      in_channels=3, out_channels=12,
      kernel_size=3, stride=1, padding=1
    )
    #shape = (32, 12, 224, 224)
    self.bn1 = nn.BatchNorm2d(num_features=12)
    #shape = (32, 12, 224, 224)

    self.pool1 = nn.MaxPool2d(kernel_size=2)
    # reduce the img by 2
    # shape = (32, 12, 112, 112)

    self.l2 = nn.Conv2d(
      in_channels=12, out_channels=32,
      kernel_size=3, stride=1, padding=1
    )
    # shape = (32, 32, 112, 112)

    self.l3 = nn.Conv2d(
      in_channels=32, out_channels=64,
      kernel_size=3, stride=1, padding=1
    )
    # shape = (32, 64, 112, 112)
    self.bn3 = nn.BatchNorm2d(num_features=64)

    self.out = nn.Linear(in_features=64*112*112, out_features=2)

    # cnn(3->12) -> bn -> pool(/2) -> cnn(12->32) -> cnn(32 -> 64) -> bn -> out([64*112*112]802816 -> 2)

  def forward(self, x):
    y = self.l1(x)
    y = self.bn1(y)
    y = nn.ReLU(y)

    y = self.pool1(y)

    y = self.l2(y)
    y = nn.ReLU(y)

    y = self.l3(y)
    y = self.bn3(y)
    y = nn.ReLU(y)

    y = y.view(-1, 64*112*112)

    y = self.out(y)

    return y
  

In [13]:
# ((224 - 3 + 2*1) / 1) + 1
# 64*112*112

In [14]:
model = myModle().to(device= device)
model

myModle(
  (l1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (l2): Conv2d(12, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (l3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (out): Linear(in_features=802816, out_features=2, bias=True)
)

# Optimizer and loss function

In [15]:
optim = torch.optim.Adam(model.parameters(), lr = 0.001, weight_decay=0.0001)
loss = nn.CrossEntropyLoss()

optim, loss

(Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     eps: 1e-08
     foreach: None
     lr: 0.001
     maximize: False
     weight_decay: 0.0001
 ),
 CrossEntropyLoss())

In [16]:
epoch = 1000

In [17]:
acc = []
los = []

for e in range(epoch):

  tacc = 0.0
  tloss = 0.0
  
  # training
  model.train()

  for _, (img, label) in enumerate(dataLoaders['train']):
    optim.zero_grad()
    y = model(img)
    l = loss(y, label)
    l.backward()
    optim.step()

    tloss += l.cpu().data * img.size(0)
    _, pred = torch.max(y.data, 1)
    tacc += int(torch.sum(pred == label.data))

  acc.append(tacc / len(trainDataSet))
  los.append(tloss / len(trainDataSet))

  if e % 100 == 0:
    print(f'epoch : {e} | acc : {tacc / len(trainDataSet)} | loss : {tloss / len(trainDataSet)}')

  # evaluation
  model.eval()

  tacc = 0.0
  tloss = 0.0

RuntimeError: Unsupported image file. Only jpeg and png are currently supported.