


---


**DOWNLOADING & EXTRACTING DATA**


---




In [1]:
!gdown https://drive.google.com/uc?id=1YmiU2tpawsTS4CojYoqwANf5JwLNUJuJ&export=download
!unzip Dataset.zip

Downloading...
From: https://drive.google.com/uc?id=1YmiU2tpawsTS4CojYoqwANf5JwLNUJuJ
To: /content/Dataset.zip
230MB [00:04, 49.1MB/s]
Archive:  Dataset.zip
replace Dataset/Negative/00001.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: N




---


**IMPORTS**

---




In [2]:
import os
import glob 
import torch
import shutil
import torchvision
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.utils.data
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets
from sklearn.metrics import f1_score, precision_score, recall_score



---


**DATA LOADING & TRANSFORMATIONS**


---



In [3]:
transform = transforms.Compose([
                    transforms.Grayscale(num_output_channels=3),
                    transforms.Resize(28),
                    transforms.ToTensor(),
                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

dataset = datasets.ImageFolder('./Dataset', transform=transform)

train, val = torch.utils.data.random_split(dataset,\
                [int(len(dataset)*0.8), len(dataset) - int(len(dataset)*0.8)])

trainset = torch.utils.data.DataLoader(train, batch_size=64, shuffle=True)
valset = torch.utils.data.DataLoader(val, batch_size=64, shuffle=True)



---


**CNN - MODEL CLASS**


---



In [4]:
def xavier_weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.xavier_uniform_(m.weight, gain=np.sqrt(2))
        nn.init.constant_(m.bias, 0.1)


class Net(nn.Module):

    def __init__(self):
      super(Net, self).__init__()
      self.layer1 = nn.Sequential(
          nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=2),
          nn.BatchNorm2d(num_features=16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2))
      self.layer2 = nn.Sequential(
          nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
          nn.BatchNorm2d(num_features=32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2))
      self.layer3 = nn.Sequential(
          nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
          nn.BatchNorm2d(num_features=64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2))
      self.layer4 = nn.Sequential(
          nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=2),
          nn.BatchNorm2d(num_features=128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2))
      
      self.drop_out = nn.Dropout(0.3)
      
      self.fc1 = nn.Linear(128, 128)
      self.fc2 = nn.Linear(128, 64)
      self.fc3 = nn.Linear(64, 2)


    def forward(self, x):
      out = self.layer1(x)
      out = self.layer2(out)
      out = self.layer3(out)
      out = self.layer4(out)
      out = out.reshape(out.size(0), -1)
      out = self.drop_out(out)
      out = F.relu(self.fc1(out))
      out = F.relu(self.fc2(out))
      out = F.log_softmax(self.fc3(out))
      return out



---


**TRAIN & TEST FUNCTIONS**


---



In [5]:
def train( model, device, train_loader, optimizer, epoch, log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.item()))

In [6]:
def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    return (100 * correct / len(test_loader.dataset))



---


**TRAINING & TESTING THE MODEL**

---



In [7]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model_cnn = Net().to(device)
model_cnn.apply(xavier_weights_init)

epochs = 3
lr = 0.001
momentum = 0.9
log_interval = 50

model = model_cnn
optimizer = optim.AdamW(model.parameters(), lr=lr)

for epoch in range(1, epochs + 1):
    train(model, device, trainset, optimizer, epoch, log_interval)





In [12]:
print('\n1- Accuracy on train: {0:.2f}'.format(test(model, device, trainset)))
print('\n2- Accuracy on Validation: {0:.2f}'.format(test(model, device, valset)))




1- Accuracy on train: 99.78

2- Accuracy on Validation: 99.63




---


**GENERATING PREDICTIONS**

---



In [8]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """
    def __getitem__(self, index):
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        path = self.imgs[index][0]
        tuple_with_path = (original_tuple + (path,))
        return tuple_with_path
        

def get_predictions(test_set):

  y_true = []
  y_pred = []
  y_names = []

  with torch.no_grad():
    for inputs, labels, paths in test_set:
      inputs, labels = inputs.to(device), labels.to(device)
      outputs = model(inputs)
      _, predicted = torch.max(outputs.data, 1)
      y_pred.extend(predicted.tolist())
      y_true.extend(labels.tolist())
      y_names.extend([os.path.split(name)[1] for name in paths])

  return y_true, y_pred, y_names


def make_subfolder(test_path):

  new_folder = os.path.join(test_path, 'test_images')

  try:
    os.mkdir(new_folder)  
  except:
    print('Path exists!') 

  ext = ['png', 'jpg', 'tiff']
  files = []
  [files.extend(glob.glob(test_path + '*.' + e)) for e in ext]

  for file in files:
      shutil.copy(file, new_folder)

  return new_folder




---


**PRECISION, RECALL & F1 SCORE** (ON VALIDATION DATA)



---




In [9]:
val_dataset = ImageFolderWithPaths('./Dataset', transform=transform)
dataloader_test = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=True)

y_true_val, y_pred_val, y_names_val = get_predictions(dataloader_test)

print('<> Precision: %f' % precision_score(y_true_val, y_pred_val))
print('<> Recall: %f' % recall_score(y_true_val, y_pred_val))
print('<> F1 score: %f' % f1_score(y_true_val, y_pred_val))



<> Precision: 0.997301
<> Recall: 0.997195
<> F1 score: 0.997248




---


**PREDICTIONS USING TEST DATA**

---





In [13]:
TEST_DATA_PATH = "/content/test_data/"


test_dataset = ImageFolderWithPaths(TEST_DATA_PATH, transform=transform)
dataloader_test = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=True)

_none_test, y_pred_test, y_names_test = get_predictions(dataloader_test)





---


**SAVING MODEL & PREDICTIONS**

---



In [14]:
torch.save(model, 'model.pth')

y_names_test = np.array(y_names_test)
y_pred_test = np.array(y_pred_test)

df = pd.DataFrame({"file" : y_names_test, "label" : y_pred_test})
df.to_csv("output.csv", index=False)
print("Predictions stored in 'output.csv'!")

Predictions stored in 'output.csv'!
