# Skin segmentation

In this assignment you will train a classifier that assigns colors to one of two classes: skin or no skin. The data is taken from the [Skin Segmentation Data Set](http://archive.ics.uci.edu/ml/datasets/Skin+Segmentation#) in the UCI Machine Learning repository.

The data is in a plain text format and contains four columns. The first three contain RGB color data represented as integers in the range 0-255, and the last column is an integer label, with 1 representing skin and 2 representing no skin. We can load this file directly into a numpy array:

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn

In [2]:
# Parse the whitespace-separated text file into a 2-D float array:
# one row per sample, three color columns followed by the label column.
data = np.loadtxt('data/Skin_NonSkin.txt')

In [3]:
# Columns 0-2 hold the color channels; store them as float32.
rgb = data[:, :3].astype(np.float32)

# Column 3 holds the label (1 = skin, 2 = no skin).
# `2 - label` remaps it to a binary target: 1 = skin, 0 = no skin.
lbl = 2 - data[:, 3].astype(np.float32)

print(rgb)
print(lbl)

[[ 74.  85. 123.]
 [ 73.  84. 122.]
 [ 72.  83. 121.]
 ...
 [163. 162. 112.]
 [163. 162. 112.]
 [255. 255. 255.]]
[1. 1. 1. ... 0. 0. 0.]


In [4]:
# Number of rows (samples) in the loaded table.
len(data)

245057

In [5]:
# Class balance: after the `2 - lbl` remap, index 0 counts "no skin" samples
# and index 1 counts "skin" samples. bincount needs integers, hence the cast.
np.bincount(lbl.astype('int32'))

array([194198,  50859], dtype=int64)

## Problem 1

Train a neural network to distinguish skin colors from non-skin colors. Calculate the accuracy on the training and validation sets. Calculate the true positive rate and the false positive rate.

In [6]:
# Wrap the NumPy feature and label arrays as tensors.
tensor_x = torch.from_numpy(rgb)
tensor_y = torch.from_numpy(lbl)

# 80/20 hold-out split. The second size is computed as the remainder so the
# two parts always add up to the full data set, whatever the rounding does.
train_size = round(0.8 * len(data))
test_size = len(data) - train_size

dataset = torch.utils.data.TensorDataset(tensor_x, tensor_y)
train, test = torch.utils.data.random_split(dataset, (train_size, test_size))

# Reshuffle the training samples at the start of every epoch so that SGD does
# not replay the exact same sequence of mini-batches each time. Ordering does
# not matter for evaluation, so the held-out loader is left unshuffled.
train_set = torch.utils.data.DataLoader(train, batch_size=64, shuffle=True)
test_set = torch.utils.data.DataLoader(test, batch_size=64)

In [7]:
class Net(nn.Module):
    """Small fully connected binary classifier.

    Layout: ``input_shape -> 6 -> 2 -> 1`` with ReLU after the first two
    layers and a sigmoid on the output, so each sample yields one value in
    the range [0, 1].

    Args:
        input_shape: Number of input features per sample.
    """

    def __init__(self, input_shape):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_shape, out_features=6)
        self.fc2 = nn.Linear(in_features=6, out_features=2)
        self.fc3 = nn.Linear(in_features=2, out_features=1)

    def forward(self, x):
        """Run a batch ``x`` through the network and return the sigmoid output."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden).sigmoid()

# Hyper-parameters for plain SGD training.
lr = 0.01
epochs = 20

model = Net(input_shape=tensor_x.shape[1])
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# The network already ends in a sigmoid, so plain BCELoss (which expects
# probabilities) is the matching criterion.
loss_func = nn.BCELoss()

results = []

for i in range(epochs):
    # Running totals are kept as plain Python numbers. Accumulating the loss
    # *tensor* instead would chain every batch's autograd history together
    # for the whole epoch.
    epoch_loss = 0.0
    epoch_correct = 0
    epoch_samples = 0
    for j, (x, y) in enumerate(train_set):
        # Labels arrive as shape (batch,); the model outputs (batch, 1).
        target = y.reshape(-1, 1)

        output = model(x)
        loss = loss_func(output, target)

        #backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Detach the metrics from the graph before doing any bookkeeping.
        batch_size = len(x)
        batch_loss = loss.item()
        batch_correct = torch.sum(output.round() == target).item()
        acc = 100 * batch_correct / batch_size

        # Weight by batch size so a short final batch does not skew the
        # epoch averages.
        epoch_loss += batch_loss * batch_size
        epoch_correct += batch_correct
        epoch_samples += batch_size

        if (j % 500) == 0:
            print("data_batch_no: {}\tloss : {}\t accuracy : {}".format(j, batch_loss, acc))
    epoch_loss = epoch_loss / epoch_samples
    epoch_acc = 100 * epoch_correct / epoch_samples
    print("epoch: {}\tloss : {}\t accuracy : {}".format(i, epoch_loss, epoch_acc))

data_batch_no: 0	loss : 0.5954208970069885	 accuracy : 75.0
data_batch_no: 500	loss : 0.14904335141181946	 accuracy : 98.4375
data_batch_no: 1000	loss : 0.13780145347118378	 accuracy : 98.4375
data_batch_no: 1500	loss : 0.12516091763973236	 accuracy : 98.4375
data_batch_no: 2000	loss : 0.10362161695957184	 accuracy : 98.4375
data_batch_no: 2500	loss : 0.1172817200422287	 accuracy : 96.875
data_batch_no: 3000	loss : 0.0448819100856781	 accuracy : 100.0
epoch: 0	loss : 0.12019004672765732	 accuracy : 96.70671844482422
data_batch_no: 0	loss : 0.06643105298280716	 accuracy : 100.0
data_batch_no: 500	loss : 0.045141711831092834	 accuracy : 100.0
data_batch_no: 1000	loss : 0.07790052890777588	 accuracy : 98.4375
data_batch_no: 1500	loss : 0.10192867368459702	 accuracy : 96.875
data_batch_no: 2000	loss : 0.05744212120771408	 accuracy : 98.4375
data_batch_no: 2500	loss : 0.07643819600343704	 accuracy : 96.875
data_batch_no: 3000	loss : 0.024105247110128403	 accuracy : 100.0
epoch: 1	loss : 0.0

In [8]:

# Evaluate the trained model on the held-out split: overall accuracy plus
# false positive rate (FP / actual negatives) and true positive rate
# (TP / actual positives), where "positive" means label 1.
test_accuracies = []  # per-batch accuracies in percent
fp = 0
tp = 0
positives = 0
negatives = 0
correct = 0
total = 0
with torch.no_grad():
    for x, y in test_set:
        # Labels arrive as shape (batch,); the model outputs (batch, 1).
        target = y.reshape(-1, 1)
        output = model(x)
        # Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
        predicted = output.round()

        is_hit = predicted == target
        is_predicted_positive = predicted == 1

        negatives += torch.sum(target == 0).item()
        positives += torch.sum(target == 1).item()
        fp += torch.sum(torch.logical_and(~is_hit, is_predicted_positive)).item()
        tp += torch.sum(torch.logical_and(is_hit, is_predicted_positive)).item()

        batch_correct = torch.sum(is_hit).item()
        correct += batch_correct
        total += len(x)
        test_accuracies.append(100 * batch_correct / len(x))

# Accuracy is computed over all samples rather than as a mean of per-batch
# accuracies, which would over-weight a short final batch.
test_accuracy = 100 * correct / total if total else float('nan')
# A rate is undefined when its denominator class is absent from the split.
fpr = fp / negatives if negatives else float('nan')
tpr = tp / positives if positives else float('nan')
print("test accuracy: {} fpr : {} tpr : {}".format(test_accuracy,fpr,tpr))

test accuracy: 99.33502197265625 fpr : 0.006599469110369682 tpr : 0.9931507110595703
