### Importing Libraries

In [1]:
import os 
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score,accuracy_score

### Data Extraction & Preprocessing

In [2]:
# NOTE(review): absolute, machine-specific location of the training images —
# consider moving it to a configurable data directory.
datapath = "C:\\Users\\US593\\Work\\Datasets\\Skin_lesions_Classification\\ISIC_2019_Training_Input\\ISIC_2019_Training_Input"

IMG_SIZE = (50, 50)  # every image is shrunk to 50x50 -> 2500 features per image

data = []     # one [flattened_pixels, image_name] entry per decodable image
skipped = []  # directory entries OpenCV could not decode (e.g. non-image files)

# os.listdir gives no ordering guarantee; sorting keeps the row order reproducible.
for img in sorted(os.listdir(datapath)):

    imgpath = os.path.join(datapath,img)
    image = cv2.imread(imgpath,0)  # flag 0 -> load as single-channel grayscale

    # cv2.imread reports failure by returning None instead of raising, so test
    # for it explicitly rather than hiding every possible error behind `pass`.
    if image is None:
        skipped.append(img)
        continue

    image_resized = cv2.resize(image,IMG_SIZE)
    image_flattened = image_resized.flatten()
    img_name = os.path.splitext(img)[0]  # file name without its extension
    data.append([image_flattened,img_name])


print(len(data))
if skipped:
    # Make the formerly silent skips visible so missing images are noticed.
    print('Skipped (not readable as images):', skipped)

# Cache the extracted features; `with` closes the file even if dumping fails.
with open('image_data.pickle','wb') as pick_in:
    pickle.dump(data,pick_in)

25331


In [3]:
# Read ISIC_2019_Training_GroundTruth.csv into `df`.
# NOTE(review): absolute path tied to one machine — consider a configurable data directory.
df = pd.read_csv(r'C:\Users\US593\Work\Datasets\Skin_lesions_Classification\ISIC_2019_Training_GroundTruth.csv')

In [4]:
# Preview the first rows of the freshly loaded table.
df.head()

Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK
0,ISIC_0000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,ISIC_0000001,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,ISIC_0000002,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,ISIC_0000003,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ISIC_0000004,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
def get_column_names(row):
    """Return the comma-joined labels of the entries in ``row`` that equal 1.

    Parameters
    ----------
    row : pandas.Series
        One record, e.g. a row handed over by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    str
        Labels (index entries of ``row``) whose value equals 1, joined with
        ','. An empty string when no entry equals 1.
    """
    # Take the labels from the row itself instead of the global `df`, so the
    # function works on any frame and cannot drift out of sync with `df`.
    is_one = (row == 1).to_numpy()
    return ','.join(map(str, row.index[is_one]))

# Run get_column_names on every row (axis=1) and keep the resulting
# comma-joined names in a new 'Binary_Column_Names' column.
df['Binary_Column_Names'] = df.apply(get_column_names, axis=1)

In [6]:
# Check that the new 'Binary_Column_Names' column looks as expected.
df.head()

Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK,Binary_Column_Names
0,ISIC_0000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV
1,ISIC_0000001,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV
2,ISIC_0000002,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MEL
3,ISIC_0000003,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV
4,ISIC_0000004,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MEL


In [7]:
# Turn the class-name strings into integer codes; codes follow the sorted
# order of the distinct names found in 'Binary_Column_Names'.
df['Category_Encoded'] = pd.Categorical(df['Binary_Column_Names']).codes

In [8]:
# Inspect the integer codes next to the class names they were derived from.
df.head()

Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK,Binary_Column_Names,Category_Encoded
0,ISIC_0000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,5
1,ISIC_0000001,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,5
2,ISIC_0000002,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MEL,4
3,ISIC_0000003,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,5
4,ISIC_0000004,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MEL,4


In [9]:
# Reload the cached [pixels, name] pairs from image_data.pickle.
# NOTE: pickle.load can execute arbitrary code stored in the file — only load
# pickles that this notebook produced itself.
with open('image_data.pickle','rb') as pick_in:  # closed even if loading fails
    data = pickle.load(pick_in)

In [10]:
# Frame built from the first element of every entry in `data`
# (the flattened pixel array): one row per image.
data1 = pd.DataFrame(x[0] for x in data)

In [11]:
# Single-column frame ('image') built from the second element of every entry
# in `data` (the image name).
data2 = pd.DataFrame((x[1] for x in data),columns = ['image'])

In [12]:
# Length of the first image's flattened pixel vector.
len(data[0][0])

2500

In [13]:
# Put the pixel columns and the 'image' column side by side (column-wise concat).
data3 = pd.concat([data1,data2],axis = 1)

In [14]:
# Preview the combined pixel/name frame.
data3.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2491,2492,2493,2494,2495,2496,2497,2498,2499,image
0,176,185,194,198,202,205,208,207,206,203,...,213,213,218,217,217,217,217,216,216,ISIC_0000000
1,130,139,150,156,161,169,168,171,172,170,...,176,180,175,181,178,180,178,177,174,ISIC_0000001
2,7,4,4,5,12,40,72,102,117,132,...,187,180,176,162,147,122,101,66,34,ISIC_0000002
3,222,221,220,218,217,218,220,221,222,221,...,220,222,219,218,220,216,220,222,224,ISIC_0000003
4,2,2,2,2,0,2,0,2,0,2,...,1,1,1,1,1,1,1,1,1,ISIC_0000004


In [15]:
# Save the combined frame to image_feature.csv (the row index is written too,
# because index=False is not passed).
data3.to_csv('image_feature.csv')

In [16]:
# Attach the label columns to each image row by matching on 'image'; the left
# join keeps every row of data3 even if no label row matches.
final_data = data3.merge(df, how="left", on='image')

In [17]:
# Preview the merged pixels + labels frame.
final_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,NV,BCC,AK,BKL,DF,VASC,SCC,UNK,Binary_Column_Names,Category_Encoded
0,176,185,194,198,202,205,208,207,206,203,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,5
1,130,139,150,156,161,169,168,171,172,170,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,5
2,7,4,4,5,12,40,72,102,117,132,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MEL,4
3,222,221,220,218,217,218,220,221,222,221,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,5
4,2,2,2,2,0,2,0,2,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MEL,4


In [18]:
# List the label-table columns (used to pick which ones to drop next).
df.columns

Index(['image', 'MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC', 'UNK',
       'Binary_Column_Names', 'Category_Encoded'],
      dtype='object')

In [19]:
# Keep only the pixel columns and 'Category_Encoded': remove the image name,
# the per-class indicator columns and the class-name string.
final_data1 = final_data.drop(
    columns=[
        'image',
        'MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC', 'UNK',
        'Binary_Column_Names',
    ]
)

In [20]:
# Display the modelling frame (pandas abbreviates the rendering of large frames).
final_data1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2491,2492,2493,2494,2495,2496,2497,2498,2499,Category_Encoded
0,176,185,194,198,202,205,208,207,206,203,...,213,213,218,217,217,217,217,216,216,5
1,130,139,150,156,161,169,168,171,172,170,...,176,180,175,181,178,180,178,177,174,5
2,7,4,4,5,12,40,72,102,117,132,...,187,180,176,162,147,122,101,66,34,4
3,222,221,220,218,217,218,220,221,222,221,...,220,222,219,218,220,216,220,222,224,5
4,2,2,2,2,0,2,0,2,0,2,...,1,1,1,1,1,1,1,1,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25326,0,0,0,0,0,0,0,0,1,0,...,1,1,1,1,0,1,1,1,1,1
25327,90,103,105,114,119,123,125,128,128,130,...,123,123,125,120,110,105,100,83,73,2
25328,126,127,99,97,100,99,85,78,76,76,...,229,229,228,227,226,224,223,219,215,4
25329,127,130,134,138,141,141,142,137,138,135,...,140,139,137,135,131,126,121,116,106,5


In [21]:
# Write the pixel columns plus encoded label to final_data.csv without the row index.
final_data1.to_csv('final_data.csv',index = False)

In [22]:
# Split the frame into plain NumPy arrays: `y` is the encoded class label,
# `x` is every remaining (pixel) column.
y = final_data1['Category_Encoded'].to_numpy()
x = final_data1.drop(columns='Category_Encoded').to_numpy()

In [23]:
# 80/20 split with a fixed seed; stratifying on the labels keeps the class
# proportions the same in both parts (`y` holds the Category_Encoded values).
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=333,
    stratify=y,
)

In [24]:
# (rows, features) of the training inputs.
xtrain.shape

(20264, 2500)

In [25]:
# Number of training labels.
ytrain.shape

(20264,)

In [26]:
# (rows, features) of the held-out inputs.
xtest.shape

(5067, 2500)

In [27]:
# Number of held-out labels.
ytest.shape

(5067,)

### SVC

In [28]:
# Support-vector classifier with a polynomial kernel, C=1 and gamma='auto'.
# NOTE(review): no feature-scaling step is visible before this point, so the
# model appears to receive the pixel values as-is — confirm this is intended.
model1 = SVC(C = 1,kernel = 'poly',gamma = 'auto')

In [29]:
# Fit the SVC on the training split.
model1.fit(xtrain,ytrain)

In [30]:
# Predict class codes for the held-out split with the SVC.
prediction = model1.predict(xtest)

In [31]:
# Score the SVC predictions: plain accuracy and F1 averaged with
# average='weighted'.
accuracy = accuracy_score(ytest,prediction)
f1 = f1_score(ytest, prediction,average='weighted')

In [32]:
# Report both SVC metrics.
print('Accuracy: ',accuracy)
print('F1 Score: ',f1)

Accuracy:  0.4651667653443852
F1 Score:  0.47417062974974383


In [33]:
# Persist the fitted SVC to svm.p; the context manager guarantees the file is
# flushed and closed (the bare open() previously left the handle dangling).
with open('svm.p', 'wb') as model_file:
    pickle.dump(model1, model_file)

### K-NN

In [34]:
# k-nearest-neighbours classifier that votes over 11 neighbours.
model2 = KNeighborsClassifier(n_neighbors=11)

In [35]:
# Fit k-NN on the same training split used for the SVC.
model2.fit(xtrain,ytrain)

In [36]:
# Predict with k-NN; this rebinds `prediction`, which held the SVC output before.
prediction = model2.predict(xtest)

In [37]:
# Score the k-NN predictions with the same two metrics; `accuracy` and `f1`
# are overwritten, so the SVC values are no longer available afterwards.
accuracy = accuracy_score(ytest,prediction)
f1 = f1_score(ytest, prediction,average='weighted')

In [38]:
# Report both k-NN metrics.
print('Accuracy: ',accuracy)
print('F1 Score: ',f1)

Accuracy:  0.5498322478784291
F1 Score:  0.4850130258732868


In [39]:
# Persist the fitted k-NN model to knn.p; the context manager guarantees the
# file is flushed and closed (the bare open() previously left the handle dangling).
with open('knn.p', 'wb') as model_file:
    pickle.dump(model2, model_file)