# [Machine Learning - Image Classification Code]

In [12]:
#import all library
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import librosa
import cv2
import os
import torchvision.transforms as transforms 
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn import metrics
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the metadata tables for the train and test splits (read in that order).
train_csv, test_csv = map(
    pd.read_csv,
    ("../../dataset/train.csv", "../../dataset/test.csv"),
)

## Re-run the cell below to reset these lists every time the feature extraction method is changed

In [3]:
# Empty containers for the features (x_*) and labels (y_*) of each split.
# Each name gets its own list object so the splits never alias one another.
x_train, y_train = [], []
x_test, y_test = [], []

## USER INPUT: Change the feature options in the cell below
## Set sr to 8000 or 16000, and f_method to melspect or mfcc

### 4 OPTIONS : 1) Mel Spectrogram-16,000 2) Mel Spectrogram - 8,000 3) MFCC - 16,000 4) MFCC - 8,000

In [4]:
# Feature-extraction options selected by the user.
f_method = 'melspect'  # USER OPTION : melspect / mfcc
sr = 16000  # USER OPTION : 8000 / 16000

# Name of the image sub-folder for this combination, e.g. 'img_melspect_16000/'.
image_dir = 'img_{}_{}/'.format(f_method, sr)

In [5]:
def load_split(split_csv, image_root):
    """Load the flattened grayscale image and the label for every row of a split.

    Parameters
    ----------
    split_csv : pandas.DataFrame
        Table with a ``file_name`` column and a ``label`` column.
    image_root : str
        Path prefix (including the trailing separator) of the folder holding
        the ``.jpg`` images.

    Returns
    -------
    features : list of numpy.ndarray
        One flattened (1-D) grayscale image per row, in row order.
    labels : list
        The matching ``label`` values, in the same order.

    Raises
    ------
    OSError
        If an image is missing or cannot be decoded. ``cv2.imread`` signals
        this by returning ``None`` instead of raising, which would otherwise
        surface later as an unhelpful ``AttributeError`` on ``.flatten()``.
    """
    features, labels = [], []
    for file, label in zip(split_csv['file_name'], split_csv['label']):
        # Drop the last four characters of the file name and use '.jpg' instead
        # (presumably a 4-character extension such as '.wav' -- confirm against the CSV).
        img_path = image_root + file[:-4] + '.jpg'
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # read each image file
        if img is None:
            raise OSError('Could not read image (missing or unreadable): ' + img_path)
        features.append(img.flatten())
        labels.append(label)
    return features, labels


# Build fresh lists on every run so that re-executing this cell never
# duplicates samples or depends on the state left behind by other cells.
x_train, y_train = load_split(train_csv, '../../dataset/image/' + image_dir)  # train dataset
x_test, y_test = load_split(test_csv, '../../dataset/image/' + image_dir)  # test dataset

In [6]:
# Standardize the features. The scaler is fitted on the training split only and
# then reused, unchanged, on the test split so both share the same statistics.
scaler = StandardScaler()
x_train = pd.DataFrame(scaler.fit_transform(x_train))  # standardized train data, as a dataframe
x_test = scaler.transform(x_test)  # standardized test data

# Keep the labels one-dimensional. A single-column DataFrame is a column vector,
# which scikit-learn estimators only accept after emitting a DataConversionWarning
# (hidden here by the global warnings filter).
y_train = pd.Series(y_train, name='label')

# OPTION: Choose a model by manually running exactly one of the model cells below
### 4 OPTIONS : 1) SVM 2) MLP 3) KNN 4) GNB
## Every model cell assigns `model`, so the evaluation cell scores whichever model cell was run last.

## OPTION 1) SVM

In [7]:
# Create an RBF-kernel SVM classifier.
# probability=True enables predict_proba (needed for the AUC computation); it relies
# on an internally shuffled cross-validation, so random_state is fixed (same seed as
# the MLP option) to make the probabilities reproducible from run to run.
model = svm.SVC(kernel='rbf', C=1, verbose=True, probability=True, random_state=32)
# Train the model using the training set
model = model.fit(x_train, y_train)

[LibSVM]

## OPTION 2) MLP

In [8]:
# Build the MLP classifier (hidden layers of 300 and 100 units, ReLU activation,
# early stopping, fixed seed, verbose training log) and fit it on the training
# set in one chained expression; fit() hands back the fitted estimator.
model = MLPClassifier(
    hidden_layer_sizes=[300, 100],
    activation='relu',
    early_stopping=True,
    random_state=32,
    verbose=True,
).fit(x_train, y_train)

Iteration 1, loss = 0.50842047
Validation score: 0.860870
Iteration 2, loss = 0.44005370
Validation score: 0.856522
Iteration 3, loss = 0.39940455
Validation score: 0.856522
Iteration 4, loss = 0.36557143
Validation score: 0.856522
Iteration 5, loss = 0.34149539
Validation score: 0.878261
Iteration 6, loss = 0.30657281
Validation score: 0.865217
Iteration 7, loss = 0.30100778
Validation score: 0.852174
Iteration 8, loss = 0.29120911
Validation score: 0.865217
Iteration 9, loss = 0.28497346
Validation score: 0.852174
Iteration 10, loss = 0.25277530
Validation score: 0.873913
Iteration 11, loss = 0.22581375
Validation score: 0.878261
Iteration 12, loss = 0.20784669
Validation score: 0.839130
Iteration 13, loss = 0.21115574
Validation score: 0.865217
Iteration 14, loss = 0.19649912
Validation score: 0.860870
Iteration 15, loss = 0.18466873
Validation score: 0.847826
Iteration 16, loss = 0.17515012
Validation score: 0.839130
Validation score did not improve more than tol=0.000100 for 10 co

## OPTION 3) KNN

In [9]:
# Build a 5-nearest-neighbours classifier and fit it on the training set;
# fit() hands back the fitted estimator.
model = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)

## OPTION 4) GNB

In [10]:
# Build a Gaussian Naive Bayes classifier and fit it on the training set;
# fit() hands back the fitted estimator.
model = GaussianNB().fit(x_train, y_train)

# Evaluation Metrics

In [13]:
# Score the test split with whichever classifier `model` currently refers to.
y_prob = model.predict_proba(x_test)  # class probabilities, one column per class
y_pred = model.predict(x_test)  # predicted labels

# ROC curve and AUC from the probabilities in column 1
# (presumably the positive class, label 1 -- confirm against model.classes_).
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_prob[:, 1])
AUC = metrics.auc(fpr, tpr)

# Report accuracy and AUC, then the per-class precision / recall / f1 table.
for caption, value in (
    ('accuracy : ', accuracy_score(y_test, y_pred)),
    ('AUC : ', AUC),
):
    print(caption, value)
print(classification_report(y_test, y_pred))

accuracy :  0.7193211488250653
AUC :  0.7167304922419501
              precision    recall  f1-score   support

           0       0.76      0.60      0.67       369
           1       0.69      0.83      0.75       397

    accuracy                           0.72       766
   macro avg       0.73      0.72      0.71       766
weighted avg       0.73      0.72      0.72       766

