In [None]:
import os
import torch.multiprocessing
torch.multiprocessing.set_sharing_strategy('file_system')
import torch  
import torchvision
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import torchvision.transforms as transforms 
import numpy as np
from sklearn import *
import csv

In [None]:
# Read the training labels; the first CSV row is treated as the header.
train_labels_info = pd.read_csv('../input/ee4146/train_labels.csv', header=0)
categories = list(np.unique(train_labels_info['label']))

# Encode categories as numbers: num2cat maps index -> category and
# cat2num is the inverse mapping (category -> index).
num2cat = dict(enumerate(categories))
cat2num = {category: number for number, category in num2cat.items()}
print(cat2num)
print(num2cat)

In [None]:
import sys 

def printProgressBar(value,max,label):
    """Draw a single-line text progress bar on stdout, overwriting the current line.

    Args:
        value: Amount of work completed so far.
        max: Total amount of work. The name shadows the builtin but is kept so
            existing keyword callers keep working. A total of 0 is rendered as
            an empty bar instead of raising ZeroDivisionError.
        label: Caption printed left of the bar, left-justified to 10 characters.
    """
    n_bar = 40  # width of the bar in characters

    # Completed fraction, guarded against a zero total and clamped to [0, 1]
    # so the bar can never be longer or shorter than n_bar.
    fraction = value / max if max else 0.0
    if fraction < 0.0:
        fraction = 0.0
    elif fraction > 1.0:
        fraction = 1.0

    # Derive the empty part from the filled part. Truncating both parts
    # independently could lose one character (e.g. 1/3 gave 13 + 26 = 39).
    filled = int(n_bar * fraction)
    bar = '█' * filled + '-' * (n_bar - filled)

    # '\r' returns to the start of the line so successive calls redraw in place.
    sys.stdout.write('\r')
    sys.stdout.write(f"{label.ljust(10)} | [{bar:{n_bar}s}] {int(100 * fraction)}% ")
    sys.stdout.flush()

In [None]:
def write_csv_kaggle_sub(fname, ID, Y):
    """Write predictions to a CSV file with an ``ID,label`` header row.

    fname -- name of the CSV file to create (overwritten if it exists)
    ID    -- image IDs, indexed by position and aligned with Y
    Y     -- list/array of numeric class entries; each one is mapped back to
             its category name through the module-level ``num2cat`` dict
    """
    header = ['ID', 'label']
    # One row per prediction: the image ID plus the decoded category name.
    # Rows are built before the file is opened, so a bad entry leaves no file.
    rows = [[ID[pos], num2cat[pred]] for pos, pred in enumerate(Y)]
    with open(fname, 'w', newline='') as out_file:
        out = csv.writer(out_file)
        out.writerow(header)
        out.writerows(rows)

In [None]:
# Load the pre-extracted DenseNet features for the train and test images,
# then hold out 25% of the training data for offline validation.
train_feat_root = '../input/densenet/PLAD/'
test_feat_root = '../input/densenet/PLAD_test/'

train_feats = []
test_feats = []

train_labels = []
test_img_id = []

# List each directory once and sort it: os.listdir returns entries in
# arbitrary order, which would make the seeded split below differ between
# machines, and re-listing on every progress update is wasted work.
train_files = sorted(os.listdir(train_feat_root))
test_files = sorted(os.listdir(test_feat_root))

# load train features
for index, feat in enumerate(train_files, start=1):
    # File names start with the numeric image ID followed by '_'.
    img_id = int(feat.split('_')[0])
    train_feats.append(np.load(os.path.join(train_feat_root, feat)))

    # NOTE(review): this looks the label up by DataFrame index label, i.e. it
    # assumes row index == image ID in train_labels.csv -- confirm.
    label = train_labels_info['label'][img_id]
    train_labels.append(cat2num[label])
    if index % 100 == 0:
        printProgressBar(index, len(train_files), "Train")

# load test features and corresponding ID
for index, feat in enumerate(test_files, start=1):
    img_id = int(feat.split('_')[0])
    test_img_id.append(img_id)
    test_feats.append(np.load(os.path.join(test_feat_root, feat)))
    if index % 20 == 0:
        printProgressBar(index, len(test_files), "Test")

print("")
print(len(train_feats))
print(len(test_feats))

# Splitting training set and validation set for offline evaluation
trainX, valX, trainY, valY = model_selection.train_test_split(train_feats, train_labels, train_size=0.75, test_size=0.25, random_state=123)
print(len(trainX))
print(len(valX))

In [None]:
# MLP with three hidden layers of 50 units, tanh activation and the sgd solver,
# scored on the validation split.
# NOTE(review): no random_state is set, so the accuracy varies between runs.
clf = neural_network.MLPClassifier(
    hidden_layer_sizes=(50, 50, 50),
    activation='tanh',
    solver='sgd',
    alpha=0.05,
    learning_rate='constant',
    max_iter=1000,
).fit(trainX, trainY)
y_pred_MLP = clf.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_MLP))
# Submission lines are currently disabled:
# predY = clf.predict(test_feats)
# write_csv_kaggle_sub('submission_DenseNet_MLP_Optimized.csv',  test_img_id, predY)

In [None]:
# Deeper MLP (300-150-100-50 hidden units) with a fixed seed: report the
# validation accuracy, then write a submission from the test features.
clf = neural_network.MLPClassifier(
    hidden_layer_sizes=(300, 150, 100, 50),
    max_iter=1000,
    random_state=1,
    verbose=1,
).fit(trainX, trainY)
y_pred_MLP = clf.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_MLP))
predY = clf.predict(test_feats)
write_csv_kaggle_sub(fname='submission_DenseNet_MLP.csv', ID=test_img_id, Y=predY)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

# Base estimator whose hyper-parameters are searched below.
mlp = MLPClassifier(max_iter=100)

# Every combination of these values is evaluated.
parameter_space = dict(
    hidden_layer_sizes=[(50,50,50), (50,100,50), (100,)],
    activation=['tanh', 'relu'],
    solver=['sgd', 'adam'],
    alpha=[0.0001, 0.05],
    learning_rate=['constant','adaptive'],
)

# 3-fold cross-validated grid search; n_jobs=-1 uses all available cores.
clf = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3).fit(trainX, trainY)

# Best parameter set
print('Best parameters found:\n', clf.best_params_)

# Mean score and a +/- two-standard-deviation band for every candidate
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with up to 100 estimators and a fixed seed, scored on the
# validation split.
clf = AdaBoostClassifier(n_estimators=100, random_state=0).fit(trainX, trainY)
y_pred_Ada = clf.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_Ada))
# Submission lines are currently disabled:
# predY = clf.predict(test_feats)
# write_csv_kaggle_sub('submissionAda.csv',  test_img_id, predY)
# write_csv_kaggle_sub('submissionSVC.csv',  test_img_id, predY)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize the features and fit an SVC. The scaler lives inside the
# pipeline, so it is fitted on the training split only.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(trainX, trainY)
y_pred_SVC = clf.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_SVC))

In [None]:
from sklearn.linear_model import SGDClassifier

# Linear classifier trained with SGD (fixed seed), scored on the validation
# split and then used to produce a submission file.
sgd_clf = SGDClassifier(random_state=42, max_iter=1000, tol=1e-3)
sgd_clf.fit(trainX,trainY)
y_pred_SGD = sgd_clf.predict(valX)
print(metrics.accuracy_score(valY,y_pred_SGD))
# Predict with the SGD model itself. This previously called `clf.predict`,
# i.e. whichever estimator an earlier cell had left in `clf`, so the file
# named after SGD contained another model's predictions.
predY = sgd_clf.predict(test_feats)
write_csv_kaggle_sub('submission_Dense_SGD.csv',  test_img_id, predY)

In [None]:
# Write the SGD classifier's test-set predictions to a submission file.
predY = sgd_clf.predict(test_feats)
write_csv_kaggle_sub(fname='submissionSGD.csv', ID=test_img_id, Y=predY)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 10 trees, scored on the validation split.
# NOTE(review): no random_state is set, so the accuracy varies between runs.
clf_rfc = RandomForestClassifier(n_estimators=10).fit(trainX, trainY)
y_pred_rfc = clf_rfc.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_rfc))

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier

# Bagging ensemble of 10 SVCs with a fixed seed, scored on the validation split.
# The base model is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases (and the old
# name later removed), while the first positional slot works on both.
bagg = BaggingClassifier(SVC(), n_estimators=10, random_state=0)
bagg = bagg.fit(trainX,trainY)
y_pred_bagg = bagg.predict(valX)
print(metrics.accuracy_score(valY,y_pred_bagg))

In [None]:
# Load the pre-extracted VGG features for the train and test images (this
# replaces the feature lists used by the cells above), then hold out 25% of
# the training data for offline validation.
train_feat_root = '../input/vgg-model/VGG_train/'
test_feat_root = '../input/vgg-model/VGG_test/'

train_feats = []
test_feats = []

train_labels = []
test_img_id = []

# List each directory once and sort it: os.listdir returns entries in
# arbitrary order, which would make the seeded split below differ between
# machines, and re-listing on every progress update is wasted work.
train_files = sorted(os.listdir(train_feat_root))
test_files = sorted(os.listdir(test_feat_root))

# load train features
for index, feat in enumerate(train_files, start=1):
    # File names start with the numeric image ID followed by '_'.
    img_id = int(feat.split('_')[0])
    train_feats.append(np.load(os.path.join(train_feat_root, feat)))

    # NOTE(review): this looks the label up by DataFrame index label, i.e. it
    # assumes row index == image ID in train_labels.csv -- confirm.
    label = train_labels_info['label'][img_id]
    train_labels.append(cat2num[label])
    if index % 100 == 0:
        printProgressBar(index, len(train_files), "Train")

# load test features and corresponding ID
for index, feat in enumerate(test_files, start=1):
    img_id = int(feat.split('_')[0])
    test_img_id.append(img_id)
    test_feats.append(np.load(os.path.join(test_feat_root, feat)))
    if index % 20 == 0:
        printProgressBar(index, len(test_files), "Test")

print("")
print(len(train_feats))
print(len(test_feats))

# Splitting training set and validation set for offline evaluation
trainX, valX, trainY, valY = model_selection.train_test_split(train_feats, train_labels, train_size=0.75, test_size=0.25, random_state=123)
print(len(trainX))
print(len(valX))

In [None]:

# predY = clf.predict(test_feats)
# write_csv_kaggle_sub('submissionMLP2.csv',  test_img_id, predY)

In [None]:
from sklearn.neural_network import MLPClassifier

# Deep MLP (300-150-100-50 hidden units, adam solver, fixed seed): report the
# validation accuracy, then write a submission from the test features.
clf = MLPClassifier(
    hidden_layer_sizes=(300, 150, 100, 50),
    solver='adam',
    alpha=1e-5,
    max_iter=1000,
    random_state=1,
    verbose=1,
).fit(trainX, trainY)
y_pred_MLP = clf.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_MLP))
predY = clf.predict(test_feats)
write_csv_kaggle_sub(fname='submissionMLP_VGG_Actual.csv', ID=test_img_id, Y=predY)

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier

# Bagging ensemble of 10 SVCs with a fixed seed, scored on the validation split.
# The base model is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases (and the old
# name later removed), while the first positional slot works on both.
bagg = BaggingClassifier(SVC(), n_estimators=10, random_state=0)
bagg = bagg.fit(trainX,trainY)
y_pred_bagg = bagg.predict(valX)
print(metrics.accuracy_score(valY,y_pred_bagg))

In [None]:
# Write the bagging ensemble's test-set predictions to a submission file.
predY = bagg.predict(test_feats)
write_csv_kaggle_sub(fname='submission_VGG_Bagging.csv', ID=test_img_id, Y=predY)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 10 trees, scored on the validation split.
# NOTE(review): no random_state is set, so the accuracy varies between runs.
clf_rfc = RandomForestClassifier(n_estimators=10).fit(trainX, trainY)
y_pred_rfc = clf_rfc.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_rfc))

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with up to 100 estimators and a fixed seed, scored on the
# validation split.
clf = AdaBoostClassifier(n_estimators=100, random_state=0).fit(trainX, trainY)
y_pred_Ada = clf.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_Ada))
# Submission lines are currently disabled:
# predY = clf.predict(test_feats)
# write_csv_kaggle_sub('submissionAda.csv',  test_img_id, predY)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize the features and fit an SVC. The scaler lives inside the
# pipeline, so it is fitted on the training split only.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(trainX, trainY)
y_pred_SVC = clf.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_SVC))

# Submission from the same fitted pipeline.
predY = clf.predict(test_feats)
write_csv_kaggle_sub(fname='submission_SVC_VGG.csv', ID=test_img_id, Y=predY)

In [None]:
# PCA (Principal Component Analysis) down to 512 components.
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Standardize using statistics from the training split only.
sc = StandardScaler().fit(trainX)

# Fit the projection on the scaled training split and reuse it for the
# validation split (stored as X_test, but derived from valX).
pca = PCA(n_components=512)
X_train = pca.fit_transform(sc.transform(trainX))
X_test = pca.transform(sc.transform(valX))

# Fraction of the total variance captured by each kept component.
explained_variance = pca.explained_variance_ratio_

In [None]:
# Number of entries in the explained-variance vector (one per kept component).
explained_variance.shape[0]

In [None]:
from sklearn.decomposition import PCA

# Standardize using statistics from the training split only.
# (StandardScaler is expected to be imported by an earlier cell.)
sc = StandardScaler().fit(trainX)

# Project onto the first 2 principal components; X_test holds the
# transformed validation split.
pca = PCA(n_components=2)
X_train = pca.fit_transform(sc.transform(trainX))
X_test = pca.transform(sc.transform(valX))

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# SVC on the PCA-reduced features; the pipeline re-standardizes the
# components before classification. X_test is the reduced validation split.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(X_train, trainY)
y_pred_SVC = clf.predict(X_test)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_SVC))

In [None]:
# Deep MLP (300-150-100-50 hidden units, adam solver, fixed seed) on the
# PCA-reduced features; X_test is the reduced validation split.
clf = neural_network.MLPClassifier(
    hidden_layer_sizes=(300, 150, 100, 50),
    solver='adam',
    alpha=1e-5,
    max_iter=1000,
    random_state=1,
    verbose=1,
).fit(X_train, trainY)
y_pred_MLP = clf.predict(X_test)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_MLP))

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Shallow random forest (max depth 2, fixed seed) on the PCA-reduced features.
classifier = RandomForestClassifier(max_depth=2, random_state=0).fit(X_train, trainY)

# Score on the reduced validation split (X_test is derived from valX).
y_pred_RFC = classifier.predict(X_test)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_RFC))

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Shallow random forest (max depth 2, fixed seed) on the unreduced features.
classifier = RandomForestClassifier(max_depth=2, random_state=0).fit(trainX, trainY)

# Score on the validation split.
y_pred_RFC = classifier.predict(valX)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_RFC))

In [None]:
# PCA (Principal Component Analysis) down to 128 components, then an SVC.
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize using statistics from the training split only.
sc = StandardScaler().fit(trainX)

# Fit the projection on the scaled training split and reuse it for the
# validation split (stored as X_test, but derived from valX).
pca = PCA(n_components=128)
X_train = pca.fit_transform(sc.transform(trainX))
X_test = pca.transform(sc.transform(valX))

explained_variance = pca.explained_variance_ratio_

# The pipeline re-standardizes the principal components before the SVC.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(X_train, trainY)
y_pred_SVC = clf.predict(X_test)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_SVC))

In [None]:
# PCA (Principal Component Analysis) down to 128 components, then an SVC.
# NOTE(review): this cell repeats the 128-component experiment of the
# preceding cell with the same settings.
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize using statistics from the training split only.
sc = StandardScaler().fit(trainX)

# Fit the projection on the scaled training split and reuse it for the
# validation split (stored as X_test, but derived from valX).
pca = PCA(n_components=128)
X_train = pca.fit_transform(sc.transform(trainX))
X_test = pca.transform(sc.transform(valX))

explained_variance = pca.explained_variance_ratio_

# The pipeline re-standardizes the principal components before the SVC.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(X_train, trainY)
y_pred_SVC = clf.predict(X_test)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_SVC))

In [None]:
# PCA (Principal Component Analysis) down to 64 components, then an SVC,
# followed by a submission built from the equally transformed test features.
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize using statistics from the training split only.
sc = StandardScaler().fit(trainX)

# Fit the projection on the scaled training split, then push the validation
# split (X_test) and the real test set (Test_Feats) through the same
# scaler + projection.
pca = PCA(n_components=64)
X_train = pca.fit_transform(sc.transform(trainX))
X_test = pca.transform(sc.transform(valX))
Test_Feats = pca.transform(sc.transform(test_feats))

explained_variance = pca.explained_variance_ratio_

# The pipeline re-standardizes the principal components before the SVC.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(X_train, trainY)
y_pred_SVC = clf.predict(X_test)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_SVC))

# NOTE(review): when the notebook runs top to bottom, the features loaded at
# this point come from the VGG directories, yet the file name says DenseNet --
# confirm which feature set this submission was meant to use.
predY_PCA_SVC = clf.predict(Test_Feats)
write_csv_kaggle_sub(fname='submission_DenseNet_PCA_SVC.csv', ID=test_img_id, Y=predY_PCA_SVC)

In [None]:
# PCA (Principal Component Analysis) down to 32 components, then an SVC.
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize using statistics from the training split only.
sc = StandardScaler().fit(trainX)

# Fit the projection on the scaled training split and reuse it for the
# validation split (stored as X_test, but derived from valX).
pca = PCA(n_components=32)
X_train = pca.fit_transform(sc.transform(trainX))
X_test = pca.transform(sc.transform(valX))

explained_variance = pca.explained_variance_ratio_

# The pipeline re-standardizes the principal components before the SVC.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(X_train, trainY)
y_pred_SVC = clf.predict(X_test)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_SVC))

In [None]:
# PCA (Principal Component Analysis) down to 16 components, then an SVC.
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize using statistics from the training split only.
sc = StandardScaler().fit(trainX)

# Fit the projection on the scaled training split and reuse it for the
# validation split (stored as X_test, but derived from valX).
pca = PCA(n_components=16)
X_train = pca.fit_transform(sc.transform(trainX))
X_test = pca.transform(sc.transform(valX))

explained_variance = pca.explained_variance_ratio_

# The pipeline re-standardizes the principal components before the SVC.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(X_train, trainY)
y_pred_SVC = clf.predict(X_test)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_SVC))

In [None]:
# PCA (Principal Component Analysis) down to 8 components, then an SVC.
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize using statistics from the training split only.
sc = StandardScaler().fit(trainX)

# Fit the projection on the scaled training split and reuse it for the
# validation split (stored as X_test, but derived from valX).
pca = PCA(n_components=8)
X_train = pca.fit_transform(sc.transform(trainX))
X_test = pca.transform(sc.transform(valX))

explained_variance = pca.explained_variance_ratio_

# The pipeline re-standardizes the principal components before the SVC.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(X_train, trainY)
y_pred_SVC = clf.predict(X_test)
print(metrics.accuracy_score(y_true=valY, y_pred=y_pred_SVC))