In [1]:
import numpy as np
import sklearn
import sklearn.ensemble
import time
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import sklearn.metrics
import pandas as pd
#from sklearn.inspection import DecisionBoundaryDisplay

In [5]:
def Train_Test(model, X_train, X_test, t_train, t_test): # For Classification
    model.fit(X_train, t_train)
    t_pred = model.predict(X_test)
    print(sklearn.metrics.accuracy_score(t_test, t_pred),(np.average(sklearn.model_selection.cross_val_score(model,X_train,t_train,cv=10))))
    print(sklearn.metrics.confusion_matrix(t_test, t_pred))
    return(sklearn.metrics.accuracy_score(t_test, t_pred))

def Train_Test1(model, X_train, X_test, t_train, t_test): #For Regression
    model.fit(X_train, t_train)
    t_pred = model.predict(X_test)
    #print(np.mean((t_pred - t_test) ** 2))
    print(sklearn.metrics.mean_squared_error(y_pred=t_pred,y_true=t_test))
    print(model.score(X_test, t_test))
    return model.score(X_test, t_test)

def Test_NN_Regression(data): #Best Score: 0.527485248781123 Best layer_size: [25, 7]
    print("Training the model with MLPRegressor")
    X, t = data[:, :-2], data[:, -2]
    best_score = 0
    best_layer_size = [0,0]
    for i in range(3,4):
        n_train = 1200 + i * 100
        print("training data: " + str(n_train) )
        X_train = X[0:n_train]
        t_train = t[0:n_train]
        X_test = X[n_train:]
        t_test = t[n_train:]
        for j in range (10,35,5):
            for k in range (5, 20, 2):
                print("training the data with NN with hidden-layer-size( " +str(j) +","+str(k) +")" )
                NN_regr = sklearn.neural_network.MLPRegressor(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(j, k),random_state=1, max_iter=10000)
                score= Train_Test1(NN_regr, X_train, X_test, t_train, t_test)
                if score > best_score:
                    best_score = score
                    best_layer_size = [j,k]
    print("Best Score: " + str(best_score) + "Best layer_size: " + str(best_layer_size))


def Test_NN_Classification(data): # Best(49,13) accuracy: 0.81  Slightly more time-consuming than RF
    print("Training the model with MLPClassifier")
    X, t = data[:, :-2], data[:, -1]
    best_score = 0
    best_layer_size = [0, 0]
    for i in range(3,4):
        n_train = 1200 + i * 100
        print("training data: " + str(n_train) )
        X_train = X[0:n_train]
        t_train = t[0:n_train]
        X_test = X[n_train:]
        t_test = t[n_train:]
        for j in range (10,50):
            for k in range (5, 20):
                print("training the data with NN with hidden-layer-size( " +str(j) +","+str(k) +")" )
                NN_class = sklearn.neural_network.MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(j, k),random_state=1, max_iter=10000)
                start_time = time.time()
                score = Train_Test(NN_class, X_train, X_test, t_train, t_test)
                end_time = time.time()
                print("Time used " + str(end_time - start_time))
                if score > best_score:
                    best_score = score
                    best_layer_size = [j, k]
    print("Best score "+ str(best_score)+ "Best layer_size: " + str(best_layer_size) )


def Test_RF_Classification(data): # Best(30,11) accuracy: 0.798
    print("Training the model with RandomForest")
    X, t = data[:, :-2], data[:, -1]
    best_score = 0
    best_n_estimator = 0
    best_n_maxDepth = 0
    best_time = 0
    for i in range(3,4):
        n_train = 1200 + i * 100
        print("training data: " + str(n_train) )
        X_train = X[0:n_train]
        t_train = t[0:n_train]
        X_test = X[n_train:]
        t_test = t[n_train:]
        for j in range (10,100,10):
            for k in range (1, 20, 2):
                print("training the data with NN with hidden-layer-size( " +str(j) +","+str(k) +")" )
                model = sklearn.ensemble.RandomForestClassifier(n_estimators=j, max_depth=k, random_state=0)
                start_time = time.time()
                score = Train_Test(model, X_train, X_test, t_train, t_test)
                end_time = time.time()
                print("Time used " + str(end_time - start_time))
                if score > best_score:
                    best_score = score
                    best_n_estimator = j
                    best_n_maxDepth = k
                    best_time = end_time - start_time
    print("Best score " + str(best_score) + "Best n_estimator: " + str(best_n_estimator) + "Best max_Depth " + str(best_n_maxDepth) + "Best Time " + str(best_time))

def loadData(file_name):
    with open(f"../data/{file_name}") as file:
        data=pd.read_csv(file)
        return data.to_numpy()


In [6]:
# data = np.loadtxt('../data/augmented_data.csv', delimiter=',', skiprows=1, usecols = range(5,17))
# X,t = data[:, :-2], data[:, -1]
# X_train = X[0:1500]
# t_train = t[0:1500]
# X_test = X[1500:]
# t_test = t[1500:]

# X1,t1 = data[:, :-2], data[:, -2]
# X1_train = X1[0:1500]
# t1_train = t1[0:1500]
# X1_test = X1[1500:]
# t1_test = t1[1500:]
dataset= loadData("augmented_data.csv")
y=dataset[:,1:3]
X=dataset[:,3:]
#X_train,X_test,y_train,y_test=sklearn.model_selection.train_test_split(X,y,random_state=0)
X_train=X[0:1500,:]
X_test=X[1500:,:]
X1_train=X_train
X1_test=X_test

t_train=y[0:1500,1]
t_test=y[1500:,1]
t1_train=y[0:1500:,0]
t1_test=y[1500:,0]
data=dataset


In [None]:
regr = sklearn.linear_model.LinearRegression()
SGD = sklearn.linear_model.SGDRegressor();
ada = sklearn.ensemble.AdaBoostRegressor(n_estimators = 500)

svm = sklearn.svm.SVC()
RF = sklearn.ensemble.RandomForestClassifier(n_estimators=100, max_depth=20, random_state=0)
NN = sklearn.neural_network.MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(50, 15), random_state=1,max_iter = 10000)

#LR = sklearn.linear_model.LogisticRegression(random_state=0, solver='newton-cholesky', multi_class='ovr')
LR = sklearn.linear_model.LogisticRegression(random_state=0, solver='newton-cg', multi_class='ovr')
NN_regr = sklearn.neural_network.MLPRegressor(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(30, 8), random_state=1,max_iter = 10000)
Train_Test(svm,X_train, X_test, t_train, t_test)
Train_Test(RF,X_train, X_test, t_train, t_test)
Train_Test(LR,X_train, X_test, t_train, t_test)
Train_Test(NN,X_train, X_test, t_train, t_test)
Train_Test1(NN_regr,X1_train, X1_test, t1_train, t1_test)
Train_Test1(ada,X1_train, X1_test, t1_train, t1_test)


Test_NN_Regression(data)
Test_NN_Classification(data)
Test_RF_Classification(data)

0.5540983606557377 0.5486666666666666
[[118  37]
 [ 99  51]]
0.6229508196721312 0.5546666666666666
[[114  41]
 [ 74  76]]
0.5573770491803278 0.5746666666666667
[[109  46]
 [ 89  61]]
0.5442622950819672 0.5373333333333333
[[96 59]
 [80 70]]
170.58100244532523
-3.004745072644929
96.1804123006086
-1.258035928527725
Training the model with MLPRegressor
training data: 1500
training the data with NN with hidden-layer-size( 10,5)
0.25084621146097075
-0.003913647442643553
training the data with NN with hidden-layer-size( 10,7)
0.0002782962914604516
0.9988862281658446
training the data with NN with hidden-layer-size( 10,9)
0.0001539127939957784
0.9993840243649347
training the data with NN with hidden-layer-size( 10,11)
0.0013186723387295627
0.9947225307902978
training the data with NN with hidden-layer-size( 10,13)
0.0006825355671572613
0.9972684188980036
training the data with NN with hidden-layer-size( 10,15)
0.18260546716968493
0.26919318604973586
training the data with NN with hidden-layer-

0.5114754098360655 0.54
[[156   0]
 [149   0]]
Time used 1.41017746925354
training the data with NN with hidden-layer-size( 11,18)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.46945643424987793
training the data with NN with hidden-layer-size( 11,19)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.5041217803955078
training the data with NN with hidden-layer-size( 12,5)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 2.712069272994995
training the data with NN with hidden-layer-size( 12,6)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 1.6538901329040527
training the data with NN with hidden-layer-size( 12,7)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 1.414398431777954
training the data with NN with hidden-layer-size( 12,8)
0.5114754098360655 0.5386666666666666
[[156   0]
 [149   0]]
Time used 2.794907331466675
training the data with NN with hidden-layer-size( 

0.5114754098360655 0.5373333333333333
[[156   0]
 [149   0]]
Time used 2.772254467010498
training the data with NN with hidden-layer-size( 15,16)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 1.0851263999938965
training the data with NN with hidden-layer-size( 15,17)
0.5114754098360655 0.538
[[156   0]
 [149   0]]
Time used 1.799631118774414
training the data with NN with hidden-layer-size( 15,18)
0.5114754098360655 0.5386666666666666
[[156   0]
 [149   0]]
Time used 0.6558008193969727
training the data with NN with hidden-layer-size( 15,19)
0.5114754098360655 0.538
[[156   0]
 [149   0]]
Time used 0.6392776966094971
training the data with NN with hidden-layer-size( 16,5)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 3.5853939056396484
training the data with NN with hidden-layer-size( 16,6)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.47534656524658203
training the data with NN with hidden-layer-size( 16,7)
0

0.4885245901639344 0.5393333333333333
[[  0 156]
 [  0 149]]
Time used 3.2419233322143555
training the data with NN with hidden-layer-size( 19,14)
0.5114754098360655 0.534
[[156   0]
 [149   0]]
Time used 2.5644729137420654
training the data with NN with hidden-layer-size( 19,15)
0.5114754098360655 0.5273333333333333
[[156   0]
 [149   0]]
Time used 2.491748571395874
training the data with NN with hidden-layer-size( 19,16)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.7227799892425537
training the data with NN with hidden-layer-size( 19,17)
0.5704918032786885 0.542
[[144  12]
 [119  30]]
Time used 1.760904312133789
training the data with NN with hidden-layer-size( 19,18)
0.5114754098360655 0.5386666666666666
[[156   0]
 [149   0]]
Time used 1.5859997272491455
training the data with NN with hidden-layer-size( 19,19)
0.5114754098360655 0.524
[[156   0]
 [149   0]]
Time used 1.6376094818115234
training the data with NN with hidden-layer-size( 20,5)
0.51147540983

0.4885245901639344 0.5386666666666666
[[  0 156]
 [  0 149]]
Time used 2.422430992126465
training the data with NN with hidden-layer-size( 23,12)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 2.9594573974609375
training the data with NN with hidden-layer-size( 23,13)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.7529563903808594
training the data with NN with hidden-layer-size( 23,14)
0.5114754098360655 0.5426666666666666
[[156   0]
 [149   0]]
Time used 1.185030221939087
training the data with NN with hidden-layer-size( 23,15)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 1.5260517597198486
training the data with NN with hidden-layer-size( 23,16)
0.5114754098360655 0.5413333333333334
[[156   0]
 [149   0]]
Time used 2.5079312324523926
training the data with NN with hidden-layer-size( 23,17)
0.5114754098360655 0.54
[[156   0]
 [149   0]]
Time used 1.5331971645355225
training the data with NN with hidden-layer-

0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 2.1709651947021484
training the data with NN with hidden-layer-size( 27,10)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 2.9150142669677734
training the data with NN with hidden-layer-size( 27,11)
0.4885245901639344 0.542
[[  0 156]
 [  0 149]]
Time used 2.570711851119995
training the data with NN with hidden-layer-size( 27,12)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.5158333778381348
training the data with NN with hidden-layer-size( 27,13)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.8237435817718506
training the data with NN with hidden-layer-size( 27,14)
0.5114754098360655 0.5433333333333333
[[156   0]
 [149   0]]
Time used 1.296060562133789
training the data with NN with hidden-layer-size( 27,15)
0.5114754098360655 0.5346666666666666
[[156   0]
 [149   0]]
Time used 2.122067928314209
training the data with NN with hidden-layer-

0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 2.46267032623291
training the data with NN with hidden-layer-size( 31,7)
0.4885245901639344 0.5326666666666666
[[  0 156]
 [  0 149]]
Time used 0.9751107692718506
training the data with NN with hidden-layer-size( 31,8)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 2.815319776535034
training the data with NN with hidden-layer-size( 31,9)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.7476539611816406
training the data with NN with hidden-layer-size( 31,10)
0.5114754098360655 0.532
[[156   0]
 [149   0]]
Time used 2.5321125984191895
training the data with NN with hidden-layer-size( 31,11)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 3.237945318222046
training the data with NN with hidden-layer-size( 31,12)
0.5114754098360655 0.5413333333333334
[[156   0]
 [149   0]]
Time used 1.0517609119415283
training the data with NN with hidden-layer-size

0.5114754098360655 0.5413333333333334
[[156   0]
 [149   0]]
Time used 1.453843355178833
training the data with NN with hidden-layer-size( 34,19)
0.5114754098360655 0.5313333333333333
[[156   0]
 [149   0]]
Time used 3.609736919403076
training the data with NN with hidden-layer-size( 35,5)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.47339487075805664
training the data with NN with hidden-layer-size( 35,6)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.5358235836029053
training the data with NN with hidden-layer-size( 35,7)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 3.6399359703063965
training the data with NN with hidden-layer-size( 35,8)
0.5114754098360655 0.5366666666666667
[[156   0]
 [149   0]]
Time used 2.69504451751709
training the data with NN with hidden-layer-size( 35,9)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 3.663214921951294
training the data with NN with hidden

0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 1.1861708164215088
training the data with NN with hidden-layer-size( 38,16)
0.5114754098360655 0.5226666666666666
[[156   0]
 [149   0]]
Time used 4.126922845840454
training the data with NN with hidden-layer-size( 38,17)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.939924955368042
training the data with NN with hidden-layer-size( 38,18)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 3.725797414779663
training the data with NN with hidden-layer-size( 38,19)
0.5114754098360655 0.5253333333333334
[[156   0]
 [149   0]]
Time used 2.1182892322540283
training the data with NN with hidden-layer-size( 39,5)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 4.414621114730835
training the data with NN with hidden-layer-size( 39,6)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 0.5402677059173584
training the data with NN with hid

0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 2.4508445262908936
training the data with NN with hidden-layer-size( 42,13)
0.4885245901639344 0.5286666666666667
[[  0 156]
 [  0 149]]
Time used 4.557772636413574
training the data with NN with hidden-layer-size( 42,14)
0.5114754098360655 0.5393333333333333
[[156   0]
 [149   0]]
Time used 1.1132192611694336
training the data with NN with hidden-layer-size( 42,15)
0.5114754098360655 0.5426666666666666
[[156   0]
 [149   0]]
Time used 1.084073781967163
training the data with NN with hidden-layer-size( 42,16)
0.5114754098360655 0.5293333333333333
[[156   0]
 [149   0]]
Time used 2.73140549659729
training the data with NN with hidden-layer-size( 42,17)
0.5114754098360655 0.5233333333333333
[[156   0]
 [149   0]]
Time used 2.043034553527832
training the data with NN with hidden-layer-size( 42,18)
0.4885245901639344 0.5393333333333333
[[  0 156]
 [  0 149]]
Time used 1.9306137561798096
training the data with NN with hi