In [167]:
import warnings
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,KFold,RepeatedKFold
from sklearn.metrics import accuracy_score,log_loss
from sklearn.neural_network import multilayer_perceptron
from sklearn.model_selection import GridSearchCV,PredefinedSplit
from sklearn.feature_selection import SelectKBest,chi2,SelectFromModel
from sklearn.svm import SVC
warnings.filterwarnings('ignore')
np.random.seed(2)
pd.set_option('display.max_colwidth',100)

In [103]:
training_df=pd.read_csv("data/optdigits.tra",header=None)
training_df=training_df[((training_df.iloc[:,-1]==1) | (training_df.iloc[:,-1]==7))]
training_df.replace({7:0},inplace=True)
DIMENSIONS=64
models=[]
data_partitions=[]
kfold=KFold(n_splits=10,shuffle=True,random_state=2)
for train_index,test_index in kfold.split(training_df):
    data_partitions.append((training_df.iloc[train_index,:-1].values,training_df.iloc[train_index,-1].values,training_df.iloc[test_index,:-1].values,training_df.iloc[test_index,-1].values))

In [111]:
class LogisticRegression:
    def __init__(self,learning_rate,momentum,dimensions,a,b):
        self.learning_rate=learning_rate
        self.momentum=momentum
        self.dimensions=dimensions
        self.weights=np.random.uniform(low=0,high=0.0001,size=(dimensions,1))
        self.a=a
        self.b=b
    def print_model_parameters(self,i):
        print("*"*45)
        print("Run:{0}\nInitial learning rate:{1}\nInitial momentum:{2}".format(i,self.learning_rate,self.momentum))
    def sigmoid_activation(self,x):
        return 1/(1+np.exp(-x))
    def cross_entropy(self,y_true,y_pred):
        return log_loss(y_true,y_pred)
    def train(self,x_train,y_train,n_iter=50):
        #print(x_train.shape)
        errors=[]
        converged=False
        while converged==False and n_iter!=0:
            error=0
            n_iter-=1
            predictions=[]
            temp=np.zeros(shape=(self.dimensions))
            d_weights=np.zeros(shape=(self.dimensions))
            for i in range(x_train.shape[0]):
                o=0
                for j in range(x_train.shape[1]):
                    o+=(self.weights[j]*x_train[i,j])
                y_pred=self.sigmoid_activation(o)
                for j in range(x_train.shape[1]):
                    #d_weights[j]+=learning_rate*(y_train[i]-y_pred)*x_train[i,j]
                    d_weights[j]=(self.learning_rate*(y_train[i]-y_pred)*x_train[i,j])+(self.momentum*d_weights[j])
                predictions.append(y_train[i]-y_pred)
            isDecreasing=True
            errors.append(self.cross_entropy(y_train,np.array(predictions)))
            current_error=errors[-1]
            for i in range(len(errors)-1):
                if errors[i]<=current_error:
                    isDecreasing=False
                    break
            if isDecreasing:
                self.learning_rate+=self.a
            else:
                self.learning_rate-=(self.b*self.learning_rate)
            for j in range(self.dimensions):
                #self.weights[j]+=self.learning_rate*d_weights[j]
                self.weights[j]+=(d_weights[j])
            try:
                if errors[-2]==errors[-1]:
                    converged=True
            except IndexError:
                continue
        self.evaluate("Training",y_train,self.predict(x_train))
    def predict(self,x):
        y_pred=self.sigmoid_activation(np.sum(x*self.weights.T,axis=1))
        y_pred=np.array(y_pred>0.5,dtype=np.int16)
        return y_pred
    def evaluate(self,string,y_true,y_pred):
        print("{0} error rate for run :{1}".format(string,1-accuracy_score(y_true,y_pred)))

In [112]:
for i in range(10):
    learning_rate=np.random.uniform(low=0.0001,high=0.001)
    momentum=np.random.uniform(low=0.9,high=0.99)
    model=LogisticRegression(learning_rate,momentum,DIMENSIONS,0.0001,0.0002)
    x_train,y_train,x_test,y_test=data_partitions[i]
    model.print_model_parameters(i)
    model.train(x_train,y_train,10)
    models.append(model)
    model.evaluate("Test",y_test,model.predict(x_test))

*********************************************
Run:0
Initial learning rate:0.0004249480807741736
Initial momentum:0.9868188372777147
Training error rate for run :0.008595988538681931
Test error rate for run :0.0
*********************************************
Run:1
Initial learning rate:0.0002873857846716795
Initial momentum:0.9888680194471693
Training error rate for run :0.01002865329512892
Test error rate for run :0.0
*********************************************
Run:2
Initial learning rate:0.00011435976947699817
Initial momentum:0.9784045945797017
Training error rate for run :0.007163323782234943
Test error rate for run :0.0
*********************************************
Run:3
Initial learning rate:0.00021740385762580194
Initial momentum:0.9393618679876573
Training error rate for run :0.011461318051575908
Test error rate for run :0.012820512820512775
*********************************************
Run:4
Initial learning rate:0.000348091504172011
Initial momentum:0.9591138016454379
Trainin

In [113]:
abs_avg_weights=[]
for model in models:
    abs_avg_weights.append(np.abs(model.weights))
abs_avg_weights=np.array(abs_avg_weights)
abs_avg_weights=abs_avg_weights.sum(axis=0)

In [114]:
after_elimination_10=abs_avg_weights.argsort(axis=0)[:int(0.9*DIMENSIONS)]
after_elimination_25=abs_avg_weights.argsort(axis=0)[:int(0.75*DIMENSIONS)]
after_elimination_50=abs_avg_weights.argsort(axis=0)[:int(0.5*DIMENSIONS)]

In [115]:
for i in range(10):
    learning_rate=np.random.uniform(low=0.0001,high=0.001)
    momentum=np.random.uniform(low=0.9,high=0.99)
    model=LogisticRegression(learning_rate,momentum,after_elimination_10.shape[0],0.0001,0.0002)
    x_train,y_train,x_test,y_test=data_partitions[i]
    model.print_model_parameters(i)
    model.train(x_train[:,after_elimination_10].reshape(x_train.shape[0],after_elimination_10.shape[0]),y_train,10)
    model.evaluate("Test",y_test,model.predict(x_test[:,after_elimination_10].reshape(x_test.shape[0],after_elimination_10.shape[0])))

*********************************************
Run:0
Initial learning rate:0.00037560963112285586
Initial momentum:0.9232292498149266
Training error rate for run :0.504297994269341
Test error rate for run :0.47435897435897434
*********************************************
Run:1
Initial learning rate:0.00016473354972979372
Initial momentum:0.9815996514641392
Training error rate for run :0.501432664756447
Test error rate for run :0.5
*********************************************
Run:2
Initial learning rate:0.0005741687889834052
Initial momentum:0.9211236279668327
Training error rate for run :0.49426934097421205
Test error rate for run :0.5641025641025641
*********************************************
Run:3
Initial learning rate:0.0009151681215947962
Initial momentum:0.9551991310330916
Training error rate for run :0.497134670487106
Test error rate for run :0.5384615384615384
*********************************************
Run:4
Initial learning rate:0.0008783852416275361
Initial momentum:0.978

In [116]:
for i in range(10):
    learning_rate=np.random.uniform(low=0.0001,high=0.001)
    momentum=np.random.uniform(low=0.9,high=0.99)
    model=LogisticRegression(learning_rate,momentum,after_elimination_25.shape[0],0.0001,0.0002)
    x_train,y_train,x_test,y_test=data_partitions[i]
    model.print_model_parameters(i)
    model.train(x_train[:,after_elimination_25].reshape(x_train.shape[0],after_elimination_25.shape[0]),y_train,10)
    model.evaluate("Test",y_test,model.predict(x_test[:,after_elimination_25].reshape(x_test.shape[0],after_elimination_25.shape[0])))

*********************************************
Run:0
Initial learning rate:0.0007774359691369259
Initial momentum:0.9544633798414398
Training error rate for run :0.504297994269341
Test error rate for run :0.47435897435897434
*********************************************
Run:1
Initial learning rate:0.0004777793791981728
Initial momentum:0.9480409078649433
Training error rate for run :0.501432664756447
Test error rate for run :0.5
*********************************************
Run:2
Initial learning rate:0.00026045080774150776
Initial momentum:0.9805967946128417
Training error rate for run :0.49426934097421205
Test error rate for run :0.5641025641025641
*********************************************
Run:3
Initial learning rate:0.00011042210500974593
Initial momentum:0.907610251531215
Training error rate for run :0.497134670487106
Test error rate for run :0.5384615384615384
*********************************************
Run:4
Initial learning rate:0.0009810810546223606
Initial momentum:0.9247

In [117]:
for i in range(10):
    learning_rate=np.random.uniform(low=0.0001,high=0.001)
    momentum=np.random.uniform(low=0.9,high=0.99)
    model=LogisticRegression(learning_rate,momentum,after_elimination_50.shape[0],0.0001,0.0002)
    x_train,y_train,x_test,y_test=data_partitions[i]
    model.print_model_parameters(i)
    model.train(x_train[:,after_elimination_50].reshape(x_train.shape[0],after_elimination_50.shape[0]),y_train,10)
    model.evaluate("Test",y_test,model.predict(x_test[:,after_elimination_50].reshape(x_test.shape[0],after_elimination_50.shape[0])))

*********************************************
Run:0
Initial learning rate:0.0004947817419852202
Initial momentum:0.9874806501390709
Training error rate for run :0.504297994269341
Test error rate for run :0.47435897435897434
*********************************************
Run:1
Initial learning rate:0.00023758244632954935
Initial momentum:0.925130120617303
Training error rate for run :0.501432664756447
Test error rate for run :0.5
*********************************************
Run:2
Initial learning rate:0.00012469559605250816
Initial momentum:0.9097590767721605
Training error rate for run :0.49426934097421205
Test error rate for run :0.5641025641025641
*********************************************
Run:3
Initial learning rate:0.0008187296016928157
Initial momentum:0.9880406463875171
Training error rate for run :0.497134670487106
Test error rate for run :0.5384615384615384
*********************************************
Run:4
Initial learning rate:0.00030792969588006383
Initial momentum:0.918

In [161]:
parameters={
    'hidden_layer_sizes':[(10,20),(50,30),(100,10,2),(30,10,2)],
    'solver':['adam','sgd'],
    'alpha':[0.0001,0.00001,0.001,0.1]
}
skmodels=[]
worst_params=[]
scores=[]
kbest=[]
tree_selection=[]
for i,(x_train,y_train,x_test,y_test) in enumerate(data_partitions):
    print("\nRun {0}".format(i))
    model=GridSearchCV(MLPClassifier(),param_grid=parameters,n_jobs=-1,cv=RepeatedKFold(n_splits=2,n_repeats=1,random_state=2),verbose=3,scoring='neg_log_loss')
    model.fit(x_train,y_train)
    worst_training_error=model.cv_results_['mean_test_score']
    worst_params.append(model.cv_results_['params'][np.argmin(worst_training_error)])
    test_model=MLPClassifier(**worst_params[-1])
    test_model.fit(x_train,y_train)
    
    scores.append(1-accuracy_score(y_test,test_model.predict(x_test)))
    selector=SelectKBest(chi2,k=45)
    x_train_=selector.fit_transform(x_train,y_train)
    x_test_=selector.transform(x_test)
    test_model.fit(x_train_,y_train)
    kbest.append(1-accuracy_score(y_test,test_model.predict(x_test_)))
    
    selector=SelectFromModel(SVC(kernel='linear'))
    x_train_=selector.fit_transform(x_train,y_train)
    x_test_=selector.transform(x_test)
    test_model.fit(x_train_,y_train)
    tree_selection.append(1-accuracy_score(y_test,test_model.predict(x_test_)))
    
    skmodels.append(model)
scores=np.array(scores)
worst_params=np.array(worst_params)
df=pd.DataFrame({"Test scores":scores,"Parameters":worst_params,"Test error with \nK Best feature selection":kbest,"Test error with \nTree based selection":tree_selection})
print(df)


Run 0
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    3.0s finished



Run 1
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    3.3s finished



Run 2
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    3.0s finished



Run 3
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    3.2s finished



Run 4
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    3.1s finished



Run 5
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    2.9s finished



Run 6
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    3.2s finished



Run 7
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  41 out of  64 | elapsed:    2.9s remaining:    1.6s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    3.8s finished



Run 8
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    3.6s finished



Run 9
Fitting 2 folds for each of 32 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  41 out of  64 | elapsed:    2.6s remaining:    1.4s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:    3.8s finished


   Test scores                                         Parameters  \
0     0.012821  {'alpha': 0.0001, 'hidden_layer_sizes': (30, 1...   
1     0.012821  {'alpha': 1e-05, 'hidden_layer_sizes': (100, 1...   
2     0.012821  {'alpha': 0.001, 'hidden_layer_sizes': (100, 1...   
3     0.000000  {'alpha': 0.0001, 'hidden_layer_sizes': (30, 1...   
4     0.012821  {'alpha': 0.1, 'hidden_layer_sizes': (30, 10, ...   
5     0.576923  {'alpha': 1e-05, 'hidden_layer_sizes': (30, 10...   
6     0.415584  {'alpha': 1e-05, 'hidden_layer_sizes': (100, 1...   
7     0.441558  {'alpha': 0.0001, 'hidden_layer_sizes': (30, 1...   
8     0.012987  {'alpha': 0.0001, 'hidden_layer_sizes': (100, ...   
9     0.493506  {'alpha': 1e-05, 'hidden_layer_sizes': (30, 10...   

   Test error with K Best feature selection  Test error with Tree selection  
0                                  0.000000                        0.500000  
1                                  0.012821                        0.000000  
2     