In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from Network import Network
from GridSearch import GridSearcher
from activation_functions import sigmoid, relu,relu_derivative, sigmoid_derivative, tanh, tanh_derivative
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

In [2]:
# Seed NumPy's global RNG so NumPy-based randomness in later cells is repeatable.
np.random.seed(42)

# Integer code for every token expected in the raw CSV. "unknown" votes are
# encoded as 0, i.e. they are treated the same as an "n" vote.
VOTE_ENCODING = {"y": 1, "n": 0, "unknown": 0, "democrat": 1, "republican": 0}

# Load the data indexed by "ID" and encode it column by column.
# Series.map with an explicit table replaces the deprecated DataFrame.applymap
# and the whole-frame replace() passes. A token missing from VOTE_ENCODING
# becomes NaN, so the int64 cast raises instead of silently leaving a raw
# string in the frame.
df_vote = (
    pd.read_csv("CongressionalVotingID.shuf.lrn.csv", index_col="ID")
    .apply(lambda column: column.map(VOTE_ENCODING))
    .astype("int64")
)

In [3]:
# Column 0 is used as the target; every remaining column is a feature.
features = df_vote.iloc[:, 1:]
labels = df_vote.iloc[:, 0]

# Hold out 40% of the rows for testing; the fixed random_state keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.4,
    random_state=11,
)

In [4]:
# x_train = np.expand_dims(x_train.to_numpy(), axis=1)
# y_train = np.expand_dims(y_train.to_numpy(), axis=1)
# x_test = np.expand_dims(x_test.to_numpy(), axis=1)
# y_test = np.expand_dims(y_test.to_numpy(), axis=1)

In [5]:
# Learning rate used when constructing the template Network.
learning_rate = 0.1

In [6]:
# Display the training feature frame to eyeball the split and the 0/1 encoding.
x_train

Unnamed: 0_level_0,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-crporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
239,1,1,1,0,0,0,1,1,0,1,0,0,0,0,1,0
192,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,0
407,1,0,1,0,1,1,0,0,1,1,0,0,1,1,0,1
386,1,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0
235,0,0,1,0,0,1,1,1,1,1,0,1,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149,0,1,0,1,1,1,0,0,0,1,1,1,1,1,0,1
225,0,1,0,1,1,1,0,0,0,1,0,0,1,1,0,0
58,0,1,0,1,1,1,0,0,0,1,0,1,1,1,0,1
119,0,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1


In [7]:
# Template network for the custom grid search; tanh is passed together with
# its derivative as a (function, derivative) pair.
network = Network(
    learning_rate,
    epochs=1000,
    node_counts=[3, 3, 3],
    activation_function=(tanh, tanh_derivative),
)

In [8]:
# Search space for the custom network: 2 learning rates x 2 epoch budgets
# x 6 layer layouts x 3 activation pairs (72 combinations if GridSearcher
# enumerates the full Cartesian product -- TODO confirm).
parameters = dict(
    learning_rate=[0.1, 0.5],
    epochs=[1000, 2000],
    node_counts=[[3, 3], [10, 10], [100, 100], [3, 3, 3], [10, 10, 10], [20, 20, 20]],
    activation_function=[
        (sigmoid, sigmoid_derivative),
        (relu, relu_derivative),
        (tanh, tanh_derivative),
    ],
)

# Run the search on the training split only, then show the result table.
gs = GridSearcher(network, parameters, pd.DataFrame(x_train), pd.Series(y_train))
gs_results = gs.search()
gs_results

1 / 108: 0.39434987002931254
2 / 108: 0.2447955792783379
3 / 108: 0.39434987002931254
4 / 108: 0.39434987002931254
5 / 108: 0.2447955792783379
6 / 108: 0.39434987002931254


In [None]:
# Temporary workaround: take the first grid-search row regardless of its score.
# TODO: once the grid search produces valid output, select the best row instead, e.g.
#   params = gs_results.loc[gs_results["score_mean"].idxmax()]
# (idxmax returns an index label, so .loc is the matching accessor).
params = gs_results.iloc[0]

# Hyperparameter set stored in the chosen row's "params" entry.
selected_prams = params["params"]
selected_prams

In [None]:
# Build a fresh, untrained network from the selected hyperparameters.
new_network = Network(
    learning_rate=selected_prams["learning_rate"],
    epochs=selected_prams["epochs"],
    node_counts=selected_prams["node_counts"],
    activation_function=selected_prams["activation_function"],
)

In [None]:
# Train the selected network on the training split.
new_network.fit(x_train, y_train)

# Raw predictions for the held-out split.
# NOTE(review): `out` is not read again in the visible cells -- the next cell
# calls predict() itself.
out = new_network.predict(x_test)

In [None]:
# Turn the network's raw outputs into hard 0/1 labels using a 0.5 threshold.
raw_outputs = np.asarray(new_network.predict(x_test))
y_pred = (raw_outputs > 0.5).astype(int)


In [None]:
def score(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are flattened before comparison, so predictions shaped
    (n,), (n, 1) or (n, 1, 1) are all compared element by element against a
    flat label vector.

    Args:
        y_true: Array-like of ground-truth labels (list, ndarray, Series, ...).
        y_pred: Array-like of predicted labels with the same number of elements.

    Returns:
        Accuracy as a Python float in [0, 1].

    Raises:
        ValueError: If the inputs are empty or contain a different number of
            elements (previously a mismatch was silently truncated by zip()
            and empty input failed with a bare ZeroDivisionError).
    """
    true_labels = np.asarray(y_true).ravel()
    predicted_labels = np.asarray(y_pred).ravel()

    if true_labels.size != predicted_labels.size:
        raise ValueError(
            f"y_true and y_pred must have the same number of elements, "
            f"got {true_labels.size} and {predicted_labels.size}"
        )
    if true_labels.size == 0:
        raise ValueError("cannot compute accuracy of empty inputs")

    # The mean of the boolean match mask is exactly correct / total.
    return float(np.mean(true_labels == predicted_labels))
# Accuracy of the current predictions on the held-out split, printed under a label line.
accuracy = score(y_test, y_pred)
print("accuracy", accuracy, sep="\n")

***Classification report for the self-implemented neural network***

In [None]:
# Per-class precision/recall/F1 for the custom network; squeeze drops any
# length-1 axes so both inputs are flat vectors.
true_labels = np.squeeze(y_test)
predicted_labels = np.squeeze(y_pred)
print(classification_report(y_true=true_labels, y_pred=predicted_labels))

***TensorFlow***

In [None]:
def train_model(x_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs):
    """Build, compile and fit a two-hidden-layer binary classifier with Keras.

    Architecture: Dense(num_nodes, relu) -> Dropout -> Dense(num_nodes, relu)
    -> Dropout -> Dense(1, sigmoid), optimised with Adam on binary
    cross-entropy while tracking accuracy.

    Args:
        x_train: Training features; the first axis indexes samples.
        y_train: Binary targets aligned with x_train.
        num_nodes: Number of units in each of the two hidden layers.
        dropout_prob: Dropout rate applied after each hidden layer.
        lr: Learning rate passed to the Adam optimizer.
        batch_size: Mini-batch size passed to fit().
        epochs: Number of training epochs.

    Returns:
        A (nn_model, history) tuple: the fitted model and the object returned
        by fit(). 20% of the training data is held out via validation_split.
    """
    # Infer the per-sample input shape from the data instead of hard-coding
    # (16,), so the helper works for any feature count.
    input_shape = tuple(np.shape(x_train)[1:])

    nn_model = tf.keras.Sequential([
        tf.keras.layers.Dense(num_nodes, activation='relu', input_shape=input_shape),
        tf.keras.layers.Dropout(dropout_prob),
        tf.keras.layers.Dense(num_nodes, activation='relu'),
        tf.keras.layers.Dropout(dropout_prob),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

    nn_model.compile(
        optimizer=tf.keras.optimizers.Adam(lr),
        loss="binary_crossentropy",
        metrics=['accuracy'],
    )

    # NOTE(review): features are passed through with their original dtype;
    # cast with x_train.astype(float) before calling if Keras rejects them.
    history = nn_model.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        verbose=0,
    )
    return nn_model, history

In [None]:
# Train the Keras baseline: 16 units per hidden layer, no dropout,
# learning rate 0.1, batches of 32, 1000 epochs.
nn_model, history = train_model(
    x_train=np.squeeze(x_train),
    y_train=np.squeeze(y_train),
    num_nodes=16,
    dropout_prob=0,
    lr=0.1,
    batch_size=32,
    epochs=1000,
)

In [None]:
# Model outputs for the held-out split, thresholded at 0.5 and flattened
# into a 1-D vector of 0/1 labels.
y_pred = nn_model.predict(np.squeeze(x_test))
y_pred = (y_pred > 0.5).astype(int).reshape(-1)

# classification_report takes (y_true, y_pred) in that order. Passing them by
# keyword keeps precision and recall from being transposed.
print(classification_report(y_true=np.squeeze(y_test), y_pred=y_pred))

**Grid search with scikit-learn's MLPClassifier**

In [None]:
# Hyperparameter grid for the MLPClassifier search: initial learning rates,
# a constant schedule, iteration caps, hidden-layer layouts and activations.
parameters_sk = dict(
    learning_rate_init=[0.01, 0.1, 0.5, 0.8],
    learning_rate=["constant"],
    max_iter=[10, 100, 1000, 2000],
    hidden_layer_sizes=[
        [3, 3], [5, 5], [10, 10], [50, 50], [100, 100],
        [3, 3, 3], [5, 5, 5], [10, 10, 10], [20, 20, 20],
    ],
    activation=["logistic", "relu", "tanh"],
)


In [None]:
# Default-configured scikit-learn MLP used as the estimator for the grid search.
sk_nn = MLPClassifier()

# Search over parameters_sk on the training split. The split variables are
# lowercase (x_train, y_train); `X_train` is not defined in the visible cells.
gs_sk = GridSearcher(sk_nn, parameters_sk, x_train, y_train)

In [None]:
# Run the scikit-learn grid search and keep its results for later inspection.
gs_results_sk = gs_sk.search()