In [None]:
# data handling libs
import numpy as np
import pandas as pd
import warnings


In [None]:

# Silence warning output for the rest of the notebook.
def fxn():
    """Emit a single ``DeprecationWarning`` whose message is "deprecated"."""
    warnings.warn("deprecated", category=DeprecationWarning)


# Process-wide filter: every warning is ignored from this point on.
warnings.filterwarnings(action="ignore")

# Raise the sample warning inside a temporary filter context; catch_warnings
# restores the previous filter state when the block exits.
with warnings.catch_warnings():
    warnings.simplefilter(action="ignore")
    fxn()

In [None]:
# data preprocessing libs
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# sklearn classifiers to import
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# tensorflow classifier import
import tensorflow as tf
#from tensorflow.keras.contrib import DNNClassifier
import logging

# model building, predict, accuracy imports
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from IPython.display import display

In [None]:
# Logging level
# (disabled TF1-style call kept for reference: tf.logging.set_verbosity(tf.logging.FATAL))
# Ask the stdlib logging module to configure the root logger at INFO.
logging.basicConfig(level="INFO")

In [None]:
# Get data from csv file
# `names` supplies the column labels, so the file is read as header-less and
# its five columns are labelled in the order given here.
data = pd.read_csv("iris.csv", names=['sp_length', 'sp_width', 'p_length', 'p_width', 'class'])
print('Dataset used: Iris Data set')
print('Number of instances in dataset:', len(data))
# Every column except the trailing 'class' label counts as an attribute.
print('Number of attributes in dataset:', data.shape[1] - 1)
# Fold count read by the grid-search cell further down.
num_folds = 15
# Last expression of the cell, so the (rows, columns) tuple is what gets displayed.
# The former `len(data['class'].unique())` line was dropped: as a non-final bare
# expression its value was computed and then thrown away.
data.shape

Dataset used: Iris Data set
Number of instances in dataset: 150
Number of attributes in dataset: 4


(150, 5)

In [None]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,sp_length,sp_width,p_length,p_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
# Drop every row holding at least one missing value; `data` itself is modified.
data.dropna(axis=0, how="any", inplace=True)

In [None]:
# Features: every column but the last. Target: the last column.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Hold out 18% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.18,
    random_state=42,
)

In [None]:
# Build parameters of all classifiers
# Each dict maps a constructor argument name to the list of values the grid
# search will try; `params` at the bottom collects them in the same order as
# the classifier list defined in the next cell.

# 'auto' is intentionally absent from the max_features grids below: for these
# classifiers it was only an alias of 'sqrt' (a duplicated grid point) and newer
# scikit-learn releases reject it, which would turn those grid points into
# failed fits.
random_forest_params = dict(n_estimators=[5, 10, 15, 20, 25], criterion=['gini', 'entropy'],
                            max_features=[2, 3, 4, 'log2', 'sqrt', None], bootstrap=[False, True]
                            )
decision_tree_params = dict(criterion=['gini', 'entropy'], splitter=['best', 'random'], min_samples_split=[2, 3, 4],
                            max_features=[2, 3, 'log2', 'sqrt', None], class_weight=['balanced', None])

perceptron_params = dict(penalty=[None, 'l2', 'l1', 'elasticnet'], fit_intercept=[False, True], shuffle=[False, True],
                         class_weight=['balanced', None], alpha=[0.0001, 0.00025], max_iter=[30,50,90])

# NOTE(review): no `kernel` is searched here, so `degree` probably has no effect
# with the default kernel -- confirm against the SVC documentation.
svm_params = dict(shrinking=[False, True], degree=[3,4], class_weight=['balanced', None])

# NOTE(review): `learning_rate` appears to matter only for the 'sgd' solver, which
# is not in this grid -- if so it triples the search for nothing; confirm.
neural_net_params = dict(activation=['identity', 'logistic', 'tanh', 'relu'], hidden_layer_sizes = [(20,15,10),(30,20,15,10),(16,8,4)],
                         max_iter=[50,80,150], solver=['adam','lbfgs'], learning_rate=['constant', 'invscaling', 'adaptive'], shuffle=[True, False])

log_reg_params = dict(class_weight=['balanced', None], solver=['newton-cg', 'lbfgs', 'liblinear', 'sag'], fit_intercept=[True, False])

knn_params = dict(n_neighbors=[2, 3, 5, 10], weights=['uniform', 'distance'],
                  algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'], leaf_size=[5,10,15,20])

bagging_params = dict(n_estimators=[5, 12, 15, 20], bootstrap=[False, True])

# NOTE(review): 'SAMME.R' is deprecated in recent scikit-learn releases -- check
# the installed version before upgrading.
ada_boost_params = dict(n_estimators=[50, 75, 100], algorithm=['SAMME', 'SAMME.R'])

# Empty grid: the search evaluates the estimator's defaults only.
guassiannb_params = dict()

gradient_boosting_params = dict(n_estimators=[15, 25, 50])

params = [
    random_forest_params, decision_tree_params, perceptron_params,
    svm_params, neural_net_params, log_reg_params, knn_params,
    bagging_params, ada_boost_params, guassiannb_params, gradient_boosting_params
]


In [None]:
# classifiers to test
# Seed shared by every estimator constructed with random_state below, so
# repeated runs of the grid search give comparable numbers.
RANDOM_STATE = 42

classifiers = [
    RandomForestClassifier(random_state=RANDOM_STATE),
    DecisionTreeClassifier(random_state=RANDOM_STATE),
    Perceptron(),
    SVC(),
    MLPClassifier(random_state=RANDOM_STATE),
    LogisticRegression(random_state=RANDOM_STATE),
    KNeighborsClassifier(),
    BaggingClassifier(random_state=RANDOM_STATE),
    AdaBoostClassifier(random_state=RANDOM_STATE),
    GaussianNB(),
    GradientBoostingClassifier(random_state=RANDOM_STATE),
]

names = [
    'RandomForest', 'DecisionTree', 'Perceptron', 'SVM',
    'NeuralNetwork', 'LogisticRegression',
    'KNearestNeighbors', 'Bagging', 'AdaBoost', 'Naive-Bayes', 'GradientBoosting'
]

# zip() stops at the shortest input, so a missing or extra entry in any of the
# three parallel lists would silently drop or mispair models. Fail loudly instead.
if not (len(names) == len(classifiers) == len(params)):
    raise ValueError(
        'names ({}), classifiers ({}) and params ({}) must have the same length'.format(
            len(names), len(classifiers), len(params)))

# name -> (estimator, param_grid), in the order listed above
models = dict(zip(names, zip(classifiers, params)))

In [None]:
#Finding best parameters using Gridsearch
def parameter_tuning(models, X_train, X_test, y_train, y_test, cv=None):
    """Grid-search every classifier in ``models`` and score the tuned result.

    Parameters
    ----------
    models : dict
        Maps a display name to an ``(estimator, param_grid)`` pair.
    X_train, y_train
        Data passed to ``GridSearchCV.fit``.
    X_test, y_test
        Held-out data; the fitted search predicts on ``X_test`` and the
        predictions are compared with ``y_test`` via ``accuracy_score``.
    cv : optional
        Forwarded to ``GridSearchCV`` as ``cv``. When ``None`` (the default)
        the notebook-level ``num_folds`` is used, as before.

    Returns
    -------
    (accuracies, dataframes, best_parameters) : tuple of lists
        One entry per model, in the iteration order of ``models``:
        ``(name, test_accuracy, mean_cv_accuracy)``, ``(name, cv_results_)``
        and ``(name, best_params_)`` respectively.
    """
    # Fall back to the global only when the caller did not choose a fold count.
    folds = num_folds if cv is None else cv
    print(folds,'fold cross-validation is used')
    print()
    accuracies = []
    # raw cv_results_ of every search, kept for later inspection
    dataframes = []
    best_parameters = []
    for name, (clf, clf_params) in models.items():
        print('Computing GridSearch on {} '.format(name))
        grid_clf = GridSearchCV(estimator=clf, param_grid=clf_params, cv=folds)
        grid_clf = grid_clf.fit(X_train, y_train)
        dataframes.append((name, grid_clf.cv_results_))
        best_parameters.append((name, grid_clf.best_params_))
        accuracy = accuracy_score(y_test, grid_clf.predict(X_test))
        # best_score_ is the mean cross-validated score of the winning parameter
        # set. The previous cross_val_score(clf, ...) call scored `clf` itself,
        # which the search never modifies, so it reported the *untuned* model
        # and paid for a second full round of cross-validation.
        accuracies.append((name, accuracy, grid_clf.best_score_))
    return accuracies, dataframes, best_parameters

In [None]:
# Run the grid search over every registered model on the train/test split.
results, dataframes, best_parameters = parameter_tuning(
    models=models,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

15 fold cross-validation is used

Computing GridSearch on RandomForest 
Computing GridSearch on DecisionTree 
Computing GridSearch on Perceptron 
Computing GridSearch on SVM 
Computing GridSearch on NeuralNetwork 
Computing GridSearch on LogisticRegression 
Computing GridSearch on KNearestNeighbors 
Computing GridSearch on Bagging 
Computing GridSearch on AdaBoost 
Computing GridSearch on Naive-Bayes 
Computing GridSearch on GradientBoosting 


In [None]:
print()
print('============================================================')
for classifier, acc, cv_acc in results:
    print('{}: Accuracy with Best Parameters = {}% || Mean Cross Validation Accuracy = {}%'.format(classifier, round(acc*100,4), round(cv_acc*100,4)))
print()

for name, bp in best_parameters:
    print('============================================================')
    print('{} classifier GridSearch Best Parameters'.format(name))
    display(bp)
print()
print()

# Deep Learning using Tensor flow
# pandas, numpy, train_test_split, OneHotEncoder and tensorflow are already
# imported by the import cells at the top of the notebook.

# Seed TensorFlow's global random generator before the model is built.
tf.random.set_seed(42)

# One-hot encode the target variable.
# The encoder's default sparse result is densified explicitly instead of passing
# `sparse=False`, a keyword that newer scikit-learn releases no longer accept.
encoder = OneHotEncoder()
y_encoded = encoder.fit_transform(y.reshape(-1, 1)).toarray()

# Step 2: Split the Data
# NOTE: this rebinds X_train/X_test/y_train/y_test from the earlier split;
# the y_* arrays are one-hot encoded from here on.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Step 3: Build the DNN Model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    # one output unit per one-hot column, i.e. per class present in y
    tf.keras.layers.Dense(y_encoded.shape[1], activation='softmax')
])

# Step 4: Compile the Model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Step 5: Train the Model
# The test split is passed as validation data, so its metrics are reported
# alongside each epoch.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Step 6: Evaluate the Model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_accuracy * 100:.2f}%')

print('============================================================')
# Report the network's own test accuracy. `acc` is only the loop variable left
# over from the classifier summary above (the last classifier's accuracy).
print('Deep Learning classifier Accuracy = ', round(test_accuracy*100,4),'%')
print('------------------------------------------------------------')
print('Deep Learning classifier Best Parameters')
# No hyper-parameter search is run for the network, so nothing is listed here.
print('***************** Execution Completed **********************')
print('------------------------------------------------------------')


RandomForest: Accuracy with Best Parameters = 100.0% || Mean Cross Validation Accuracy = 93.4259%
DecisionTree: Accuracy with Best Parameters = 100.0% || Mean Cross Validation Accuracy = 92.5926%
Perceptron: Accuracy with Best Parameters = 74.0741% || Mean Cross Validation Accuracy = 78.9815%
SVM: Accuracy with Best Parameters = 100.0% || Mean Cross Validation Accuracy = 95.0%
NeuralNetwork: Accuracy with Best Parameters = 100.0% || Mean Cross Validation Accuracy = 96.6667%
LogisticRegression: Accuracy with Best Parameters = 100.0% || Mean Cross Validation Accuracy = 96.6667%
KNearestNeighbors: Accuracy with Best Parameters = 100.0% || Mean Cross Validation Accuracy = 94.2593%
Bagging: Accuracy with Best Parameters = 100.0% || Mean Cross Validation Accuracy = 95.0926%
AdaBoost: Accuracy with Best Parameters = 100.0% || Mean Cross Validation Accuracy = 93.4259%
Naive-Bayes: Accuracy with Best Parameters = 100.0% || Mean Cross Validation Accuracy = 94.2593%
GradientBoosting: Accuracy wi

{'bootstrap': True, 'criterion': 'gini', 'max_features': 2, 'n_estimators': 25}

DecisionTree classifier GridSearch Best Parameters


{'class_weight': None,
 'criterion': 'entropy',
 'max_features': 'auto',
 'min_samples_split': 2,
 'splitter': 'random'}

Perceptron classifier GridSearch Best Parameters


{'alpha': 0.0001,
 'class_weight': 'balanced',
 'fit_intercept': True,
 'max_iter': 30,
 'penalty': None,
 'shuffle': True}

SVM classifier GridSearch Best Parameters


{'class_weight': 'balanced', 'degree': 3, 'shrinking': False}

NeuralNetwork classifier GridSearch Best Parameters


{'activation': 'tanh',
 'hidden_layer_sizes': (20, 15, 10),
 'learning_rate': 'constant',
 'max_iter': 150,
 'shuffle': True,
 'solver': 'adam'}

LogisticRegression classifier GridSearch Best Parameters


{'class_weight': 'balanced', 'fit_intercept': True, 'solver': 'sag'}

KNearestNeighbors classifier GridSearch Best Parameters


{'algorithm': 'auto', 'leaf_size': 5, 'n_neighbors': 3, 'weights': 'uniform'}

Bagging classifier GridSearch Best Parameters


{'bootstrap': True, 'n_estimators': 15}

AdaBoost classifier GridSearch Best Parameters


{'algorithm': 'SAMME.R', 'n_estimators': 100}

Naive-Bayes classifier GridSearch Best Parameters


{}

GradientBoosting classifier GridSearch Best Parameters


{'n_estimators': 15}



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test accuracy: 96.67%
Deep Learning classifier Accuracy =  100.0 %
------------------------------------------------------------
Deep Learning classifier Best Parameters
***************** Execution Completed **********************
------------------------------------------------------------


In [None]:
# Deep Learning using Tensor flow
# pandas, numpy, train_test_split, OneHotEncoder and tensorflow are already
# imported by the import cells at the top of the notebook.

# Seed TensorFlow's global random generator before the model is built.
tf.random.set_seed(42)

# One-hot encode the target variable.
# The encoder's default sparse result is densified explicitly instead of passing
# `sparse=False`, a keyword that newer scikit-learn releases no longer accept.
encoder = OneHotEncoder()
y_encoded = encoder.fit_transform(y.reshape(-1, 1)).toarray()

# Step 2: Split the Data
# NOTE: this rebinds X_train/X_test/y_train/y_test from the earlier split;
# the y_* arrays are one-hot encoded from here on.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Step 3: Build the DNN Model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    # one output unit per one-hot column, i.e. per class present in y
    tf.keras.layers.Dense(y_encoded.shape[1], activation='softmax')
])

# Step 4: Compile the Model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Step 5: Train the Model
# The test split is passed as validation data, so its metrics are reported
# alongside each epoch.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Step 6: Evaluate the Model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_accuracy * 100:.2f}%')



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test accuracy: 100.00%


In [None]:

# Convert results to a DataFrame: one row per classifier
results_df = pd.DataFrame(results, columns=['Classifier', 'Accuracy', 'Mean CV Accuracy'])

# Show the results in tabular form
print('Results:')
display(results_df)

# best_parameters holds one (name, best_params dict) pair per classifier
for name, bp in best_parameters:
    # Two-column table: parameter name / chosen value
    bp_df = pd.DataFrame(list(bp.items()), columns=['Parameter', 'Value'])

    print('============================================================')
    print(f'{name} classifier GridSearch Best Parameters:')
    display(bp_df)

print('============================================================')
# Use the network's own evaluation result. `acc` is merely the loop variable
# left behind by the earlier classifier summary (the last classifier's accuracy).
print(f'Deep Learning classifier Accuracy = {round(test_accuracy*100, 4)}%')
print('------------------------------------------------------------')

# Collect (weight name, weight values) for every weight of every layer.
# Stored under its own name so the `params` list of search grids, which the
# classifier cell zips into `models`, is not overwritten.
weight_rows = [
    (weight.name, weight.numpy())
    for layer in model.layers
    for weight in layer.weights
]

# Create a DataFrame from the collected weights
params_df = pd.DataFrame(weight_rows, columns=['Parameter', 'Value'])


print('Deep Learning classifier Best Parameters:')
display(params_df)

print('***************** Execution Completed **********************')
print('------------------------------------------------------------')


Results:
            Classifier  Accuracy  Mean CV Accuracy
0         RandomForest  1.000000          0.934259
1         DecisionTree  1.000000          0.925926
2           Perceptron  0.740741          0.789815
3                  SVM  1.000000          0.950000
4        NeuralNetwork  1.000000          0.966667
5   LogisticRegression  1.000000          0.966667
6    KNearestNeighbors  1.000000          0.942593
7              Bagging  1.000000          0.950926
8             AdaBoost  1.000000          0.934259
9          Naive-Bayes  1.000000          0.942593
10    GradientBoosting  1.000000          0.934259
RandomForest classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,bootstrap,True
1,criterion,gini
2,max_features,2
3,n_estimators,25


DecisionTree classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,class_weight,
1,criterion,entropy
2,max_features,auto
3,min_samples_split,2
4,splitter,random


Perceptron classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,alpha,0.0001
1,class_weight,balanced
2,fit_intercept,True
3,max_iter,30
4,penalty,
5,shuffle,True


SVM classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,class_weight,balanced
1,degree,3
2,shrinking,False


NeuralNetwork classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,activation,tanh
1,hidden_layer_sizes,"(20, 15, 10)"
2,learning_rate,constant
3,max_iter,150
4,shuffle,True
5,solver,adam


LogisticRegression classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,class_weight,balanced
1,fit_intercept,True
2,solver,sag


KNearestNeighbors classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,algorithm,auto
1,leaf_size,5
2,n_neighbors,3
3,weights,uniform


Bagging classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,bootstrap,True
1,n_estimators,15


AdaBoost classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,algorithm,SAMME.R
1,n_estimators,100


Naive-Bayes classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value


GradientBoosting classifier GridSearch Best Parameters:


Unnamed: 0,Parameter,Value
0,n_estimators,15


Deep Learning classifier Accuracy = 100.0%
------------------------------------------------------------
Deep Learning classifier Best Parameters:


Unnamed: 0,Parameter,Value
0,dense_6/kernel:0,"[[0.1132168, 0.15358461, -0.14365739, 0.249161..."
1,dense_6/bias:0,"[0.058095977, 0.008847783, 0.0, 0.0548075, -0...."
2,dense_7/kernel:0,"[[-0.08220623, -0.03771624, 0.11644138, -0.109..."
3,dense_7/bias:0,"[0.038606714, 0.0, -0.0055540586, -0.048505116..."
4,dense_8/kernel:0,"[[-0.13739638, 0.042634316, -0.25163385], [-0...."
5,dense_8/bias:0,"[0.050837938, 0.027795825, -0.049045447]"


***************** Execution Completed **********************
------------------------------------------------------------
