In [1]:
import random
import sys
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from matplotlib import rcParams
rcParams['font.family'] = 'serif'
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from topology import Topology
from simulation import Simulation
from simulation import SimulationResult
from packet import PacketSf
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
import pandas as pd
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, f1_score, matthews_corrcoef

random.seed(42)  # for now seed is constant


class SimulationFigure():
    def __init__(self, x_axis, plot_names):
        self.x_axis = x_axis
        self.plot_names = plot_names
        self.plot_data = {}
        for plot_name in self.plot_names:
            self.plot_data[plot_name] = []

    def get_plot(self, xlabel, ylabel, ylim_bottom=None, ylim_top=None, xlim_left=None, xlim_right=None):
        plt.figure()
        for plot_name in self.plot_names:
            plt.plot(self.x_axis, self.plot_data[plot_name], label=plot_name)

        if ylim_bottom is not None:
            plt.ylim(bottom=ylim_bottom)
        if ylim_top is not None:
            plt.ylim(top=ylim_top)
        if xlim_left is not None:
            plt.xlim(left=xlim_left)
        if xlim_right is not None:
            plt.xlim(right=xlim_right)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.grid(True)
        plt.tight_layout()


def prediction_accuracy(averaging, number_of_gws, packet_rate, packet_size, simulation_duration, traffic_type):
    for radius in [3000, 5000, 7000, 10000]:
        for number_of_nodes in [100, 500, 1000]:
            prediction_rf_acc_averaging_sum = 0
            prediction_rf_precision_sum = 0
            prediction_rf_recall_sum = 0
            prediction_rf_f1_score_sum = 0
            prediction_rf_mcc_sum = 0

            prediction_GBC_acc_averaging_sum = 0
            prediction_GBC_precision_sum = 0
            prediction_GBC_recall_sum = 0
            prediction_GBC_f1_score_sum = 0
            prediction_GBC_mcc_sum = 0

            topology = Topology.create_random_topology(number_of_nodes=number_of_nodes, radius=radius, number_of_gws=number_of_gws, node_traffic_proportions=traffic_type)

            for repeat in range(averaging):
                simulation = Simulation(topology=topology, packet_rate=packet_rate, packet_size=packet_size, simulation_duration=simulation_duration, sf=PacketSf.SF_Random)
                simulation_result = simulation.run()

                X_train, X_test, y_train, y_test = simulation.get_training_data(test_size=0.2)

                # Random Forest with Stratified Cross-Validation
                RF_classifier = RandomForestClassifier(class_weight='balanced', n_estimators=100)
                RF_classifier.fit(X_train, y_train)
                rf_predicted = RF_classifier.predict(X_test)
                rf_acc = accuracy_score(y_test, rf_predicted)
                rf_precision = precision_score(y_test, rf_predicted,average='weighted', zero_division=1)
                rf_recall = recall_score(y_test, rf_predicted, average='weighted', zero_division=1)
                rf_f1_score = f1_score(y_test, rf_predicted, average='weighted')
                rf_mcc = matthews_corrcoef(y_test, rf_predicted)
                
                prediction_rf_acc_averaging_sum += rf_acc * 100
                prediction_rf_precision_sum += rf_precision
                prediction_rf_recall_sum += rf_recall
                prediction_rf_f1_score_sum += rf_f1_score
                prediction_rf_mcc_sum += rf_mcc

                # Gradient Boosting Classifier with Stratified Cross-Validation
                GBC_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=1, random_state=42, loss='log_loss')
                GBC_classifier.fit(X_train, y_train)
                gbc_predicted = GBC_classifier.predict(X_test)
                gbc_acc = accuracy_score(y_test, gbc_predicted)
                gbc_precision = precision_score(y_test, gbc_predicted,average='weighted',zero_division=1)
                gbc_recall = recall_score(y_test, rf_predicted, average='weighted', zero_division=1)
                gbc_f1_score = f1_score(y_test, gbc_predicted,average='weighted')
                gbc_mcc = matthews_corrcoef(y_test, gbc_predicted)
                
                prediction_GBC_acc_averaging_sum += gbc_acc * 100
                prediction_GBC_precision_sum += gbc_precision
                prediction_GBC_recall_sum += gbc_recall
                prediction_GBC_f1_score_sum += gbc_f1_score
                prediction_GBC_mcc_sum += gbc_mcc

            # Average scores
            avg_accuracy_rf = prediction_rf_acc_averaging_sum / averaging
            avg_precision_rf = prediction_rf_precision_sum / averaging
            avg_recall_rf = prediction_rf_recall_sum / averaging
            avg_f1_score_rf = prediction_rf_f1_score_sum / averaging
            avg_mcc_rf = prediction_rf_mcc_sum / averaging
            
            avg_accuracy_gbc = prediction_GBC_acc_averaging_sum / averaging
            avg_precision_gbc = prediction_GBC_precision_sum / averaging
            avg_recall_gbc = prediction_GBC_recall_sum / averaging
            avg_f1_score_gbc = prediction_GBC_f1_score_sum / averaging
            avg_mcc_gbc = prediction_GBC_mcc_sum / averaging

            # Print metrics in table format
            metrics_df = pd.DataFrame({
                'Classifier': ['Random Forest', 'Gradient Boosting'],
                'Accuracy (%)': [avg_accuracy_rf, avg_accuracy_gbc],
                'Precision': [avg_precision_rf, avg_precision_gbc],
                'Recall': [avg_recall_rf, avg_recall_gbc],
                'F1 Score': [avg_f1_score_rf, avg_f1_score_gbc],
                'MCC': [avg_mcc_rf, avg_mcc_gbc]
            })
            print(f"Number of nodes={number_of_nodes}, Radius={radius}")
            print(metrics_df.to_string(index=False))


# All units are SI base units
TOPOLOGY_RADIUS = 3000  # meters
NUMBER_OF_GWS = 1
PRED_TOPOLOGY_RADIUS = 5000  # meters
PRED_NUMBER_OF_GWS = 3
SIMULATION_DURATION = 3600  # seconds
PACKET_RATE = 0.01  # per second
PACKET_SIZE = 60  # bytes, header + payload, 13 + max(51 to 222)
TRAFFIC_TYPE = (1, 0)  # poisson, periodic
AVERAGING = 5
NUMBER_OF_NODES_LIST = range(50, 1001, 50)

prediction_accuracy(averaging=AVERAGING,
                    number_of_gws=PRED_NUMBER_OF_GWS,
                    packet_rate=PACKET_RATE,
                    packet_size=PACKET_SIZE,
                    simulation_duration=SIMULATION_DURATION,
                    traffic_type=TRAFFIC_TYPE)

Number of nodes=100, Radius=3000
       Classifier  Accuracy (%)  Precision  Recall  F1 Score      MCC
    Random Forest     86.143028   0.947622 0.86143  0.899932 0.084518
Gradient Boosting     96.946495   0.970409 0.86143  0.954437 0.000000
Number of nodes=500, Radius=3000
       Classifier  Accuracy (%)  Precision   Recall  F1 Score      MCC
    Random Forest     76.621404   0.822644 0.766214  0.788471 0.257726
Gradient Boosting     86.044300   0.832686 0.766214  0.802940 0.127600
Number of nodes=1000, Radius=3000
       Classifier  Accuracy (%)  Precision   Recall  F1 Score      MCC
    Random Forest     72.648219   0.751498 0.726482  0.735547 0.357206
Gradient Boosting     78.015459   0.764922 0.726482  0.750119 0.356800
Number of nodes=100, Radius=5000
       Classifier  Accuracy (%)  Precision   Recall  F1 Score      MCC
    Random Forest     88.635939   0.956456 0.886359  0.918062 0.102733
Gradient Boosting     97.412175   0.974826 0.886359  0.961361 0.000000
Number of nodes=50