In [1]:
import random
import sys
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from matplotlib import rcParams
rcParams['font.family'] = 'serif'
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from topology import Topology
from simulation import Simulation
from simulation import SimulationResult
from packet import PacketSf
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
import pandas as pd
import seaborn as sns
# Seed Python's built-in `random` module so code drawing from it is repeatable.
# NOTE(review): this does not seed NumPy's global RNG, which scikit-learn
# estimators fall back to when `random_state` is left unset.
random.seed(42)  # for now seed is constant


class SimulationFigure:
    """Hold named data series over a shared x-axis and draw them as line plots.

    Attributes:
        x_axis: x values shared by every series.
        plot_names: series names, in drawing order.
        plot_data: maps each series name to its list of y values. The lists
            start empty; this class never fills them, so presumably callers
            append to them before calling ``get_plot``.
    """

    def __init__(self, x_axis, plot_names):
        """Store the x-axis and create one empty y-value list per series name.

        Args:
            x_axis: x values handed unchanged to ``plt.plot`` for every series.
            plot_names: sequence of series names; each becomes a key of
                ``plot_data`` and the label of its line.
        """
        self.x_axis = x_axis
        self.plot_names = plot_names
        # A fresh list per name so series never share storage.
        self.plot_data = {plot_name: [] for plot_name in self.plot_names}

    def get_plot(self, xlabel, ylabel, ylim_bottom=None, ylim_top=None, xlim_left=None, xlim_right=None):
        """Draw every series as a labelled line on a new pyplot figure.

        The new figure is left as pyplot's current figure; nothing is
        returned.

        Args:
            xlabel: text for the x-axis label.
            ylabel: text for the y-axis label.
            ylim_bottom: lower y limit, or None to leave it unchanged.
            ylim_top: upper y limit, or None to leave it unchanged.
            xlim_left: left x limit, or None to leave it unchanged.
            xlim_right: right x limit, or None to leave it unchanged.
        """
        plt.figure()
        drew_series = False
        for plot_name in self.plot_names:
            plt.plot(self.x_axis, self.plot_data[plot_name], label=plot_name)
            drew_series = True

        # The labels given to plt.plot are only displayed once a legend is
        # drawn; skip it when there is nothing to label.
        if drew_series:
            plt.legend()

        # Each limit is applied separately so that an omitted bound is not
        # passed to pyplot at all.
        if ylim_bottom is not None:
            plt.ylim(bottom=ylim_bottom)
        if ylim_top is not None:
            plt.ylim(top=ylim_top)
        if xlim_left is not None:
            plt.xlim(left=xlim_left)
        if xlim_right is not None:
            plt.xlim(right=xlim_right)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.grid(True)
        plt.tight_layout()


def _mean_cv_accuracy_percent(classifier, features, labels, n_splits=5):
    """Return the mean stratified k-fold cross-validation score of *classifier*, in percent."""
    scores = cross_val_score(classifier, features, labels, cv=StratifiedKFold(n_splits=n_splits))
    return scores.mean() * 100


def prediction_accuracy(averaging, number_of_gws, packet_rate, packet_size, simulation_duration, traffic_type,
                        radii=(3000, 5000, 7000, 10000), node_counts=(100, 500, 1000)):
    """Print cross-validated classifier accuracy for a sweep of topologies.

    For every (radius, node count) pair one random topology is created and
    simulated ``averaging`` times. After each run the training split returned
    by ``simulation.get_training_data`` is scored with 5-fold stratified
    cross-validation for a Random Forest and a Gradient Boosting classifier.
    The per-run mean scores are averaged over the repeats and printed as a
    small table. Nothing is returned.

    Args:
        averaging: number of simulation runs per configuration; must be >= 1.
        number_of_gws: passed to ``Topology.create_random_topology``.
        packet_rate: passed to ``Simulation``.
        packet_size: passed to ``Simulation``.
        simulation_duration: passed to ``Simulation``.
        traffic_type: passed to ``Topology.create_random_topology`` as
            ``node_traffic_proportions``.
        radii: sequence of topology radii to sweep over.
        node_counts: sequence of node counts to sweep over for each radius.

    Raises:
        ValueError: if ``averaging`` is smaller than 1.
    """
    # Fail fast: with no repeats the averages below would divide by zero (or
    # report a meaningless 0 %) only after the topologies had been built.
    if averaging < 1:
        raise ValueError(f"averaging must be at least 1, got {averaging!r}")

    for radius in radii:
        for number_of_nodes in node_counts:
            # One topology per configuration, shared by all repeats.
            topology = Topology.create_random_topology(number_of_nodes=number_of_nodes, radius=radius, number_of_gws=number_of_gws, node_traffic_proportions=traffic_type)

            rf_accuracy_sum = 0
            gbc_accuracy_sum = 0
            for _ in range(averaging):
                simulation = Simulation(topology=topology, packet_rate=packet_rate, packet_size=packet_size, simulation_duration=simulation_duration, sf=PacketSf.SF_Random)
                # The run result itself is not needed here; the data is read
                # back from the simulation object below.
                simulation.run()

                # Only the training split is scored; the held-out split is unused.
                X_train, _X_test, y_train, _y_test = simulation.get_training_data(test_size=0.2)

                # Random Forest, seeded like the Gradient Boosting model so
                # that repeated executions give the same scores.
                rf_classifier = RandomForestClassifier(class_weight='balanced', n_estimators=100, random_state=42)
                rf_accuracy_sum += _mean_cv_accuracy_percent(rf_classifier, X_train, y_train)

                # Gradient Boosting with depth-1 trees.
                gbc_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=1, random_state=42, loss='log_loss')
                gbc_accuracy_sum += _mean_cv_accuracy_percent(gbc_classifier, X_train, y_train)

            # Average the per-run scores over all repeats.
            avg_accuracy_rf = rf_accuracy_sum / averaging
            avg_accuracy_gbc = gbc_accuracy_sum / averaging

            # Print accuracy scores in table format.
            accuracy_df = pd.DataFrame({
                'Classifier': ['Random Forest', 'Gradient Boosting'],
                'Accuracy (%)': [avg_accuracy_rf, avg_accuracy_gbc]
            })
            print(f"Number of nodes={number_of_nodes}, Radius={radius}")
            print(accuracy_df.to_string(index=False))


# Experiment configuration. Distances are in meters, times in seconds.

# Topology size and gateway count (plain and PRED_-prefixed variants).
TOPOLOGY_RADIUS = 3_000  # meters
PRED_TOPOLOGY_RADIUS = 5_000  # meters
NUMBER_OF_GWS = 1
PRED_NUMBER_OF_GWS = 3

# Traffic settings.
PACKET_RATE = 0.01  # per second
PACKET_SIZE = 60  # bytes, header + payload, 13 + max(51 to 222)
TRAFFIC_TYPE = (1, 0)  # (poisson, periodic)
SIMULATION_DURATION = 60 * 60  # seconds

# Sweep settings: repeats per configuration and node counts 50..1000, step 50.
AVERAGING = 5
NUMBER_OF_NODES_LIST = range(50, 1000 + 1, 50)


# Run the accuracy sweep using the PRED_ gateway count and the shared
# traffic and duration settings.
prediction_accuracy(
    traffic_type=TRAFFIC_TYPE,
    simulation_duration=SIMULATION_DURATION,
    packet_size=PACKET_SIZE,
    packet_rate=PACKET_RATE,
    number_of_gws=PRED_NUMBER_OF_GWS,
    averaging=AVERAGING,
)


Number of nodes=100, Radius=3000
       Classifier  Accuracy (%)
    Random Forest     87.786446
Gradient Boosting     96.643643
Number of nodes=500, Radius=3000
       Classifier  Accuracy (%)
    Random Forest     76.878868
Gradient Boosting     85.925337
Number of nodes=1000, Radius=3000
       Classifier  Accuracy (%)
    Random Forest     72.650279
Gradient Boosting     78.410701
Number of nodes=100, Radius=5000
       Classifier  Accuracy (%)
    Random Forest     88.735183
Gradient Boosting     97.057227




Number of nodes=500, Radius=5000
       Classifier  Accuracy (%)
    Random Forest     76.342246
Gradient Boosting     85.619945
Number of nodes=1000, Radius=5000
       Classifier  Accuracy (%)
    Random Forest     71.792683
Gradient Boosting     77.847871
Number of nodes=100, Radius=7000
       Classifier  Accuracy (%)
    Random Forest     86.963729
Gradient Boosting     93.450252
Number of nodes=500, Radius=7000
       Classifier  Accuracy (%)
    Random Forest     75.408680
Gradient Boosting     82.954281
Number of nodes=1000, Radius=7000
       Classifier  Accuracy (%)
    Random Forest     72.437195
Gradient Boosting     75.795204
Number of nodes=100, Radius=10000
       Classifier  Accuracy (%)
    Random Forest     87.890020
Gradient Boosting     88.941588
Number of nodes=500, Radius=10000
       Classifier  Accuracy (%)
    Random Forest     78.138388
Gradient Boosting     77.575206
Number of nodes=1000, Radius=10000
       Classifier  Accuracy (%)
    Random Forest     75.9