In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from copy import deepcopy
from tabulate import tabulate

Load dataset for processing

In [None]:
# Read the benchmark results from the CSV file in the working directory
raw_df = pd.read_csv(filepath_or_buffer='results_CBS_v1.csv')

# Preview the first five rows of the loaded file
print(raw_df.head(5))

# Data post-processing
A new dataframe is created, containing the post-processed raw data.

In [None]:
# Build the working dataframe from the raw data without touching raw_df:
# `assign` operates on a copy and `drop` returns a new frame.
df = (
    raw_df
    .assign(
        # --- Cost analysis
        # Gap between the achieved cost and the ideal cost, rounded to 3 decimals
        total_cost_diff=lambda frame: (frame['cost'] - frame['ideal_cost']).round(3),
        # That gap normalised by the number of agents, rounded to 3 decimals
        cost_diff_per_agent=lambda frame: (frame['total_cost_diff'] / frame['nb_agents']).round(3),
    )
    # Only the normalised difference is kept; the inputs and the intermediate go away
    .drop(columns=['cost', 'ideal_cost', 'total_cost_diff'])
)

# Show the first rows of the post-processed dataframe
print(df.head())

Furthermore, new dataframes with the data bucketed according to agent count, start type, and map type are created

## Bucketing by agent count

In [None]:
# Per-agent-count aggregates of the post-processed data.
#
# `numeric_only=True` restricts the aggregation to numeric columns. Without it,
# pandas >= 2.0 raises a TypeError as soon as the frame holds a non-numeric
# column (older versions silently dropped such columns), so the explicit flag
# keeps the cell working on both old and new pandas.
# NOTE(review): assumes `test_name` is the non-numeric column here - confirm
# against the CSV schema.

# Mean of every numeric column per agent count
df_mean_by_agent_count = (
    df.groupby('nb_agents', as_index=False)
    .mean(numeric_only=True)
    # start_type is bucketed on its own further down, so it is removed from this summary
    .drop(columns=["start_type"])
)

print("--> Mean by agent count")
print(df_mean_by_agent_count.head())

# Standard deviation of every numeric column per agent count
df_std_by_agent_count = (
    df.groupby('nb_agents', as_index=False)
    .std(numeric_only=True)
    .drop(columns=["start_type"])
)

print("\n--> Standard deviation by agent count")
print(df_std_by_agent_count.head())

## Bucketing by map type

In [None]:
# Mean of the aggregated columns per test name, without the other bucketing keys
df_mean_by_test_name = (
    df.groupby('test_name', as_index=False)
    .mean()
    .drop(columns=["start_type", "nb_agents"])
)

print("--> Mean by test name")
print(df_mean_by_test_name)

# Standard deviation of the aggregated columns per test name, same cleanup as above
df_std_by_test_name = (
    df.groupby('test_name', as_index=False)
    .std()
    .drop(columns=["start_type", "nb_agents"])
)

print("\n--> Standard deviation by test name")
print(df_std_by_test_name)

## Bucketing by start type

In [None]:
# Per-start-type aggregates of the post-processed data.
#
# `numeric_only=True` restricts the aggregation to numeric columns. Without it,
# pandas >= 2.0 raises a TypeError as soon as the frame holds a non-numeric
# column (older versions silently dropped such columns), so the explicit flag
# keeps the cell working on both old and new pandas.
# NOTE(review): assumes `test_name` is the non-numeric column here - confirm
# against the CSV schema.

# Mean of every numeric column per start type
df_mean_by_start_type = (
    df.groupby('start_type', as_index=False)
    .mean(numeric_only=True)
    # nb_agents is bucketed on its own in an earlier cell, so it is removed from this summary
    .drop(columns=["nb_agents"])
)

print("--> Mean by start type")
print(df_mean_by_start_type.head())

# Standard deviation of every numeric column per start type
df_std_by_start_type = (
    df.groupby('start_type', as_index=False)
    .std(numeric_only=True)
    .drop(columns=["nb_agents"])
)

print("\n--> Standard deviation by start type")
print(df_std_by_start_type.head())

# Statistical significance analysis

## Distribution analysis
First, an analysis of the dataset's various distributions is performed to gain good insight into the results' properties.

### Cost difference per agent
The cost difference per agent is first analysed. The first step is to obtain basic statistical properties on the entire dataset.

In [None]:
# Mean and standard deviation of the cost difference per agent over the whole
# dataset, each rounded to 3 decimals
mean_cost_difference_per_agent, std_cost_diff_per_agent = (
    round(statistic, 3)
    for statistic in (df['cost_diff_per_agent'].mean(), df['cost_diff_per_agent'].std())
)

# Report both figures
print(f'Mean cost difference per agent: {mean_cost_difference_per_agent}')
print(f'Std cost difference per agent: {std_cost_diff_per_agent}')

The bucketed data is then considered, providing further insight into the dataset's statistical properties and the basic relationships between key variables. Notably, the correlation between the variables can be determined.

In [None]:
# --- Correlation with agent count
print("--> Mean cost difference per agent correlation with agent count")

# Correlation between the per-bucket mean cost difference per agent and the agent count
correlation_mean_cost_difference_per_agent_and_agent_count = df_mean_by_agent_count['cost_diff_per_agent'].corr(df_mean_by_agent_count['nb_agents'])
print("Correlation =", correlation_mean_cost_difference_per_agent_and_agent_count)

# Plot the mean of each agent-count bucket
df_mean_by_agent_count['cost_diff_per_agent'].plot(kind='bar')
plt.show()

print("--> Std cost difference per agent correlation with agent count")
# Correlation between the per-bucket std of the cost difference per agent and the agent count
correlation_std_cost_difference_per_agent_and_agent_count = df_std_by_agent_count['cost_diff_per_agent'].corr(df_std_by_agent_count['nb_agents'])
print("Correlation =", correlation_std_cost_difference_per_agent_and_agent_count)

# Plot the standard deviation of each agent-count bucket
df_std_by_agent_count['cost_diff_per_agent'].plot(kind='bar')
plt.show()

# --- Comparison across test types
print("--> Cost difference per agent mean by test type (map type)")

# Plot the mean of each test-name bucket
df_mean_by_test_name['cost_diff_per_agent'].plot(kind='bar')
plt.show()

# --- Comparison across start types
print("--> Cost difference per agent mean by start type")

# Plot the mean of each start-type bucket. This must read the start-type frame;
# plotting the test-name frame here would just repeat the previous chart.
df_mean_by_start_type['cost_diff_per_agent'].plot(kind='bar')
plt.show()

In [None]:
# Mean run time over the whole dataset, rounded to 3 decimals
mean_run_time = round(df['run_time'].mean(), 3)
# Print the value just computed (not the cost-difference statistic)
print(f'Mean run time: {mean_run_time}')

# Standard deviation of run time over the whole dataset, rounded to 3 decimals
std_run_time = round(df['run_time'].std(), 3)
print(f'Std run time: {std_run_time}')

# Backward compatibility: this cell historically stored the run-time statistics
# under the average-deviation names, so those names stay bound to the same values
# for any cell that still reads them right after this one.
mean_average_deviation_per_agent = mean_run_time
std_average_deviation_per_agent = std_run_time

In [None]:
# --- Correlation with agent count
print("--> Mean run time correlation with agent count")
# Correlation between the per-bucket mean run time and the agent count
correlation_mean_run_time_and_agent_count = df_mean_by_agent_count['run_time'].corr(df_mean_by_agent_count['nb_agents'])
print("Correlation =", correlation_mean_run_time_and_agent_count)

# Plot the mean run time of each agent-count bucket
df_mean_by_agent_count['run_time'].plot(kind='bar')
plt.show()

print("--> Std run time correlation with agent count")
# Correlation between the per-bucket std of the run time and the agent count
correlation_std_run_time_and_agent_count = df_std_by_agent_count['run_time'].corr(df_std_by_agent_count['nb_agents'])
print("Correlation =", correlation_std_run_time_and_agent_count)

# Plot the run-time standard deviation of each agent-count bucket. This reads the
# std frame so the chart matches the heading printed above.
df_std_by_agent_count['run_time'].plot(kind='bar')
plt.show()

# --- Comparison across test types
print("--> Run time mean by test type (map type)")

# Plot the mean run time of each test-name bucket
df_mean_by_test_name['run_time'].plot(kind='bar')
plt.show()

# --- Comparison across start types
print("--> Run time mean by start type")

# Plot the mean run time of each start-type bucket. This must read the start-type
# frame; plotting the test-name frame here would just repeat the previous chart.
df_mean_by_start_type['run_time'].plot(kind='bar')
plt.show()

### Average deviation
Finally, the same analysis is performed on the average deviation

In [None]:
# Mean of the average deviation over the whole dataset, rounded to 3 decimals
mean_average_deviation_per_agent = round(df['avg_deviation'].mean(), 3)
# Print the value just computed (not the cost-difference statistic)
print(f'Mean average deviation: {mean_average_deviation_per_agent}')

# Standard deviation of the average deviation over the whole dataset, rounded to 3 decimals
std_average_deviation_per_agent = round(df['avg_deviation'].std(), 3)
print(f'Std average deviation: {std_average_deviation_per_agent}')

In [None]:
# --- Correlation with agent count
print("--> Mean average deviation correlation with agent count")
# Correlation between the per-bucket mean average deviation and the agent count
correlation_mean_avg_deviation_and_agent_count = df_mean_by_agent_count['avg_deviation'].corr(df_mean_by_agent_count['nb_agents'])
print("Correlation =", correlation_mean_avg_deviation_and_agent_count)

# Plot the mean average deviation of each agent-count bucket
df_mean_by_agent_count['avg_deviation'].plot(kind='bar')
plt.show()

print("--> Std average deviation correlation with agent count")
# Correlation between the per-bucket std of the average deviation and the agent count
correlation_std_avg_deviation_and_agent_count = df_std_by_agent_count['avg_deviation'].corr(df_std_by_agent_count['nb_agents'])
print("Correlation =", correlation_std_avg_deviation_and_agent_count)

# Plot the average-deviation standard deviation of each agent-count bucket. This
# reads the std frame so the chart matches the heading printed above.
df_std_by_agent_count['avg_deviation'].plot(kind='bar')
plt.show()

# --- Comparison across test types
print("--> Average deviation mean by test type (map type)")

# Plot the mean average deviation of each test-name bucket
df_mean_by_test_name['avg_deviation'].plot(kind='bar')
plt.show()

# --- Comparison across start types
print("--> Average deviation mean by start type")

# Plot the mean average deviation of each start-type bucket. This must read the
# start-type frame; plotting the test-name frame here would just repeat the previous chart.
df_mean_by_start_type['avg_deviation'].plot(kind='bar')
plt.show()

## Local Sensitivity analysis
A sensitivity analysis of the agent count is performed, providing insight on the impact of the parameter on cost difference mean, standard deviation, and on run time mean.
Given the integer nature of the agent count variable, and the constraints in time and processing power available to the team, it was decided to take as reference value an agent count of 6, and gather data on the +4 and -4 range.

In [None]:
# --- Sensitivity analysis of means to agent count
# NOTE(review): the accompanying text names a reference agent count of 6, while
# the code uses 3.5 and applies it both to the cost difference and to the agent
# count - confirm the intended reference point(s) before relying on S+/S-.
reference_value = 3.5

# Positive: (mean cost difference - reference) divided by the relative offset
# of the agent count from the reference
df_mean_by_agent_count["S+"] = (
    df_mean_by_agent_count['cost_diff_per_agent']
    .sub(reference_value)
    .div(df_mean_by_agent_count['nb_agents'].sub(reference_value).div(reference_value))
)

# Negative: same denominator, numerator taken as (reference - mean cost difference)
df_mean_by_agent_count["S-"] = (
    df_mean_by_agent_count['cost_diff_per_agent']
    .rsub(reference_value)
    .div(df_mean_by_agent_count['nb_agents'].sub(reference_value).div(reference_value))
)

print(df_mean_by_agent_count)

# Draw both sensitivity series as lines on the same figure
df_mean_by_agent_count['S+'].plot.line()
df_mean_by_agent_count['S-'].plot.line()
plt.show()

# Results
The results obtained above are gathered here

In [None]:
# Distribution analysis (full set)

print("==========> Full dataset statistics")
# When the notebook is run top to bottom, the `*_average_deviation_per_agent`
# variables hold the avg_deviation statistics by the time this cell executes
# (the average deviation section assigns them last). They therefore feed the
# average-deviation row, and the run-time figures are recomputed from `df` so
# the "Run time" row really reports run time.
table = [
    ["", "Mean", "Std"],
    ["Cost diff. per agent", mean_cost_difference_per_agent, std_cost_diff_per_agent],
    ["Avg. deviation per agent", mean_average_deviation_per_agent, std_average_deviation_per_agent],
    ["Run time", round(df['run_time'].mean(), 3), round(df['run_time'].std(), 3)]
]

# The first row of `table` supplies the column headers
print(tabulate(
    table,
    headers="firstrow",
    tablefmt="presto"
))

print("\n==========> Bucketed dataset statistics")

# One entry per bucketing: section heading, then heading + frame for the mean
# and for the standard deviation, printed in that order
for section_heading, mean_heading, mean_frame, std_heading, std_frame in (
    (
        "-----> Bucketed by agent count",
        "--> Mean by agent count", df_mean_by_agent_count,
        "\n--> Standard deviation by agent count", df_std_by_agent_count,
    ),
    (
        "\n-----> Bucketed by test name",
        "--> Mean by test name", df_mean_by_test_name,
        "\n--> Standard deviation by test name", df_std_by_test_name,
    ),
    (
        "\n-----> Bucketed by start type",
        "--> Mean by start type", df_mean_by_start_type,
        "\n--> Standard deviation by start type", df_std_by_start_type,
    ),
):
    print(section_heading)
    print(mean_heading)
    print(mean_frame)
    print(std_heading)
    print(std_frame)

print("\n==========> Dataset correlation statistics")
print("> Correlation with agent count")

# Header row first, then one row per metric: correlation of the bucket means
# and of the bucket standard deviations with the agent count
table = [["", "Mean", "Std. deviation"]]
table += [
    ["Cost diff. per agent", correlation_mean_cost_difference_per_agent_and_agent_count, correlation_std_cost_difference_per_agent_and_agent_count],
    ["Avg. deviation per agent", correlation_mean_avg_deviation_and_agent_count, correlation_std_avg_deviation_and_agent_count],
    ["Run time", correlation_mean_run_time_and_agent_count, correlation_std_run_time_and_agent_count],
]

print(tabulate(table, headers="firstrow", tablefmt="presto"))