In [1]:
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import matplotlib.pyplot as plt
import os

In [2]:
def load_plot_archive(path):
    """
    Loads the data from the experiment into RAM
    
    returns:
        - keys df
        - df
    """
    file_path = [path]

    for path in file_path:
        with open(path, 'rb') as f:
            data = np.load(f, allow_pickle=True)[()]
    
    keys = data.keys()
    return keys, data

In [3]:
def create_df_list(keys, df):
    """
    Creates a list of dataframes
    
    Returns:
        - df list
    """

    df_list = []
    for key in keys:
        df_list.append(total_file_df[key])
    print(df_list)

In [4]:
def get_outbreaks(data, threshold=50):
    """
    Calculates the outbreaks from the actives data.

    Returns:
        - An array with the outbreak peak sizes.
        - An array with the outbreak durations.
    """
    outbreak_peaks = []
    outbreak_widths = []
    counting = False
    current_peak = 0
    start = 0

    for i in range(len(data)):

        if data[i] >= threshold and not counting:
            counting = True
            if current_peak < data[i]:
                current_peak = data[i]
            start = i

        elif data[i] >= threshold and counting:
            if current_peak < data[i]:
                current_peak = data[i]

        elif data[i] < threshold and counting:
            outbreak_peaks.append(current_peak)
            outbreak_widths.append(i-start)
            current_peak = 0
            counting = False

    # Capture cases where timeline ends in an outbreak.
    # Obviously skewers data, but preferable over 0 or infinite outbreaks.
    if not outbreak_peaks and counting:
        outbreak_peaks.append(current_peak)
        outbreak_widths.append(len(data))
    elif not outbreak_peaks and not counting:
        outbreak_peaks.append(0)
        outbreak_widths.append(0)

    return outbreak_peaks, outbreak_widths

In [5]:
def create_peakduration_df(name, data):
    """
    Creates a dataframe of the data

    Returns:
        - Pandas.DataFrame Object 
    """

    total_peaks = []
    total_durations = []
    average_outbreaks = []
    for df in df_list:
        peaks , durations = get_outbreaks(df.ACTIVE)
        total_peaks += peaks
        total_durations += durations
        average_outbreaks.append(len(durations))

    df = pd.DataFrame(data={'PeakHeight': total_peaks, 'PeakDuration': total_durations})
    df["Network"] = name
    return df

In [6]:
def read_filenames(directory):
    """
    Reads the different filenames of a directory
    
    returns:
        - filenames
    """    
    return [directory + name for name in os.listdir(directory)]

In [7]:
def drop_rows_df(df, columnname, condition):
    """
    Drops rows based on a condition
    
    returns:
        - dataframe
    """
    return df[df[columnname] != condition]

In [8]:
path = 'data/'
filenames = read_filenames(path)
names = [file.split('_')[5] for file in filenames]

full_df = pd.DataFrame()
for index in range(len(filenames)):
    
    keys, total_file_df = load_plot_archive(filenames[index])
    df_list = create_df_list(keys, total_file_df)
    df_plot = create_peakduration_df(names[index], df_list)
    full_df = pd.concat([full_df, df_plot])

full_df = drop_rows_df(full_df, 'Network', 'run')
full_df = drop_rows_df(full_df, 'PeakDuration', 401)

for key in keys:
    total_file_df[key].ACTIVE[0:8].plot()


[     QUIESCENT  ACTIVE  JAILED  LEGITIMACY  OUTBREAKS
0         1110       9       0    0.820000          0
1         1008      69      42    0.820000          0
2         1008      69      42    0.788022          1
3          618     392     109    0.788022          1
4          618     392     109    0.764357          1
..         ...     ...     ...         ...        ...
396        938       0     181    0.817360          5
397        969       1     149    0.817360          5
398        969       1     149    0.819992          5
399        958      21     140    0.819992          5
400        958      21     140    0.812462          5

[401 rows x 5 columns],      QUIESCENT  ACTIVE  JAILED  LEGITIMACY  OUTBREAKS
0         1143      14       0    0.820000          0
1          853     256      48    0.820000          0
2          853     256      48    0.784509          1
3          333     716     108    0.784509          1
4          333     716     108    0.769217          1
..

TypeError: 'NoneType' object is not iterable

In [None]:
sns.displot(full_df, x="PeakHeight", hue='Network', fill=True, kind='kde')
sns.displot(full_df, x='PeakDuration', hue='Network', fill=True, kind='kde')