In [None]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Folders scanned for the stream and edge CSV files.
STREAM_DATA_PATH = Path("data/stream")
EDGE_DATA_PATH = Path("data/edge")

# Folder that receives every figure saved by the plotting cells.
OUT_PATH = Path("out/")
# savefig does not create missing directories, so make sure the folder
# exists before any plotting cell tries to write into it.
OUT_PATH.mkdir(parents=True, exist_ok=True)


In [None]:
# loading all the data
def load_csv_folder(folder_path):
    """Read every ``.csv`` file directly inside ``folder_path``.

    ``Path.iterdir`` yields entries in arbitrary, filesystem-dependent order,
    so the paths are sorted first; the returned list then has the same order
    on every machine and every run.

    Args:
        folder_path: ``pathlib.Path`` of the folder to scan (not recursive).

    Returns:
        A list with one DataFrame per ``.csv`` file, ordered by file path.
        Entries with any other suffix are ignored.
    """
    return [
        pd.read_csv(path)
        for path in sorted(folder_path.iterdir())
        if path.suffix == ".csv"
    ]

# Loaded frames per origin: "stream" and "edge" each map to a list of DataFrames.
data = {
    origin: load_csv_folder(folder)
    for origin, folder in (
        ("stream", STREAM_DATA_PATH),
        ("edge", EDGE_DATA_PATH),
    )
}


In [None]:
# Keep only the 20 seconds of data starting at the first cough (drop everything before and after), since that window matches the example wav
def get_example_wav_data(df, duration_ms=20000):
    """Trim a frame to the window that starts at its first "cough" row.

    Rows before the first row labelled "cough" are dropped, the index is
    reset, and only rows whose timestamp is less than ``duration_ms`` after
    the smallest remaining timestamp are kept.

    Args:
        df: DataFrame with a "label" column and a numeric "timestamp" column
            (milliseconds, per the 20 second / 20000 window used here).
        duration_ms: Length of the window to keep. Defaults to 20000 because
            the example wav is 20 seconds long.

    Returns:
        A new DataFrame (the input is not modified) with an added
        "time_from_start" column: timestamp minus the window's starting
        timestamp. Empty when no row is labelled "cough".
    """
    # True from the first "cough" row onwards, False before it.
    from_first_cough = (df["label"] == "cough").cummax()
    trimmed = df.loc[from_first_cough].reset_index(drop=True)

    starting_timestamp = trimmed["timestamp"].min()
    in_window = trimmed["timestamp"] < starting_timestamp + duration_ms

    # assign() builds a new frame, so the column is never written onto a
    # filtered slice (which is what triggers SettingWithCopyWarning).
    return trimmed.loc[in_window].assign(
        time_from_start=lambda window: window["timestamp"] - starting_timestamp
    )

# Replace each loaded frame, in place within its list, by its trimmed version.
for origin_type, dfs in data.items():
    for i, untrimmed in enumerate(dfs):
        dfs[i] = get_example_wav_data(untrimmed)


In [None]:
def get_prediction_rate(df):
    """Return the prediction rate of ``df`` in predictions per second.

    The rate is the number of rows divided by the span between the smallest
    and largest "timestamp" values, with the span converted from
    milliseconds to seconds.
    """
    timestamps = df["timestamp"]
    elapsed_seconds = (timestamps.max() - timestamps.min()) / 1000
    return len(df) / elapsed_seconds

# Print the mean and standard deviation of the per-frame prediction rate for each origin.
for origin_type, dfs in data.items():
    prediction_rates = np.array([get_prediction_rate(frame) for frame in dfs])

    print(f"{origin_type} Mean: {np.mean(prediction_rates)} SD: {np.std(prediction_rates)}")


In [None]:
def get_col_from_df_list(dfs, col_name):
    """Concatenate column ``col_name`` from every frame in ``dfs``.

    Values are collected frame by frame, in list order, and returned as a
    single flat NumPy array.
    """
    return np.array(
        [value for frame in dfs for value in frame[col_name].to_list()]
    )

# (display name, origin key in `data`, column name) for each delay series.
delay_sources = (
    ("Edge Full Delay", "edge", "delay"),
    ("Stream Full Delay", "stream", "delay"),
    ("Stream Recording Delay", "stream", "record_delay"),
    ("Stream Processing Delay", "stream", "process_delay"),
)

# Flat array of every value of each delay series, keyed by display name.
delays = {
    title: get_col_from_df_list(data[origin], column)
    for title, origin, column in delay_sources
}


In [None]:
# Print the mean and standard deviation of every delay series.
for delay_type in delays:
    delay = delays[delay_type]
    print(f"{delay_type} Mean: {np.mean(delay)} SD: {np.std(delay)}")


In [None]:
# One histogram per delay series, saved as "<delay_type>_histogram.png".
for delay_type, delay in delays.items():
    # Window from 20 below the smallest delay to 150 above it; the bins and
    # the x-axis both use this window.
    smallest_delay = np.min(delay)
    data_range = (smallest_delay - 20, smallest_delay + 150)

    fig, ax = plt.subplots()
    ax.hist(delay, bins=50, range=data_range)
    ax.set_xlabel("Delay (ms)")
    ax.set_ylabel("Frequency (count)")
    ax.set_xlim(data_range)
    ax.set_title(delay_type)

    fig.savefig(OUT_PATH / f"{delay_type}_histogram.png")


In [None]:
# Pie chart of the mean stream recording delay against the mean stream processing delay.
mean_recording_delay = np.mean(delays["Stream Recording Delay"])
mean_processing_delay = np.mean(delays["Stream Processing Delay"])

labels = ["Recording Delay", "Processing Delay"]
sizes = [mean_recording_delay, mean_processing_delay]

fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%')
ax.set_title("Composition of Stream Delay")
fig.savefig(OUT_PATH / "stream_delay_composition.png")


In [None]:
# Box plot of the edge full delay next to the stream processing delay.
box_labels = ["Edge Full Delay", "Stream Processing Delay"]
to_graph = {label: delays[label] for label in box_labels}

fig, ax = plt.subplots()
ax.boxplot(list(to_graph.values()))
ax.set_xticklabels(list(to_graph.keys()))
ax.set_title("Model Processing Delay")

fig.savefig(OUT_PATH / "processing_delay.png")


In [None]:
# Overlay the delay of every edge frame against time since its first cough.
for df in data["edge"]:
    plt.plot(df["time_from_start"], df["delay"], alpha=.7)

# Set the y-limits once, from the extremes across all frames. Setting them
# per frame inside the loop would leave only the last frame's range in
# effect and could clip the other traces. Empty frames are skipped because
# their min/max are NaN, which are not valid axis limits.
edge_delay_columns = [df["delay"] for df in data["edge"] if len(df) > 0]
if edge_delay_columns:
    plt.ylim(
        min(column.min() for column in edge_delay_columns) - 30,
        max(column.max() for column in edge_delay_columns) + 100,
    )

plt.xlabel("Time From Start (ms)")
plt.ylabel("Delay")
plt.title("Edge Delay Over Time")

plt.savefig(OUT_PATH/"edge_over_time.png")


In [None]:
# Overlay the delay of every stream frame against time since its first cough.
for df in data["stream"]:
    plt.plot(df["time_from_start"], df["delay"], alpha=.7)

# Set the y-limits once, from the extremes across all frames. Setting them
# per frame inside the loop would leave only the last frame's range in
# effect and could clip the other traces. Empty frames are skipped because
# their min/max are NaN, which are not valid axis limits.
stream_delay_columns = [df["delay"] for df in data["stream"] if len(df) > 0]
if stream_delay_columns:
    plt.ylim(
        min(column.min() for column in stream_delay_columns) - 30,
        max(column.max() for column in stream_delay_columns) + 200,
    )

plt.xlabel("Time From Start (ms)")
plt.ylabel("Delay")
plt.title("Stream Delay Over Time")

plt.savefig(OUT_PATH/"stream_over_time.png")
