In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from statistics import mean, stdev

In [None]:
# Apply seaborn's default visual theme to the plots created in this notebook.
sns.set_theme()

# Numeric activity tag (the "Tag" column of the dataset files) -> activity name.
# The tags are consecutive and start at 1, so the names are listed in tag order.
ACTIVITY_TAG_MAP = dict(
    enumerate(
        [
            "fall_forward_hands",
            "fall_forward_knees",
            "fall_backwards",
            "fall_sideward",
            "fall_sitting_chair",
            "walking",
            "standing",
            "sitting",
            "picking_object",
            "jumping",
            "laying",
        ],
        start=1,
    )
)

# Sensor position identifier -> label used for display.
labels_map = dict(
    Ankle="Tornozelo",
    Belt="Cintura",
    full="Todos",
    Neck="Pescoço",
    RightPocket="Bolso",
    Wrist="Punho",
)

# Metric column name -> axis label used in the plots.
metrics_map = dict(
    recall="Recall",
    accuracy="Acurácia",
    precision="Precisão",
    f1="f1-score",
)

# Features domain identifier -> label used for display.
features_map = dict(
    time="Tempo",
    frequency="Frequência",
    both="Ambos",
)

In [None]:
# Load every results file found in ./results_to_process and stack them into a
# single frame, tagging each row with the algorithm that produced it.
results_to_process_dir = Path.cwd() / "results_to_process"

# iterdir() yields entries in arbitrary order, so sort for a reproducible row
# order. Sub-directories and hidden entries (e.g. ".ipynb_checkpoints") are
# skipped because they are not result files.
result_files = sorted(
    path
    for path in results_to_process_dir.iterdir()
    if path.is_file() and not path.name.startswith(".")
)

datasets = []
for file in result_files:
    print(file)
    # The algorithm name is the part of the file name before the first "_".
    algorithm = file.name.split("_")[0]
    dataset = pd.read_csv(file, index_col=False)
    dataset["algorithm"] = algorithm
    datasets.append(dataset)

# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating each file's own row numbers.
data = pd.concat(datasets, ignore_index=True)

In [None]:
# Count how many samples of each activity every dataset file contains.
# Resulting layout: {sensor_position: {features_domain: {activity_name: count}}}.
datasets_path = Path.cwd() / "datasets"
DATASET_SUFFIX = "dataset.csv"
total_of_each_activity = {position: {} for position in labels_map.keys()}

for dataset in sorted(datasets_path.iterdir()):
    # Ignore anything that is not a dataset file (hidden files, checkpoints, ...).
    if not dataset.name.endswith(DATASET_SUFFIX):
        continue

    # Names are parsed as "<position>_dataset.csv" or
    # "<position>_<domain>_dataset.csv". The suffix is sliced off explicitly:
    # str.rstrip("dataset.csv") would strip a *set of characters*, not a suffix.
    stem = dataset.name[: -len(DATASET_SUFFIX)].rstrip("_")
    sensor_position, _, features_domain = stem.partition("_")
    # A file name without a domain part is stored under the "both" key.
    features_domain = features_domain or "both"

    df = pd.read_csv(dataset, header=0).dropna()
    # Tag 20 has no entry in ACTIVITY_TAG_MAP, so its rows are dropped before
    # counting. NOTE(review): the meaning of tag 20 is not shown here - confirm.
    df = df[df["Tag"] != 20]

    samples_per_tag = df.groupby("Tag").size()
    # setdefault keeps the loop from failing on a position missing in labels_map.
    total_of_each_activity.setdefault(sensor_position, {})[features_domain] = {
        ACTIVITY_TAG_MAP[tag]: count for tag, count in samples_per_tag.to_dict().items()
    }

In [None]:
# Load every mistakes file found in ./mistakes_to_process and stack them into a
# single frame, tagging each row with the algorithm that produced it.
mistakes_to_process_dir = Path.cwd() / "mistakes_to_process"

# iterdir() yields entries in arbitrary order, so sort for a reproducible row
# order. Sub-directories and hidden entries (e.g. ".ipynb_checkpoints") are
# skipped because they are not mistakes files.
mistake_files = sorted(
    path
    for path in mistakes_to_process_dir.iterdir()
    if path.is_file() and not path.name.startswith(".")
)

mistakes_dfs = []
for file in mistake_files:
    print(file)
    # The algorithm name is the part of the file name before the first "_".
    algorithm = file.name.split("_")[0]
    dataset = pd.read_csv(file, index_col=False)
    dataset["algorithm"] = algorithm
    mistakes_dfs.append(dataset)

# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating each file's own row numbers.
mistakes_df = pd.concat(mistakes_dfs, ignore_index=True)
# Independent copy that is later converted from absolute counts to fractions.
percentage_mistakes_df = mistakes_df.copy()


# Turn the per-activity mistake values into fractions of the number of samples
# of that activity in the matching dataset (sensor position x features domain).

# The activity columns may have been read as integers; writing fractions into
# an integer column relies on silent upcasting, which pandas has deprecated.
# Cast them to float once, up front.
activity_columns = [
    activity
    for activity in ACTIVITY_TAG_MAP.values()
    if activity in percentage_mistakes_df.columns
]
percentage_mistakes_df = percentage_mistakes_df.astype(
    {activity: "float64" for activity in activity_columns}
)

for position, position_info in total_of_each_activity.items():
    for feature, feature_info in position_info.items():
        # The row selection only depends on position and feature, so build it
        # once instead of once per activity.
        rows = (percentage_mistakes_df["sensor_position"] == position) & (
            percentage_mistakes_df["features_domain"] == feature
        )
        for activity, total in feature_info.items():
            # to_numpy() makes the assignment purely positional, so it does not
            # depend on index labels being unique.
            percentage_mistakes_df.loc[rows, activity] = (
                percentage_mistakes_df.loc[rows, activity].to_numpy() / total
            )


percentage_mistakes_df

In [None]:
def _mean_per_activity(frame):
    """Return the mean of every activity column per (algorithm, sensor_position)."""
    mean_spec = {name: ["mean"] for name in ACTIVITY_TAG_MAP.values()}
    return frame.groupby(["algorithm", "sensor_position"]).agg(mean_spec)


# Activities become rows and (algorithm, sensor_position) pairs become columns.
mistakes_mean = _mean_per_activity(mistakes_df).T

# Same table for the fractional mistakes, scaled by 100 before transposing.
percentage_mistakes_mean = (_mean_per_activity(percentage_mistakes_df) * 100).T

In [None]:
# Translate the sensor position and features domain identifiers into their
# display labels. Values without a translation are kept as they are, which
# makes the cell safe to re-run: a plain .map(labels_map) would turn the
# already translated labels into NaN on a second execution.
data["sensor_position"] = data["sensor_position"].map(
    lambda position: labels_map.get(position, position)
)
data["features_domain"] = data["features_domain"].map(
    lambda domain: features_map.get(domain, domain)
)
data = data.rename(columns={"algorithm": "Algoritmo"})

# Metric columns that are plotted, and the untranslated features domain keys.
metrics = ["recall", "accuracy", "precision", "f1"]
features = ["time", "frequency", "both"]

In [None]:
# For each algorithm, print the first two rows of the "best_params" column of
# the column-wise mode of that algorithm's results.
for algorithm_name in ("KNN", "RF", "MLP", "SVM"):
    algorithm_rows = data[data["Algoritmo"] == algorithm_name]
    print(algorithm_rows.mode()["best_params"].iloc[:2])

In [None]:
# Mean and standard deviation of every metric, scaled by 100, for each
# (algorithm, sensor position, features domain) combination.
metric_summary = {
    name: ["mean", "std"] for name in ("recall", "precision", "f1", "accuracy")
}
grouped_results = data.groupby(["Algoritmo", "sensor_position", "features_domain"])
# reset_index() turns the three grouping levels back into regular columns.
table = grouped_results.agg(metric_summary).mul(100).reset_index()

In [None]:
# Horizontal bars: one figure per metric, with one facet row per features
# domain and one bar per (sensor position, algorithm). Error bars show the
# standard deviation.
for metric in metrics:
    plot = sns.catplot(
        data,
        x=metric,
        y="sensor_position",
        hue="Algoritmo",
        row="features_domain",
        kind="bar",
        errorbar="sd",
        capsize=0.3,
        legend_out=True,
    )

    plot.set(xlim=(0, 1))
    plot.set_axis_labels(metrics_map[metric], "Posição do sensor")
    plot.set_titles("{row_name}")

    # With only `row` faceting the axes grid has a single column; show the
    # x tick labels on every row instead of only on the bottom one.
    for row_axis in plot.axes[:, 0]:
        row_axis.tick_params(labelbottom=True)

    plot.figure.subplots_adjust(bottom=0.1, hspace=0.2)

In [None]:
# 1 x 3 layout: one figure per metric, with the three features domains side by
# side. Each figure is saved under plots/apresentacao/.
col_order = ["Tempo", "Frequência", "Ambos"]

# savefig does not create missing directories, so make sure the target exists.
Path("plots/apresentacao").mkdir(parents=True, exist_ok=True)

for metric in metrics:
    plot = sns.catplot(
        data,
        kind="bar",
        errorbar="sd",
        capsize=0.3,
        x="sensor_position",
        hue="Algoritmo",
        col="features_domain",
        col_order=col_order,
        y=metric,
    )

    plot.set(ylim=(0, 1))
    plot.set_axis_labels("Posição do sensor", metrics_map[metric], fontsize=12)
    plot.set_titles("{col_name}")

    # Put the legend above the facets, laid out in up to four columns.
    sns.move_legend(
        plot, "upper center", bbox_to_anchor=(0.5, 1.1), frameon=True, ncol=4
    )

    plt.subplots_adjust(bottom=0.1, hspace=0.2)

    plt.savefig(
        f"plots/apresentacao/errorbars_{metric}.png", bbox_inches="tight", dpi=500
    )

In [None]:
# 2 + 1 layout: one figure per metric, with the three features domains wrapped
# after two columns. Each figure is saved under plots/.
col_order = ["Tempo", "Frequência", "Ambos"]
letters = ["a", "b", "c"]

# savefig does not create missing directories, so make sure the target exists.
Path("plots").mkdir(parents=True, exist_ok=True)

for metric in metrics:
    plot = sns.catplot(
        data,
        kind="bar",
        errorbar="sd",
        capsize=0.3,
        x="sensor_position",
        hue="Algoritmo",
        col="features_domain",
        col_order=col_order,
        col_wrap=2,
        y=metric,
    )

    plot.set(ylim=(0, 1))
    plot.set_axis_labels("Posição do sensor", metrics_map[metric], fontsize=12)

    # Put the legend above the facets, laid out in up to four columns.
    sns.move_legend(
        plot, "upper center", bbox_to_anchor=(0.5, 1.05), frameon=True, ncol=4
    )

    # Facets are drawn in col_order, so pairing the axes with it gives each
    # panel its "(a) Tempo"-style title.
    for letter, domain_label, ax in zip(letters, col_order, plot.axes):
        ax.set_title(f"({letter}) {domain_label}")
        # Show x tick labels on every panel, not only on the bottom row.
        ax.tick_params(
            labelbottom=True,
            labelsize=12,
        )
        ax.set(xlabel=None)

    plt.subplots_adjust(bottom=0.1, hspace=0.2)

    plt.savefig(f"plots/errorbars_{metric}_2.png", bbox_inches="tight")

In [None]:
# # Individual: one bar plot per metric and features domain (this whole cell is commented out)
# for metric in metrics:

#     for feature in features:


#         plt.figure(figsize=(12, 8))
#         bar_plot = sns.barplot(


#             data[data["features_domain"] == feature],
#             x="sensor_position",
#             y=metric,
#             hue="Algoritmo",
#             order=["Tornozelo", "Bolso", "Cintura", "Punho", "Pescoço", "Todos"],
#             errorbar="sd",
#             capsize=0.3,
#             native_scale=True,
#         )


#         bar_plot.set_ylim(0, 1)


#         bar_plot.set_xlabel("Posição do sensor", fontsize=14)
#         bar_plot.set_ylabel(metrics_map[metric], fontsize=14)

#         plt.legend(fontsize=12)
#         plt.tick_params(labelsize=12)

#         sns.move_legend(bar_plot, "upper center", bbox_to_anchor=(0.5, 1.2))

#         plt.savefig(f"plots/errorbar_{metric}_{feature}.png", bbox_inches="tight")

In [None]:
# Choose best model between folds
# NOTE(review): this cell looks like a fragment pasted from a training script.
# It is indented without an enclosing block and uses names (best_params,
# classifier, fold_metrics, multimode) that are not defined or imported in the
# visible notebook, so it will not run as-is. Confirm whether it should be
# completed or removed.
    best_params.append(
            {"params": classifier.best_params_, "recall": fold_metrics["recall"]}
        )
    # Order the collected entries from highest to lowest recall.
    sorted_by_recall = sorted(best_params, key=lambda d: d["recall"], reverse=True)
    print(sorted_by_recall)
    # Stringify each parameter set, keeping the recall-sorted order.
    params = [str(d["params"]) for d in sorted_by_recall]
    print(params)
    # presumably statistics.multimode (most frequent value(s)) - TODO confirm the import
    chosen_params = multimode(params)