In [None]:
import pandas as pd
def read_from_csv(data_name, header=0, names=None):
    """Load a delimited text file into a DataFrame.

    The delimiter is derived from the file name. The extension(s) of the last
    path component are checked first (case-insensitively), so a name such as
    ``tsv_summary.csv`` is read as comma-separated. If no ``.tsv``/``.csv``
    suffix is present, the legacy substring test on the whole path is used as
    a fallback so previously accepted names keep working.

    Args:
        data_name: Path of the file to read.
        header: Forwarded to ``pd.read_csv`` (row used as column names).
        names: Forwarded to ``pd.read_csv`` (explicit column names).

    Returns:
        The parsed ``pd.DataFrame``.

    Raises:
        NotImplementedError: If the name indicates neither TSV nor CSV.
    """
    from pathlib import Path  # local import keeps this cell self-contained

    path_str = str(data_name)
    suffixes = {suffix.lower() for suffix in Path(path_str).suffixes}
    if ".tsv" in suffixes:
        sep = "\t"
    elif ".csv" in suffixes:
        sep = ","
    elif "tsv" in path_str:
        # Legacy fallback: no recognised extension, but the name mentions "tsv".
        sep = "\t"
    elif "csv" in path_str:
        # Legacy fallback: no recognised extension, but the name mentions "csv".
        sep = ","
    else:
        raise NotImplementedError("Given data file type is not supported yet.")
    return pd.read_csv(data_name,
                       sep=sep,
                       encoding="utf-8",
                       engine="python",
                       header=header,
                       names=names)

## Plot results

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Global seaborn look: white grid background at "paper" scale.
sns.set_theme(style="whitegrid")
sns.set_context("paper")
sns.color_palette()

# Matplotlib defaults applied to figures created after this cell.
rc = dict([
    ('figure.figsize', (5, 4)),
    ('axes.facecolor', 'white'),
    ('axes.grid', True),
    ('grid.color', '.8'),
    ('text.color', 'black'),
    ('xtick.color', 'black'),
    ('ytick.color', 'black'),
    ('font.family', 'Times New Roman'),
    ('font.size', 16),
])
plt.rcParams.update(rc)

# Raw dataset identifiers (the `data_name` column values) -> labels shown in figures.
data_name_orig2display = dict([
    ('twitter-hate-speech-tsa', 'Twitter-Hate-Speech'),
    # Alternative label kept for reference: r'CC-5k-$\rho$=11.5'
    ('civil-comments', 'Civil-Comments'),
    ('civil-comments-20k', r'CC-20k-$\rho$=11.5'),
    ('civil-comments-40k', r'CC-40k-$\rho$=11.5'),
    ('civil-comments-5k-7p5', r'CC-5k-$\rho$=7.5'),
    ('gibert-2018-shs', 'Gibert-2018'),
    ('us-election-2020', 'US-Election-2020'),
    ('cmsb-tsd', 'CMSB'),
    ('waseem-and-hovy-2016', 'Waseem-and-Hovy-2016'),
    ('founta-2018-thas', 'Founta-2018'),
    ('davidson-thon', 'Davidson-2017'),
    ('ami', 'AMI-2018'),
])

In [None]:
# Load the mean-results table and the aggregated-results table from the working directory.
df_all_mean_results, df_all_agg_results = (
    read_from_csv(csv_path) for csv_path in ("mean_results.csv", "agg_results.csv")
)
def convert_str2number(x):
    """Parse a stringified Python literal back into its value.

    Strings other than the placeholder "-" are evaluated with
    ``ast.literal_eval`` (e.g. "7.5" -> 7.5). The placeholder "-" and any
    non-string value are returned unchanged.

    Raises:
        ValueError / SyntaxError: Propagated from ``ast.literal_eval`` when a
            string is not a valid Python literal.
    """
    # Imported here because no cell of the notebook imports `ast` at the top.
    import ast

    if isinstance(x, str) and x != "-":
        return ast.literal_eval(x)
    return x
# Parse the stringified values in columns variant_cols[1:-1]; "-" placeholders stay as they are.
# NOTE(review): `variant_cols` is not defined in the visible cells — confirm it exists before this cell runs.
for var in variant_cols[1:-1]:
    parsed_column = df_all_mean_results[var].apply(convert_str2number)
    df_all_mean_results[var] = parsed_column

In [None]:
# Distinct experiment variants present in the mean-results table.
df_all_mean_results["variant"].unique()

In [None]:
# Show the current default seaborn colour palette (last expression, so the notebook renders it).
sns.color_palette()

In [None]:
# Second colour of the default palette (index 1).
sns.color_palette()[1]

In [None]:
# Inspect the colour the "YlOrBr" colormap uses for values above its range.
sns.color_palette("YlOrBr", as_cmap=True).get_over()

## Standard Deviation of Macro F1 Scores

In [None]:
# Box plot of the standard deviation of the test macro-F1 score, one box per dataset.
fig, ax = plt.subplots(figsize=(4,2.6))#figsize=(5,2.5)4.6
df_std = df_all_agg_results[["data_name", "test_f1_macro_std"]].rename(
    columns={"data_name": "Dataset", "test_f1_macro_std": "Standard Deviation of Macro F1 Scores"}
)
df_std["Dataset"] = df_std["Dataset"].map(data_name_orig2display)
# Display labels in the order of `eval_data_names` (defined outside this cell).
data_order = [data_name_orig2display[data_name] for data_name in eval_data_names]
if cc:
    # Hand-picked colours for the Civil-Comments variants.
    # NOTE(review): 'civil-comments' currently maps to 'Civil-Comments', not
    # r'CC-5k-$\rho$=11.5' — confirm the keys match the labels in `data_order`.
    palette = {r'CC-5k-$\rho$=11.5': (0.8666666666666667, 0.5176470588235295, 0.3215686274509804),
               r'CC-5k-$\rho$=7.5': [1.        , 0.8        , 0.6, 1.        ],
               r'CC-20k-$\rho$=11.5': [0.8       , 0.35, 0.1, 0.2        ],
               r'CC-40k-$\rho$=11.5':[0.7       , 0.25, 0.1, 0.2       ],
               }
else:
    palette = sns.color_palette()
# Draw on the axes created above instead of relying on pyplot's implicit current axes.
sns.boxplot(x="Standard Deviation of Macro F1 Scores", y="Dataset", data=df_std,
            order=data_order, palette=palette, ax=ax)
# ax.set_xticks([0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0])
fig.savefig('cc_std_distribution.svg', dpi=600, bbox_inches="tight")

### Random Oversampling

#### Overfitting

In [None]:
# rho_name = "augmentation_rho"
# data_clear_overfitting = ["civil-comments", "davidson-thon"]
# which_data_to_check = (df_all_mean_results["variant"].isin(["baseline", "augmentation_external_data"])) & (df_all_mean_results["data_name"].isin(data_clear_overfitting))
# df_overfitting = df_all_mean_results[which_data_to_check][["data_name", rho_name, "train_f1_macro", "val_f1_macro", "test_f1_macro"]]
# def compute_deviation_to_baseline(row, split):
#     baseline_condition = (df_overfitting["data_name"] == row["data_name"]) & (df_overfitting[rho_name] == "-")
#     baseline_value = df_overfitting.loc[baseline_condition, f"{split}_f1_macro"].values[0]
#     return row[f"{split}_f1_macro"] - baseline_value
# for split in ["train", "val", "test"]:
#     df_overfitting[f"{split}_f1_macro_delta"] = df_overfitting.apply(lambda row: compute_deviation_to_baseline(row, split), axis=1)
# df_overfitting = df_overfitting[df_overfitting[rho_name] != "-"]
# df_overfitting = df_overfitting[[rho_name, "data_name", "train_f1_macro_delta", "val_f1_macro_delta", "test_f1_macro_delta"]]
# df_overfitting

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
data_clear_overfitting = ["twitter-hate-speech-tsa", "civil-comments", "cmsb-tsd", "founta-2018-thas", "davidson-thon", ]
data_no_overfitting = ["gibert-2018-shs", "us-election-2020", "ami"]

# Alternative configuration (datasets listed in data_no_overfitting):
# fig_name = "no_overfitting"
# data = data_no_overfitting
# rho_ticks = [1,2,3,5,7.5]

fig_name = "clear_overfitting"
data = data_clear_overfitting
rho_ticks = [1,2,3,5,7.5,10,15]

# Keep the baseline rows plus the random-oversampling rows of the selected datasets.
which_data_to_check = ((df_all_mean_results["variant"].isin(["baseline", "sampling_modifiedRS_oversampling"]))
                        & (df_all_mean_results["data_name"].isin(data))
                        )
rho_name = "sampling_modifiedRS_rho"
splits = ["train", "val"]
# .loc[...].copy() gives an independent frame, so adding the delta columns below
# neither raises SettingWithCopyWarning nor touches df_all_mean_results.
df_overfitting = df_all_mean_results.loc[
    which_data_to_check,
    ["data_name", rho_name] + [f"{split}_f1_macro" for split in splits],
].copy()

def compute_deviation_to_baseline(row, split):
    """Return the row's `{split}_f1_macro` minus the baseline value of the same dataset.

    The baseline row is the one in `df_overfitting` whose rho column holds "-".
    """
    baseline_condition = (df_overfitting["data_name"] == row["data_name"]) & (df_overfitting[rho_name] == "-")
    baseline_value = df_overfitting.loc[baseline_condition, f"{split}_f1_macro"].values[0]
    return row[f"{split}_f1_macro"] - baseline_value

for split in splits:
    df_overfitting[f"{split}_f1_macro_delta"] = df_overfitting.apply(lambda row: compute_deviation_to_baseline(row, split), axis=1)
# Drop the baseline rows themselves and keep only the delta columns.
df_overfitting = df_overfitting[df_overfitting[rho_name] != "-"]
df_overfitting = df_overfitting[["data_name", rho_name]+[f"{split}_f1_macro_delta" for split in splits]]

# Reshape to long format: one row per (dataset, rho, split) with the delta in "f1_delta".
df_overfitting_expanded = pd.DataFrame(columns=[rho_name, "data_name", "split", "f1_delta"])
# NOTE(review): [:-1] drops the last distinct rho value, so it is never plotted — confirm this is intended.
rhos_unique = df_overfitting[rho_name].unique().tolist()[:-1]
data_names_unique = df_overfitting.data_name.unique().tolist()
data_names_list = []
for data_name in data_names_unique:
    data_names_list += [data_name_orig2display[data_name]] * len(splits) * len(rhos_unique)
df_overfitting_expanded["data_name"] = data_names_list
rhos_list = []
for rho in rhos_unique:
    rhos_list += [rho] * len(splits)
df_overfitting_expanded[rho_name] = rhos_list * len(data_names_unique)
df_overfitting_expanded["split"] = splits * len(rhos_unique) * len(data_names_unique)
# Fill in the delta for every combination that exists in df_overfitting; the rest stay NaN.
for data_name in data_names_unique:
    for split in splits:
        for rho in rhos_unique:
            where_to_assign = ((df_overfitting_expanded["data_name"] == data_name_orig2display[data_name])
                                & (df_overfitting_expanded["split"] == split)
                                & (df_overfitting_expanded[rho_name] == rho))
            value_from_where = ((df_overfitting["data_name"] == data_name)
                                & (df_overfitting[rho_name] == rho))
            if value_from_where.any():
                value = df_overfitting.loc[value_from_where, f"{split}_f1_macro_delta"].values[0]
                df_overfitting_expanded.loc[where_to_assign, "f1_delta"] = value
df_overfitting_expanded = df_overfitting_expanded.rename({"data_name":"Dataset", "split":"Split"}, axis=1)

# One line per dataset (colour) and split (line style); the dash-dot line marks zero deviation.
fig, ax = plt.subplots(figsize=(4.8,4.95))
sns.lineplot(x=rho_name, y="f1_delta", data=df_overfitting_expanded,
                hue="Dataset", style="Split", marker="o", ax=ax)#
ax.set_xticks(rho_ticks, labels=rho_ticks)
ax.set_xlabel(r"Desired $\rho^{\prime}$ for Random Oversampling")
# delta_ticks = [y for y in list(range(-2, 8, 2))]
# ax.set_yticks(delta_ticks)
ax.set_ylabel(r'Deviation from Baseline: $\delta_{ROS}$')
# '-', '--', '-.', ':', 'None', ' ', '', 'solid', 'dashed', 'dashdot', 'dotted'
ax.axhline(y=0, linewidth=0.8, color="black", ls="-.")
ax.figure.tight_layout()
ax.figure.savefig(f'ROS_{fig_name}.svg', dpi=600, bbox_inches="tight")

In [None]:
# Fixed colour per dataset label: the i-th label gets the i-th colour of the
# current default seaborn palette.
data2color = dict(zip(
    [
        "Twitter-Hate-Speech",
        "Civil-Comments",
        'Gibert-2018',
        'US-Election-2020',
        'CMSB',
        'Founta-2018',
        'Davidson-2017',
        'AMI-2018',
    ],
    sns.color_palette(),
))

In [None]:
rho_name = "sampling_modifiedRS_rho"
splits = ["train", "val"]
data_clear_overfitting_binary = ["twitter-hate-speech-tsa", "civil-comments", "cmsb-tsd", ]
data_clear_overfitting_multi = ["founta-2018-thas", "davidson-thon", ]
# Random-oversampling rows of the binary and multi-class datasets listed above.
which_data_to_check = ((df_all_mean_results["variant"] == "sampling_modifiedRS_oversampling")
                       & (df_all_mean_results["data_name"].isin(data_clear_overfitting_binary+data_clear_overfitting_multi)))
# .copy() makes the selection independent so the column assignments below are
# not performed on a slice of df_all_mean_results (no SettingWithCopyWarning).
df_overfitting = df_all_mean_results.loc[
    which_data_to_check,
    ["data_name", rho_name] + [f"{split}_f1_macro" for split in splits],
].copy()

def compute_deviation_to_baseline(row, split):
    """Return the row's `{split}_f1_macro` minus the same dataset's "baseline" variant value."""
    baseline_condition = (df_all_mean_results["data_name"] == row["data_name"]) & (df_all_mean_results["variant"] == "baseline")
    baseline_value = df_all_mean_results.loc[baseline_condition, f"{split}_f1_macro"].values[0]
    return row[f"{split}_f1_macro"] - baseline_value

for split in splits:
    df_overfitting[f"{split}_f1_macro_delta"] = df_overfitting.apply(lambda row: compute_deviation_to_baseline(row, split), axis=1)
df_overfitting = df_overfitting[["data_name", rho_name]+[f"{split}_f1_macro_delta" for split in splits]].copy()

# Facet label per dataset (binary vs. multi-class).
binary_facet = "Overfitting:Binary Dataset"
multi_facet = "Overfitting: Multi-Class Dataset"
data2ifoverfitting = {data_name: binary_facet for data_name in data_clear_overfitting_binary}
data2ifoverfitting.update({data_name: multi_facet for data_name in data_clear_overfitting_multi})
df_overfitting["IfOverfitting"] = df_overfitting["data_name"].map(data2ifoverfitting)
df_overfitting["data_name"] = df_overfitting['data_name'].map(data_name_orig2display)
df_overfitting = df_overfitting.rename({"data_name":"Dataset"}, axis=1)

rc = {
    # 'figure.figsize':(5,4),
      'axes.facecolor':'white',
      'axes.grid' : True,
      'grid.color': '.8',
      'text.color': 'black',
      'xtick.color': 'black',
      'ytick.color': 'black',
      'font.family':'Times New Roman',
      'font.size' : 20}
plt.rcParams.update(rc)
# col_order pins the facet order; without it the order follows the row order of
# the data and axs[0]/axs[1] below could receive the wrong title and legend.
g = sns.FacetGrid(df_overfitting, col="IfOverfitting", col_order=[binary_facet, multi_facet],
                  hue="Dataset", sharex=False, sharey=False, height=4, aspect=1, palette=data2color)
# Validation deltas are dashed, training deltas solid.
g.map(sns.lineplot, "sampling_modifiedRS_rho", "val_f1_macro_delta", marker="o", linestyle='--')
g.map(sns.lineplot, "sampling_modifiedRS_rho", "train_f1_macro_delta", marker="o")
# g.add_legend()
# g._legend.remove()

axs = list(g.axes_dict.values())
axs[0].set_title('Overfitting: Binary Dataset')
rho_ticks = [1,2,3,5,7.5,10]
axs[0].set_xticks(rho_ticks, labels=rho_ticks)
axs[1].set_title('Overfitting: Multi-Class Dataset')
rho_ticks = [1,2,3,5,7.5, 10, 15]
axs[1].set_xticks(rho_ticks, labels=rho_ticks)

from matplotlib.lines import Line2D

def _overfitting_legend_handles(dataset_names):
    """Build legend proxies: one coloured line per dataset plus the Train/Val line styles.

    Colours are read from `data2color`, the same mapping the facets are drawn with,
    so the legend cannot drift from the plotted lines.
    """
    handles = [
        Line2D([0], [0], color=data2color[data_name_orig2display[name]], lw=1, linestyle="solid",
               marker="o", label=data_name_orig2display[name])
        for name in dataset_names
    ]
    handles.append(Line2D([0], [0], color="black", lw=1, linestyle="solid", label="Train"))
    handles.append(Line2D([0], [0], color="black", lw=1, linestyle="dashed", label="Val"))
    return handles

custom_lines = _overfitting_legend_handles(data_clear_overfitting_binary)
axs[0].legend(handles=custom_lines, title="Dataset", loc="lower right", bbox_to_anchor=(1, 0.))#
custom_lines = _overfitting_legend_handles(data_clear_overfitting_multi)
axs[1].legend(handles=custom_lines, title="Dataset", loc="upper right", bbox_to_anchor=(1, 1.))#

for ax in axs:
    ax.set_ylabel(r"$\delta_{ROS}$")
    ax.set_xlabel(r"$\rho^{\prime}$")
    ax.axhline(y=0, linewidth=0.8, color="black", ls="-.")
# All four sides set to False: keep every spine visible on the facets.
sns.despine(fig=None, ax=None, top=False, right=False, left=False, bottom=False, offset=False, trim=False)
# sns.move_legend(g, "upper left", bbox_to_anchor=(0.865, 0.5))
g.figure.tight_layout()
g.figure.savefig('ROS_overfitting.svg', dpi=600, bbox_inches="tight")

#### Changes with rho

In [None]:
rho_name = "sampling_modifiedRS_rho"
data_clear_ros = ["founta-2018-thas"]
# Baseline rows plus all random-oversampling rows (the dataset filter is disabled).
which_data_to_check = ((df_all_mean_results["variant"].isin(["baseline", "sampling_modifiedRS_oversampling"]))
                        # & ~(df_all_mean_results["data_name"].isin(data_clear_ros))
                        )
# .copy() detaches the selection so the delta column can be added without
# SettingWithCopyWarning.
df_ros = df_all_mean_results.loc[which_data_to_check, ["data_name", rho_name, "val_f1_macro"]].copy()

def compute_deviation_to_baseline(row, split):
    """Return the row's `{split}_f1_macro` minus the baseline value of the same dataset.

    The baseline row is the one in `df_ros` whose rho column holds "-".
    """
    baseline_condition = (df_ros["data_name"] == row["data_name"]) & (df_ros[rho_name] == "-")
    baseline_value = df_ros.loc[baseline_condition, f"{split}_f1_macro"].values[0]
    return row[f"{split}_f1_macro"] - baseline_value

df_ros["val_f1_macro_delta"] = df_ros.apply(lambda row: compute_deviation_to_baseline(row, "val"), axis=1)
# Remove the baseline rows and switch to display labels.
df_ros = df_ros[df_ros[rho_name] != "-"]
df_ros = df_ros[[rho_name, "data_name", "val_f1_macro_delta"]]
df_ros = df_ros.rename({"data_name":"Dataset"}, axis=1)
df_ros["Dataset"] = df_ros['Dataset'].map(data_name_orig2display)
rhos_unique = df_ros.sampling_modifiedRS_rho.unique().tolist()

# Validation macro-F1 deviation versus rho, one line per dataset.
fig, ax = plt.subplots(figsize=(5,4.6)) #
sns.lineplot(x="sampling_modifiedRS_rho", y="val_f1_macro_delta", data=df_ros,
                hue="Dataset", marker="o", ax=ax)#
rho_ticks = [1,2,3,5,7.5,10,15]
ax.set_xticks(rho_ticks, labels=rho_ticks)
ax.set_xlabel(r"$\rho^{\prime}$")
ax.set_ylabel(r"$\delta_{ROS}$")
ax.axhline(y=0, linewidth=0.8, color="black", ls="-.")
ax.figure.tight_layout()
ax.figure.savefig('ROS_trend.svg', dpi=600, bbox_inches="tight")

In [None]:
rho_name = "sampling_modifiedRS_rho"
data_noclear_rs = ["gibert-2018-shs", "us-election-2020", "ami"]
# Oversampling and undersampling rows of every dataset not listed in data_noclear_rs.
which_data_to_check = ((df_all_mean_results["variant"].isin(["sampling_modifiedRS_oversampling", "sampling_modifiedRS_undersampling"]))
                       & ~(df_all_mean_results["data_name"].isin(data_noclear_rs))
                       )
# .copy() detaches the selection so the delta column can be added without
# SettingWithCopyWarning.
df_ros_rus = df_all_mean_results.loc[which_data_to_check, ["data_name", "variant", rho_name, "val_f1_macro"]].copy()

def compute_deviation_to_baseline(row, split):
    """Return the row's `{split}_f1_macro` minus the same dataset's "baseline" variant value."""
    baseline_condition = (df_all_mean_results["data_name"] == row["data_name"]) & (df_all_mean_results["variant"] == "baseline")
    baseline_value = df_all_mean_results.loc[baseline_condition, f"{split}_f1_macro"].values[0]
    return row[f"{split}_f1_macro"] - baseline_value

df_ros_rus["val_f1_macro_delta"] = df_ros_rus.apply(lambda row: compute_deviation_to_baseline(row, "val"), axis=1)
df_ros_rus = df_ros_rus[[rho_name, "variant", "data_name", "val_f1_macro_delta"]]
df_ros_rus = df_ros_rus.rename({"data_name":"Dataset"}, axis=1)
df_ros_rus["Dataset"] = df_ros_rus['Dataset'].map(data_name_orig2display)

rc = {
    # 'figure.figsize':(5,4),
      'axes.facecolor':'white',
      'axes.grid' : True,
      'grid.color': '.8',
      'text.color': 'black',
      'xtick.color': 'black',
      'ytick.color': 'black',
      'font.family':'Times New Roman',
      'font.size' : 21}
plt.rcParams.update(rc)
# col_order pins oversampling to the first facet and undersampling to the second;
# without it the facet order follows the row order of the data and the titles
# set on axs[0]/axs[1] below could be swapped.
variant_order = ["sampling_modifiedRS_oversampling", "sampling_modifiedRS_undersampling"]
g = sns.FacetGrid(df_ros_rus, col="variant", col_order=variant_order, hue="Dataset", sharey=False,
                  height=4.2, aspect=1, legend_out=False, palette=data2color)
g.map(sns.lineplot, "sampling_modifiedRS_rho", "val_f1_macro_delta", marker="o")
# g.add_legend()
rho_ticks = [1,2,3,5,7.5,10,15]
axs = list(g.axes_dict.values())
axs[0].set_title('Random Oversampling')
axs[0].set_ylabel(r"$\delta_{ROS}$")
# axs[0].set_yticks([-3,-2,-1,0,1,2], labels=[-3,-2,-1,0,1,2])
axs[1].set_title('Random Undersampling')
axs[1].set_ylabel(r"$\delta_{RUS}$")

from matplotlib.lines import Line2D

def _dataset_legend_handles():
    """Build one legend proxy per plotted dataset, coloured via `data2color` like the facet lines."""
    legend_labels = ["Twitter-Hate-Speech", "Civil-Comments", "CMSB", "Founta-2018", "Davidson-2017"]
    return [
        Line2D([0], [0], color=data2color[label], lw=1, linestyle="solid", marker="o", label=label)
        for label in legend_labels
    ]

# Same entries in both panels; only the legend placement differs.
custom_lines = _dataset_legend_handles()
axs[0].legend(handles=custom_lines, title="Dataset", loc="center right", bbox_to_anchor=(1, 0.21))#
custom_lines = _dataset_legend_handles()
axs[1].legend(handles=custom_lines, title="Dataset", loc="lower right", bbox_to_anchor=(1, 0.))#
# rus_yticks = list(range(-10, 2, 2))
# axs[1].set_yticks(rus_yticks, labels=rus_yticks)
for ax in axs:
    ax.set_xticks(rho_ticks, labels=rho_ticks)
    ax.set_xlabel(r"$\rho^{\prime}$")
    ax.axhline(y=0, linewidth=0.8, color="black", ls="-.")
# All four sides set to False: keep every spine visible on the facets.
sns.despine(fig=None, ax=None, top=False, right=False, left=False, bottom=False, offset=None, trim=False)
# sns.move_legend(g, loc='lower left', bbox_to_anchor=(0.3, 0.19))
g.figure.tight_layout()
g.figure.savefig('ROS_RUS_trend.svg', dpi=600)

## Random Undersampling

In [None]:
data_noclear_lossinfo_imbalance = ["founta-2018-thas"]
# Baseline rows plus all random-undersampling rows (the dataset filter is disabled).
which_data_to_check = ((df_all_mean_results["variant"].isin(["baseline", "sampling_modifiedRS_undersampling"]))
                        # & (~df_all_mean_results["data_name"].isin(data_noclear_lossinfo_imbalance))
                        )
# .copy() detaches the selection so the delta column can be added without
# SettingWithCopyWarning.
df_rus = df_all_mean_results.loc[which_data_to_check, ["data_name", "sampling_modifiedRS_rho", "val_f1_macro"]].copy()

def compute_deviation_to_baseline(row, split):
    """Return the row's `{split}_f1_macro` minus the baseline value of the same dataset.

    The baseline row is the one in `df_rus` whose rho column holds "-".
    """
    baseline_condition = (df_rus["data_name"] == row["data_name"]) & (df_rus["sampling_modifiedRS_rho"] == "-")
    baseline_value = df_rus.loc[baseline_condition, f"{split}_f1_macro"].values[0]
    return row[f"{split}_f1_macro"] - baseline_value

df_rus["val_f1_macro_delta"] = df_rus.apply(lambda row: compute_deviation_to_baseline(row, "val"), axis=1)
# Remove the baseline rows and switch to display labels.
df_rus = df_rus[df_rus["sampling_modifiedRS_rho"] != "-"]
df_rus = df_rus[["sampling_modifiedRS_rho", "data_name", "val_f1_macro_delta"]]
df_rus = df_rus.rename({"data_name":"Dataset"}, axis=1)
df_rus["Dataset"] = df_rus['Dataset'].map(data_name_orig2display)
rhos_unique = df_rus.sampling_modifiedRS_rho.unique().tolist()

# Validation macro-F1 deviation versus rho, one line per dataset.
fig, ax = plt.subplots(figsize=(4,4))
sns.lineplot(x="sampling_modifiedRS_rho", y="val_f1_macro_delta", data=df_rus,
                hue="Dataset", marker="o", ax=ax)#
rho_ticks = [1,2,3,5,7.5,10,15]
ax.set_xticks(rho_ticks, labels=rho_ticks)
ax.set_xlabel(r"$\rho^{\prime}$")
ax.set_ylabel(r"$\delta_{RUS}$")
ax.axhline(y=0, linewidth=0.8, color="black", ls="-.")
ax.figure.tight_layout()
ax.figure.savefig('RUS_trend.svg', dpi=600, bbox_inches="tight")

## Combi RS

In [None]:
# data_noclear_lossinfo_imbalance = ["founta-2018-thas"]
# Baseline rows plus all "sampling_weightedRS_combi" rows (the dataset filter is disabled).
which_data_to_check = ((df_all_mean_results["variant"].isin(["baseline", "sampling_weightedRS_combi"]))
                        # & (~df_all_mean_results["data_name"].isin(data_noclear_lossinfo_imbalance))
                        )
rho_name = "sampling_weightedRS_percentage"
# NOTE(review): this cell reuses the name `df_rus` (also assigned by the
# random-undersampling cell) for the Combi RS data.
# .copy() detaches the selection so the delta column can be added without
# SettingWithCopyWarning.
df_rus = df_all_mean_results.loc[which_data_to_check, ["data_name", rho_name, "val_f1_macro"]].copy()

def compute_deviation_to_baseline(row, split):
    """Return the row's `{split}_f1_macro` minus the baseline value of the same dataset.

    The baseline row is the one in `df_rus` whose percentage column holds "-".
    """
    baseline_condition = (df_rus["data_name"] == row["data_name"]) & (df_rus[rho_name] == "-")
    baseline_value = df_rus.loc[baseline_condition, f"{split}_f1_macro"].values[0]
    return row[f"{split}_f1_macro"] - baseline_value

df_rus["val_f1_macro_delta"] = df_rus.apply(lambda row: compute_deviation_to_baseline(row, "val"), axis=1)
# Remove the baseline rows and switch to display labels.
df_rus = df_rus[df_rus[rho_name] != "-"]
df_rus = df_rus[[rho_name, "data_name", "val_f1_macro", "val_f1_macro_delta"]]
df_rus = df_rus.rename({"data_name":"Dataset"}, axis=1)
df_rus["Dataset"] = df_rus['Dataset'].map(data_name_orig2display)
rhos_unique = df_rus[rho_name].unique().tolist()

import os
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax = plt.subplots()#figsize=(4.3,4.4)
sns.lineplot(x=rho_name, y="val_f1_macro_delta", data=df_rus,
                hue="Dataset", marker="o", ax=ax)#
# One tick per sampling percentage that occurs in the data.
rho_ticks = rhos_unique
ax.set_xticks(rho_ticks, labels=rho_ticks)
ax.set_xlabel(r"Sampling Percentage for Combi RS")
# delta_ticks = [y for y in list(range(-2, 8, 2))]
# ax.set_yticks(delta_ticks)
ax.set_ylabel(r"Deviation from Baseline: $\delta_{Combi RS}$")
# '-', '--', '-.', ':', 'None', ' ', '', 'solid', 'dashed', 'dashdot', 'dotted'
ax.axhline(y=0, linewidth=0.8, color="black", ls="-.")
ax.figure.tight_layout()
# This is the only figure written into a sub-directory; create it so savefig
# does not fail on a fresh checkout.
os.makedirs("results", exist_ok=True)
ax.figure.savefig('results/Combi-RS_trend.svg', dpi=600, bbox_inches="tight")

## Compare ROS and Augmentation

In [None]:
# Variants to compare: the baseline, random oversampling, and three augmentation variants.
variant_to_check = [
    "baseline",
    "sampling_modifiedRS_oversampling",
    "augmentation_bert",
    "augmentation_abusive_lexicon",
    "augmentation_external_data",
]
which_data_to_check = (df_all_agg_results["variant"].isin(variant_to_check))
# Test macro-F1 followed by the per-label test F1 of labels 0..4.
metrics_to_check = ["test_f1_macro"] + [f"test_f1_per_label_{label_idx}" for label_idx in range(5)]
selected_columns = ["data_name", "variant", ] + metrics_to_check
df_mean_ros_aug = df_all_agg_results[which_data_to_check][selected_columns]
df_mean_ros_aug

In [None]:
# Wide table: one row per variant, one column per (dataset, metric) pair.
# NOTE(review): `data_names` is not defined in the visible cells — confirm it exists before this cell runs.
wide_columns = [
    f"{data_name}_{metric}"
    for data_name in data_names
    for metric in metrics_to_check
]
df_mean_ros_aug_by_data = pd.DataFrame(columns=["variant"] + wide_columns)
df_mean_ros_aug_by_data["variant"] = variant_to_check
# Metrics copied per (dataset, variant): macro F1 first, then labels 0..4.
copied_metrics = ["test_f1_macro"] + [f"test_f1_per_label_{i}" for i in range(5)]
for data_name in data_names:
    for variant in variant_to_check:
        conditions = (df_mean_ros_aug["data_name"] == data_name) & (df_mean_ros_aug["variant"] == variant)
        if not conditions.any():
            # No result for this dataset/variant pair: leave its cells empty.
            continue
        target_row = df_mean_ros_aug_by_data["variant"] == variant
        for metric in copied_metrics:
            value = df_mean_ros_aug.loc[conditions, metric].values[0]
            df_mean_ros_aug_by_data.loc[target_row, f"{data_name}_{metric}"] = value
df_mean_ros_aug_by_data.to_csv("compare_ROS_augmentation.csv", index=False)

### Weighted Cross Entropy

In [None]:
# Weighted-cross-entropy ("wce") rows of the two-class datasets.
conditions = (df_all_mean_results["variant"] == "wce") & (df_all_mean_results["num_classes"] == 2)
# .copy() detaches the selection so the columns assigned below are not written
# to a slice of df_all_mean_results (no SettingWithCopyWarning).
df_mean_wce = df_all_mean_results.loc[
    conditions,
    ["data_name", "wce_alpha", "val_f1_macro", "val_f1_per_label_0", "val_f1_per_label_1"],
].copy()

def compute_deviation_to_baseline(row):
    """Return the row's `val_f1_per_label_1` minus the same dataset's "baseline" variant value."""
    baseline_condition = (df_all_mean_results["data_name"] == row["data_name"]) & (df_all_mean_results["variant"] == "baseline")
    baseline_value = df_all_mean_results.loc[baseline_condition, "val_f1_per_label_1"].values[0]
    return row['val_f1_per_label_1'] - baseline_value

df_mean_wce["pos_f1_delta"] = df_mean_wce.apply(lambda row: compute_deviation_to_baseline(row), axis=1)
df_mean_wce["data_name"] = df_mean_wce["data_name"].map(data_name_orig2display)
df_mean_wce

In [None]:
# Weighted-cross-entropy ("wce") rows of the datasets with more than two classes.
conditions = (df_all_mean_results["variant"] == "wce") & (df_all_mean_results["num_classes"] > 2)
# .copy() detaches the selection so relabelling `data_name` below does not
# operate on a slice of df_all_mean_results (no SettingWithCopyWarning).
df_mean_wce_multi = df_all_mean_results.loc[
    conditions,
    ["data_name", "wce_alpha", "val_f1_macro", 'val_f1_per_label_0', 'val_f1_per_label_1', 'val_f1_per_label_2', 'val_f1_per_label_3', 'val_f1_per_label_4'],
].copy()
df_mean_wce_multi["data_name"] = df_mean_wce_multi["data_name"].map(data_name_orig2display)
df_mean_wce_multi

In [None]:
# Wide table of validation F1 scores: one row per alpha, three columns per dataset.
# wce_alpha_unique = df_mean_wce.wce_alpha.unique().tolist()
wce_alpha_unique = [0.1, 0.25, 0.75, 0.878, 0.888, 0.9, 0.93, 0.99]
# data_name_unique = df_mean_wce.data_name.unique().tolist()
data_name_unique = [
    data_name_orig2display[data_name]
    for data_name in ["twitter-hate-speech-tsa", "gibert-2018-shs", "us-election-2020"]
]
# Source column in df_mean_wce -> column suffix in the wide table.
source2suffix = (
    ("val_f1_macro", "macro_f1"),
    ("val_f1_per_label_0", "f1_non-hate"),
    ("val_f1_per_label_1", "f1_hate"),
)
wide_columns = [
    f"{data_name}_{suffix}"
    for data_name in data_name_unique
    for _, suffix in source2suffix
]
df_wce_delta_by_data = pd.DataFrame(columns=["wce_alpha"] + wide_columns)
df_wce_delta_by_data["wce_alpha"] = wce_alpha_unique
for data_name in data_name_unique:
    for wce_alpha in wce_alpha_unique:
        conditions = (df_mean_wce["data_name"] == data_name) & (df_mean_wce["wce_alpha"] == wce_alpha)
        if not conditions.any():
            # This dataset was not run with this alpha: leave its cells empty.
            continue
        target_row = df_wce_delta_by_data["wce_alpha"] == wce_alpha
        for source_col, suffix in source2suffix:
            value = df_mean_wce.loc[conditions, source_col].values[0]
            df_wce_delta_by_data.loc[target_row, f"{data_name}_{suffix}"] = value
df_wce_delta_by_data = df_wce_delta_by_data.sort_values(by=['wce_alpha']).reset_index(drop=True)
df_wce_delta_by_data

### Focal Loss

In [None]:
# Focal-loss ("fl") rows of the two-class datasets.
conditions = (df_all_mean_results["variant"] == "fl") & (df_all_mean_results["num_classes"] == 2)
# .copy() detaches the selection so relabelling `data_name` below does not
# operate on a slice of df_all_mean_results (no SettingWithCopyWarning).
df_mean_fl = df_all_mean_results.loc[
    conditions,
    ["data_name", "fl_gamma", "val_f1_macro", "val_f1_per_label_0", "val_f1_per_label_1"],
].copy()
df_mean_fl["data_name"] = df_mean_fl["data_name"].map(data_name_orig2display)

# Wide table: one row per gamma, three validation-F1 columns per dataset.
fl_gamma_unique = df_mean_fl.fl_gamma.unique().tolist()
data_name_unique = df_mean_fl.data_name.unique().tolist()
df_mean_fl_by_data = pd.DataFrame()
df_mean_fl_by_data["fl_gamma"] = fl_gamma_unique
for data_name in data_name_unique:
    for fl_gamma in fl_gamma_unique:
        conditions = (df_mean_fl["data_name"] == data_name) & (df_mean_fl["fl_gamma"] == fl_gamma)
        if conditions.any():
            target_row = df_mean_fl_by_data["fl_gamma"] == fl_gamma
            value = df_mean_fl.loc[conditions, "val_f1_macro"].values[0]
            df_mean_fl_by_data.loc[target_row, f"{data_name}_macro_f1"] = value
            value = df_mean_fl.loc[conditions, "val_f1_per_label_0"].values[0]
            df_mean_fl_by_data.loc[target_row, f"{data_name}_f1_non-hate"] = value
            value = df_mean_fl.loc[conditions, "val_f1_per_label_1"].values[0]
            df_mean_fl_by_data.loc[target_row, f"{data_name}_f1_hate"] = value
df_mean_fl_by_data = df_mean_fl_by_data.sort_values(by=['fl_gamma']).reset_index(drop=True)
df_mean_fl_by_data

In [None]:
# Focal-loss ("fl") rows of the "davidson-thon" dataset.
conditions = (df_all_mean_results["variant"] == "fl") & (df_all_mean_results["data_name"] == "davidson-thon")
# .copy() detaches the selection so relabelling `data_name` below does not
# operate on a slice of df_all_mean_results (no SettingWithCopyWarning).
df_mean_fl = df_all_mean_results.loc[
    conditions,
    ["data_name", "fl_gamma", "val_f1_macro", "val_f1_per_label_0", "val_f1_per_label_1", "val_f1_per_label_2", "val_f1_per_label_3"],
].copy()
df_mean_fl["data_name"] = df_mean_fl["data_name"].map(data_name_orig2display)

# Wide table: one row per gamma with macro F1 and the per-label F1 of labels 0..2.
fl_gamma_unique = df_mean_fl.fl_gamma.unique().tolist()
data_name_unique = df_mean_fl.data_name.unique().tolist()
df_mean_fl_by_data = pd.DataFrame()
df_mean_fl_by_data["fl_gamma"] = fl_gamma_unique
for data_name in data_name_unique:
    for fl_gamma in fl_gamma_unique:
        conditions = (df_mean_fl["data_name"] == data_name) & (df_mean_fl["fl_gamma"] == fl_gamma)
        if conditions.any():
            target_row = df_mean_fl_by_data["fl_gamma"] == fl_gamma
            value = df_mean_fl.loc[conditions, "val_f1_macro"].values[0]
            df_mean_fl_by_data.loc[target_row, f"{data_name}_macro_f1"] = value
            for i in range(3):
                value = df_mean_fl.loc[conditions, f"val_f1_per_label_{i}"].values[0]
                df_mean_fl_by_data.loc[target_row, f"{data_name}_f1_per_label_{i}"] = value
# The wide table never has a "data_name" column, so no column rename is needed.
df_mean_fl_by_data = df_mean_fl_by_data.sort_values(by=['fl_gamma']).reset_index(drop=True)
df_mean_fl_by_data

In [None]:
# Focal-loss ("fl") rows of the "founta-2018-thas" dataset.
conditions = (df_all_mean_results["variant"] == "fl") & (df_all_mean_results["data_name"] == "founta-2018-thas")
# .copy() detaches the selection so relabelling `data_name` below does not
# operate on a slice of df_all_mean_results (no SettingWithCopyWarning).
df_mean_fl = df_all_mean_results.loc[
    conditions,
    ["data_name", "fl_gamma", "val_f1_macro", "val_f1_per_label_0", "val_f1_per_label_1", "val_f1_per_label_2", "val_f1_per_label_3"],
].copy()
df_mean_fl["data_name"] = df_mean_fl["data_name"].map(data_name_orig2display)

# Wide table: one row per gamma with macro F1 and the per-label F1 of labels 0..3.
fl_gamma_unique = df_mean_fl.fl_gamma.unique().tolist()
data_name_unique = df_mean_fl.data_name.unique().tolist()
df_mean_fl_by_data = pd.DataFrame()
df_mean_fl_by_data["fl_gamma"] = fl_gamma_unique
for data_name in data_name_unique:
    for fl_gamma in fl_gamma_unique:
        conditions = (df_mean_fl["data_name"] == data_name) & (df_mean_fl["fl_gamma"] == fl_gamma)
        if conditions.any():
            target_row = df_mean_fl_by_data["fl_gamma"] == fl_gamma
            value = df_mean_fl.loc[conditions, "val_f1_macro"].values[0]
            df_mean_fl_by_data.loc[target_row, f"{data_name}_macro_f1"] = value
            for i in range(4):
                value = df_mean_fl.loc[conditions, f"val_f1_per_label_{i}"].values[0]
                df_mean_fl_by_data.loc[target_row, f"{data_name}_f1_per_label_{i}"] = value
# The wide table never has a "data_name" column, so no column rename is needed.
df_mean_fl_by_data = df_mean_fl_by_data.sort_values(by=['fl_gamma']).reset_index(drop=True)
df_mean_fl_by_data

In [None]:
# Focal-loss ("fl") rows of the "ami" dataset.
conditions = (df_all_mean_results["variant"] == "fl") & (df_all_mean_results["data_name"] == "ami")
# .copy() detaches the selection so relabelling `data_name` below does not
# operate on a slice of df_all_mean_results (no SettingWithCopyWarning).
df_mean_fl = df_all_mean_results.loc[
    conditions,
    ["data_name", "fl_gamma", "val_f1_macro", "val_f1_per_label_0", "val_f1_per_label_1", "val_f1_per_label_2", "val_f1_per_label_3", "val_f1_per_label_4"],
].copy()
df_mean_fl["data_name"] = df_mean_fl["data_name"].map(data_name_orig2display)

# Wide table: one row per gamma with macro F1 and the per-label F1 of labels 0..4.
fl_gamma_unique = df_mean_fl.fl_gamma.unique().tolist()
data_name_unique = df_mean_fl.data_name.unique().tolist()
df_mean_fl_by_data = pd.DataFrame()
df_mean_fl_by_data["fl_gamma"] = fl_gamma_unique
for data_name in data_name_unique:
    for fl_gamma in fl_gamma_unique:
        conditions = (df_mean_fl["data_name"] == data_name) & (df_mean_fl["fl_gamma"] == fl_gamma)
        if conditions.any():
            target_row = df_mean_fl_by_data["fl_gamma"] == fl_gamma
            value = df_mean_fl.loc[conditions, "val_f1_macro"].values[0]
            df_mean_fl_by_data.loc[target_row, f"{data_name}_macro_f1"] = value
            for i in range(5):
                value = df_mean_fl.loc[conditions, f"val_f1_per_label_{i}"].values[0]
                df_mean_fl_by_data.loc[target_row, f"{data_name}_f1_per_label_{i}"] = value
# The wide table never has a "data_name" column, so no column rename is needed.
df_mean_fl_by_data = df_mean_fl_by_data.sort_values(by=['fl_gamma']).reset_index(drop=True)
df_mean_fl_by_data

In [None]:
# Rows of the "wfl" variant for the two-class datasets.
conditions = (df_all_mean_results["variant"] == "wfl") & (df_all_mean_results["num_classes"] == 2)
# .copy() detaches the selection so relabelling `data_name` below does not
# operate on a slice of df_all_mean_results (no SettingWithCopyWarning).
df_mean_wfl = df_all_mean_results.loc[conditions, ["data_name", "fl_gamma", "wce_alpha", "test_f1_macro"]].copy()
df_mean_wfl["data_name"] = df_mean_wfl["data_name"].map(data_name_orig2display)

fl_gamma_unique = df_mean_wfl.fl_gamma.unique().tolist()
wce_alpha_unique = [0.1, 0.25, 0.75, 0.9, 0.99]
data_name_unique = df_mean_wfl.data_name.unique().tolist()
# Grid of every (gamma, alpha) pair: gammas cycle fastest, alphas are repeated in blocks.
df_mean_wfl_by_data = pd.DataFrame()
df_mean_wfl_by_data["fl_gamma"] = fl_gamma_unique * len(wce_alpha_unique)
df_mean_wfl_by_data["wce_alpha"] = sum([[alpha] * len(fl_gamma_unique) for alpha in wce_alpha_unique], [])
# Fill one test macro-F1 column per dataset; pairs without a result stay NaN.
for data_name in data_name_unique:
    for fl_gamma in fl_gamma_unique:
        for wce_alpha in wce_alpha_unique:
            value_is_from_where = (df_mean_wfl["data_name"] == data_name) & (df_mean_wfl["fl_gamma"] == fl_gamma) & (df_mean_wfl["wce_alpha"] == wce_alpha)
            if value_is_from_where.any():
                value = df_mean_wfl.loc[value_is_from_where, "test_f1_macro"].values[0]
                where_to_assign = (df_mean_wfl_by_data["fl_gamma"] == fl_gamma) & (df_mean_wfl_by_data["wce_alpha"] == wce_alpha)
                df_mean_wfl_by_data.loc[where_to_assign, f"{data_name}_macro_f1"] = value
df_mean_wfl_by_data = df_mean_wfl_by_data.sort_values(by=['fl_gamma', 'wce_alpha']).reset_index(drop=True)
df_mean_wfl_by_data

In [None]:
data_no_clear_fl = ["us-election-2020", "cmsb-tsd", "founta-2018-thas"]
which_data_to_check = (((df_all_mean_results["variant"] == "fl"))
                        & (~df_all_mean_results["data_name"].isin(data_noclear_rs))
                        )
df_mean_fl = df_all_mean_results[which_data_to_check][["data_name", "fl_gamma", "val_f1_macro"]]
def compute_deviation_to_baseline(row, results=None):
    """Return ``row``'s validation macro-F1 minus the baseline's for the same dataset.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``"data_name"`` and ``"val_f1_macro"``.
    results : pandas.DataFrame, optional
        Table to look the baseline up in; needs the columns ``data_name``,
        ``variant`` and ``val_f1_macro``. Defaults to the notebook-level
        ``df_all_mean_results``.

    Returns
    -------
    ``row["val_f1_macro"]`` minus the ``val_f1_macro`` of the first row in
    ``results`` whose ``variant`` is ``"baseline"`` and whose ``data_name``
    equals ``row["data_name"]``.

    Raises
    ------
    IndexError
        If ``results`` has no baseline row for that dataset.
    """
    if results is None:
        # Fall back to the notebook-wide results table (looked up at call time).
        results = df_all_mean_results
    baseline_condition = (results["data_name"] == row["data_name"]) & (results["variant"] == "baseline")
    baseline_values = results.loc[baseline_condition, "val_f1_macro"].values
    if len(baseline_values) == 0:
        # Same exception type as the bare `.values[0]` lookup, but with context.
        raise IndexError(f"no baseline row found for data_name={row['data_name']!r}")
    return row['val_f1_macro'] - baseline_values[0]
# Add the per-row gap to the baseline, switch to display names, and rename the
# dataset column for the plot legend. `.assign` builds a new frame instead of
# writing columns into the filtered selection, so no SettingWithCopyWarning is
# raised. The delta is computed from the original (unmapped) data_name values.
df_mean_fl = df_mean_fl.assign(
    val_f1_macro_delta=df_mean_fl.apply(compute_deviation_to_baseline, axis=1),
    data_name=df_mean_fl["data_name"].map(data_name_orig2display),
).rename(columns={"data_name": "Dataset"})

# Text/tick/grid styling for this figure; rcParams keys not listed here keep
# whatever value they already had.
rc = {
    'grid.color': '.8',
    'text.color': 'black',
    'xtick.color': 'black',
    'ytick.color': 'black',
    'font.family': 'Times New Roman',
    'font.size': 21,
}
plt.rcParams.update(rc)

fig, ax = plt.subplots(figsize=(4.8, 4.55))
sns.lineplot(
    data=df_mean_fl,
    x="fl_gamma",
    y="val_f1_macro_delta",
    hue="Dataset",
    marker="o",
    palette=data2color,
    ax=ax,
)

# Axes: one tick per listed gamma value, labels rotated to vertical.
gamma_ticks = [0.1, 0.2, 0.5, 1.0, 2.0, 5.0]
ax.set_xticks(gamma_ticks, labels=gamma_ticks, rotation=90)
ax.set_xlabel(r"$\gamma$")
ax.set_ylabel(r"$\delta_{FL}$")

# Hand-built legend: (index into the current seaborn palette, label).
# NOTE(review): the lines are coloured via `data2color` while these handles use
# sns.color_palette() indices; confirm the two agree for each dataset.
legend_palette = sns.color_palette()
legend_spec = [
    (0, "Twitter-Hate-Speech"),
    (1, "Civil-Comments"),
    (4, "CMSB"),
    (5, "Founta-2018"),
    (6, "Davidson-2017"),
]
custom_lines = [
    Line2D([0], [0], color=legend_palette[palette_idx], lw=1, linestyle="solid", marker="o", label=legend_label)
    for palette_idx, legend_label in legend_spec
]
ax.legend(handles=custom_lines, title="Dataset", loc="best")

# Zero line: points above it have a higher val_f1_macro than the baseline.
ax.axhline(y=0, linewidth=0.8, color="black", ls="-.")

ax.figure.tight_layout()
ax.figure.savefig('FL_trend.svg', dpi=600, bbox_inches="tight")

#### Why do some datasets not perform well with FL?

In [None]:
# For the datasets where FL looked unpromising, show macro-F1 next to the
# per-label F1 of every split, to check whether the negative class was the one
# that improved.
data_not_good_in_fl = ["us-election-2020", "waseem-and-hovy-2016"]
relevant_cols = [
    "data_name", "fl_gamma", "val_f1_macro", "test_f1_macro",
    # train/val/test x label 0..1, split-major order
    *(f"{split}_f1_per_label_{label}" for split in ("train", "val", "test") for label in range(2)),
]
which_row_to_check = (
    (df_all_mean_results["variant"] == "fl")
    & df_all_mean_results["data_name"].isin(data_not_good_in_fl)
)
df_all_mean_results.loc[which_row_to_check, relevant_cols]

In [None]:
# Per-label breakdown for the FL runs of the datasets listed in
# `data_good_in_fl`: macro-F1 plus the per-label F1 columns 0..4 of every split.
data_good_in_fl = ["founta-2018-thas", "ami"]
relevant_cols = [
    "data_name", "fl_gamma", "val_f1_macro", "test_f1_macro",
    # train/val/test x label 0..4, split-major order
    *(f"{split}_f1_per_label_{label}" for split in ("train", "val", "test") for label in range(5)),
]
which_row_to_check = (
    (df_all_mean_results["variant"] == "fl")
    & df_all_mean_results["data_name"].isin(data_good_in_fl)
)
df_all_mean_results.loc[which_row_to_check, relevant_cols]

### Weighted Focal Loss

In [None]:
# Validation macro-F1 of every "wfl" run, with datasets shown under their
# display names.
conditions = (df_all_mean_results["variant"] == "wfl")
# .copy() so the data_name assignment below modifies an independent frame
# rather than a slice of df_all_mean_results (avoids SettingWithCopyWarning).
df_mean_wfl = df_all_mean_results.loc[conditions, ["data_name", "wce_alpha", "fl_gamma", "val_f1_macro"]].copy()
df_mean_wfl["data_name"] = df_mean_wfl["data_name"].map(data_name_orig2display)
df_mean_wfl

### Augmentation

In [None]:
# Rows of the "augmentation_bert" variant: augmentation settings, macro-F1 and
# the per-label F1 (labels 0 and 1) of every split. Dataset names are left in
# their original (non-display) form.
conditions = (df_all_mean_results["variant"] == "augmentation_bert")
relevant_cols = [
    "data_name", "augmentation_rho", "augmentation_percentage", "augmentation_top_k",
    "val_f1_macro", "test_f1_macro",
    # train/val/test x label 0..1, split-major order
    *(f"{split}_f1_per_label_{label}" for split in ("train", "val", "test") for label in range(2)),
]
df_mean_aug_bert = df_all_mean_results.loc[conditions, relevant_cols]
df_mean_aug_bert

In [None]:
# All columns for the rows whose variant is one of the two listed below.
df_all_mean_results.loc[
    df_all_mean_results["variant"].isin(
        ["sampling_modifiedRS_oversampling", "augmentation_external_data"]
    )
]

In [None]:
# Compare modified-RS oversampling with external-data augmentation on every
# dataset in `data_names` except the last one.
data_to_check = data_names[:-1]
conditions = (df_all_mean_results["variant"].isin(["sampling_modifiedRS_oversampling", "augmentation_external_data"]) 
                & df_all_mean_results["data_name"].isin(data_to_check))
# The first four entries are identifiers/settings; everything from index 4 on
# is a metric column.
relevant_cols = ["data_name", "variant", "sampling_modifiedRS_rho", "augmentation_rho", "val_f1_macro", "test_f1_macro", 
                    "train_f1_per_label_0", "train_f1_per_label_1", 
                    "val_f1_per_label_0", "val_f1_per_label_1", 
                    "test_f1_per_label_0", "test_f1_per_label_1"]
# .copy() so later column assignments on df_mean_aug_ext act on an independent
# frame rather than on a slice of df_all_mean_results (avoids
# SettingWithCopyWarning).
df_mean_aug_ext = df_all_mean_results.loc[conditions, relevant_cols].copy()
def get_rho(row):
    """Return the first rho setting in ``row`` that is not the ``"-"`` placeholder.

    ``row["sampling_modifiedRS_rho"]`` is checked first, then
    ``row["augmentation_rho"]``. Returns ``None`` when both hold ``"-"``.
    """
    for rho_column in ("sampling_modifiedRS_rho", "augmentation_rho"):
        candidate = row[rho_column]
        if candidate != "-":
            return candidate
    return None
# Collapse the two rho columns into a single "rho", switch to display names,
# keep identifiers + rho + the metric columns (relevant_cols[4:]), then sort
# and export. `.assign` builds a new frame instead of writing columns into the
# filtered selection, so no SettingWithCopyWarning is raised.
df_mean_aug_ext = (
    df_mean_aug_ext
    .assign(
        rho=df_mean_aug_ext.apply(get_rho, axis=1),
        data_name=df_mean_aug_ext["data_name"].map(data_name_orig2display),
    )
    .loc[:, ["data_name", "variant", "rho"] + relevant_cols[4:]]
    .sort_values(by=['data_name', 'rho'])
    .reset_index(drop=True)
)
df_mean_aug_ext.to_csv("check_external_data.csv", index=False)