## Load data.

In [1]:
import numpy as np
import os
import pickle
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats

from helper_functions import set_yticks, SENT2LAB, SENT2IDX, SENT_COLORS, print_sigfig, pval_star

# Every directory used by the notebook is resolved relative to the notebook itself.
CWD = os.path.abspath("")  # Jupyter notebook path.
dir_input = os.path.join(CWD, "input")  # For params.
dir_dict = os.path.join(CWD, "dicts")  # Data to plot.
dir_output = os.path.join(CWD, "output")  # Folder to put figures in.
dir_npy = os.path.join(CWD, "npy")  # Data files needed for plotting figures.

# Set up plotting parameters.
sent_colors = ["#504DB2", "#414042", "#B2504D"]  # POS, NEU, NEG
# costra_colors = ["#2CBEC6", "#F59448"]  # collaborators, non-collaborators
# misc_colors = ["#E6B32F", "#3F6F88"]
font_kw = {"family": "arial", "weight": "normal", "size": "7"}  # Markers and such.
mpl.rc("font", **font_kw)  # Applies the font settings to all figures made afterwards.

# Load data to plot.
# Each "ratio_mat_rel-*" array is paired with a "groups-*" array; the group rows
# are turned into tuples (the figure cell formats them as x tick labels).
ratio_mat_rel = np.load(os.path.join(dir_npy, "ratio_mat_rel-t_collab.npy"))
year_ranges = np.load(os.path.join(dir_npy, "groups-t_collab.npy"))
year_ranges = [tuple(row) for row in year_ranges]

# "will_collab" data: reported as the pre-collab sample in the statistics cell.
rmrs_pre = np.load(os.path.join(dir_npy, "ratio_mat_rel-t_collab_will_collab.npy"))
yr_pre = np.load(os.path.join(dir_npy, "groups-t_collab_will_collab.npy"))
yr_pre = [tuple(row) for row in yr_pre]

# "has_collab" data: reported as the post-collab sample in the statistics cell.
rmrs_post = np.load(os.path.join(dir_npy, "ratio_mat_rel-t_collab_has_collab.npy"))
yr_post = np.load(os.path.join(dir_npy, "groups-t_collab_has_collab.npy"))
yr_post = [tuple(row) for row in yr_post]


# Plot parameters (e.g. "ylim_all"). JSON text is UTF-8 by specification, so the
# encoding is given explicitly instead of relying on the platform's locale default.
with open(os.path.join(dir_input, "params.json"), encoding="utf-8") as f:
    params = json.load(f)

## Make figures.

In [None]:
# Figure production: one mean ± std curve per sentiment across the year groups.
fig, ax = plt.subplots(figsize=(3.41, 3.41))
# Time to collaborate label.  The tuples hold NumPy scalars; convert them to
# built-in Python scalars first, because under NumPy >= 2 formatting the tuple
# directly yields labels such as "(np.int64(0), np.int64(1))".
xticklabels = [str(tuple(np.asarray(x).tolist())) for x in year_ranges]
x_arr = np.arange(len(xticklabels))
# Baseline (indistinguishable from null).
ax.plot(x_arr, np.zeros(len(x_arr)), color="grey", alpha=0.5, zorder=1, linestyle=":")
for i, color in enumerate(sent_colors):  # One curve for each of the 3 sentiments.
    # assumes ratio_mat_rel is indexed (year group, sentiment, sample) — TODO confirm
    samples = ratio_mat_rel[:, i, :]
    # NaN-aware statistics over the last axis; ddof=1 gives the sample std.
    m = np.nanmean(samples, axis=-1)
    std = np.nanstd(samples, axis=-1, ddof=1)
    ax.plot(x_arr, m, color=color)
    ax.fill_between(x_arr, m - std, m + std, color=color, alpha=0.3, edgecolor=None)

    # Disabled: flat reference band from `ratio_mat_rel_no` (not loaded in this notebook).
    # m = np.nanmean(ratio_mat_rel_no[i, :], axis=-1) * np.ones_like(x_arr)
    # std = np.nanstd(ratio_mat_rel_no[i, :], axis=-1, ddof=1) * np.ones_like(x_arr)
    # ax.plot(x_arr, m, color=sent_colors[i], linestyle="--")
    # ax.fill_between(x_arr, m - std, m + std, color=sent_colors[i], alpha=0.3, edgecolor=None)

ax.set_xlabel("Year Since First Collaborate", size=10)  # x-large
ax.set_ylabel("Sentiment", size=10)  # x-large
ax.set_xticks(x_arr, xticklabels)
ax.set_ylim(params["ylim_all"])
ax.grid(which="major", axis="x", alpha=0.2)

set_yticks(ax)  # Project helper; y tick handling is defined in helper_functions.
major_len = 6.5
major_width = 1.5
ax.tick_params(axis="x", which="major", length=major_len, width=major_width, labelsize=10)

fig.tight_layout()
plt.show()
# Saved through the Figure object (not pyplot's "current figure"), so the
# file is written from `fig` itself even though show() has already run.
fig.savefig(os.path.join(dir_output, "Time Since Collaborate.svg"), bbox_inches="tight", transparent=True)
fig.clf()  # Clear figure.
plt.close(fig=fig)  # Close figure.

In [None]:
# Per-sentiment comparison of the pre- vs post-collaboration samples:
# Welch's (unequal-variance) two-sided t-test plus mean ± sample std of each group.
print("two-sided Welch's $t$-test:\n")
for sent_key, row in SENT2IDX.items():
    print(f"{SENT2LAB[sent_key]}")

    pre_sample = rmrs_pre[row, :]
    post_sample = rmrs_post[row, :]

    # Descriptive statistics (ddof=1 -> sample standard deviation).
    pre_mean = np.mean(pre_sample)
    pre_std = np.std(pre_sample, ddof=1)
    post_mean = np.mean(post_sample)
    post_std = np.std(post_sample, ddof=1)

    # equal_var=False selects Welch's variant of the independent-samples test.
    welch = stats.ttest_ind(pre_sample, post_sample, equal_var=False, alternative="two-sided")
    ci = welch.confidence_interval()  # Default confidence level is 95%.

    # NOTE(review): the figure cell uses NaN-aware statistics while this cell does
    # not — confirm these samples never contain NaN.
    report_parts = [
        f"t({print_sigfig(welch.df)})={print_sigfig(welch.statistic)}, {pval_star(welch.pvalue)}",
        f", pre-post diff {print_sigfig((pre_mean-post_mean))}, 95% CI ({print_sigfig(ci.low)},{print_sigfig(ci.high)})",
        f"\npre-collab {print_sigfig(pre_mean)}±{print_sigfig(pre_std)}\npost-collab {print_sigfig(post_mean)}±{print_sigfig(post_std)}\n",
    ]
    print("".join(report_parts))