## Load data.

In [1]:
import numpy as np
import os
import csv
import pickle
import json
import matplotlib as mpl

from helper_functions import loadPKL, plot_scatter_and_fit, print_stats_lite, plot_effects_2_lite_NEG_POS
from prepare_plot_data import get_sample_size_department

# Every folder is resolved relative to the directory the notebook runs in.
CWD = os.path.abspath("")  # Jupyter notebook path.
dir_input = os.path.join(CWD, "input")  # For params.
dir_dict = os.path.join(CWD, "dicts")  # Data to plot.
dir_output = os.path.join(CWD, "output")  # Folder to put figures in.
dir_npy = os.path.join(CWD, "npy")  # Data files needed for plotting figures.

# Set up plotting parameters (applied globally through matplotlib's rc settings).
font_kw = {"family": "arial", "weight": "normal", "size": "7"}  # Markers and such.
mpl.rc("font", **font_kw)

# Load data to plot.
USE_CAPS = True  # Capitalize department names.
BY = "Department"  # Used in figure file names.

# Per-department lookups, used below as x-axis factors keyed by department name.
department2synthesis = loadPKL(dir_dict, "department2synthesis")
department2benchwork = loadPKL(dir_dict, "department2benchwork")

department2avg_n_coau = loadPKL(dir_dict, "department2avg_n_coau")
department2avg_n_country = loadPKL(dir_dict, "department2avg_n_country")
department2avg_n_department = loadPKL(dir_dict, "department2avg_n_department")

# Effect arrays and their matching group labels: one pair for all documents and
# one pair restricted to research articles (per the file names).
ratio_mat_rel = np.load(os.path.join(dir_npy, "ratio_mat_rel-department_effect.npy"))
departments_select = np.load(os.path.join(dir_npy, "groups-department_effect.npy"))
ratio_mat_rel_res = np.load(os.path.join(dir_npy, "ratio_mat_rel-department_effect-research-article.npy"))
departments_select_res = np.load(os.path.join(dir_npy, "groups-department_effect-research-article.npy"))
# Only referenced by the commented-out `grps_subset` alternative in later cells.
n_collab = get_sample_size_department(dir_dict)


# Read the JSON parameters with an explicit encoding so that decoding does not
# depend on the platform's locale default.
with open(os.path.join(dir_input, "params.json"), encoding="utf-8") as f:
    params = json.load(f)

## Make figures.

In [None]:
# Plot options shared by every figure; later cells reuse these names as well.
kwargs = {"caps": USE_CAPS, "alpha_err": 0.33, "by": BY}
# ylims for mean and bias, respectively (only the first entry is used below).
yln = [params["ylim_NEG"], None]
ylp = [params["ylim_POS"], None]

# Groups to annotate on figures.
# Alternative: grps_subset = np.array([k for k, v in n_collab.items() if v >= 1500])
grps_subset = ["biology", "neuroscience", "physics", "compute"]

# One figure per x-axis factor: (per-department lookup, axis label, x-limits).
for factor_by_department, x_lab, x_lim in (
    (department2synthesis, "Synthesis", [0, 0.21]),
    (department2benchwork, "Benchwork Score", None),
):
    # Order the factor values to match the rows of `departments_select`.
    x_factor = np.array([factor_by_department[c] for c in departments_select])
    plot_effects_2_lite_NEG_POS(
        ratio_mat_rel,
        departments_select,
        grps_subset,
        x_factor,
        x_lab,
        dir_output,
        x_lim,
        [yln[0], ylp[0]],
        **kwargs,
    )

In [None]:

# Figures for the collaboration-size factors. This cell relies on `kwargs`,
# `yln`, `ylp` and `grps_subset` having been defined by an earlier cell.
for factor_by_department, x_lab in (
    (department2avg_n_coau, "Average Number of Coauthors"),
    (department2avg_n_country, "Average Number of Countries"),
    (department2avg_n_department, "Average Number of Departments"),
):
    # Order the factor values to match the rows of `departments_select`.
    x_factor = np.array([factor_by_department[c] for c in departments_select])
    plot_effects_2_lite_NEG_POS(
        ratio_mat_rel,
        departments_select,
        grps_subset,
        x_factor,
        x_lab,
        dir_output,
        None,
        [yln[0], ylp[0]],
        **kwargs,
    )

In [None]:
# Same figures as the main synthesis/benchwork cell, but built from the
# research-article arrays (`ratio_mat_rel_res` / `departments_select_res`).
kwargs = {"caps": USE_CAPS, "alpha_err": 0.33, "by": BY}
# ylims for mean and bias, respectively (only the first entry is used below).
yln = [params["ylim_NEG"], None]
ylp = [params["ylim_POS"], None]

# Groups to annotate on figures.
# Alternative: grps_subset = np.array([k for k, v in n_collab.items() if v >= 1500])
grps_subset = ["biology", "neuroscience", "physics", "compute"]

# One figure per x-axis factor: (per-department lookup, axis label, x-limits).
for factor_by_department, x_lab, x_lim in (
    (department2synthesis, "Synthesis (research only)", [0, 0.21]),
    (department2benchwork, "Benchwork Score (research only)", None),
):
    # Order the factor values to match the rows of `departments_select_res`.
    x_factor = np.array([factor_by_department[c] for c in departments_select_res])
    plot_effects_2_lite_NEG_POS(
        ratio_mat_rel_res,
        departments_select_res,
        grps_subset,
        x_factor,
        x_lab,
        dir_output,
        x_lim,
        [yln[0], ylp[0]],
        **kwargs,
    )

### For more statistics, run the cells below.

In [None]:
from tabulate import tabulate


# Available formats: https://pypi.org/project/tabulate/#:~:text=M%20%2019%0A%2D%20%20%2D%20%20%2D%2D-,Table%20format,-There%20is%20more
# "psql" also looks nice here; "plain" is the format actually used.
tablefmt = "plain"

# Each entry: (per-department lookup, label, `headers` option for the sent=-1 table).
# NOTE(review): only the very first table uses headers="keys"; every other table
# uses "firstrow". Which is right depends on what print_stats_lite returns --
# confirm the difference is intentional.
stat_factors = (
    (department2synthesis, "Synthesis", "keys"),
    (department2benchwork, "Benchwork Score", "firstrow"),
)

for factor_by_department, xlab, neg_headers in stat_factors:
    # Order the factor values to match the rows of `departments_select`.
    x_factor = np.array([factor_by_department[c] for c in departments_select])
    # Two tables per factor: sent=-1 first, then sent=1.
    for sent, headers in ((-1, neg_headers), (1, "firstrow")):
        df = print_stats_lite(ratio_mat_rel, departments_select, x_factor, xlab, sent=sent, use_SEM=False)
        print(tabulate(df, headers=headers, tablefmt=tablefmt))

Supplement.

In [None]:
from tabulate import tabulate


# Available formats: https://pypi.org/project/tabulate/#:~:text=M%20%2019%0A%2D%20%20%2D%20%20%2D%2D-,Table%20format,-There%20is%20more
# "psql" also looks nice here; "plain" is the format actually used.
tablefmt = "plain"

# Each entry: (per-department lookup, label, `headers` option for the sent=-1 table).
# NOTE(review): only the very first table uses headers="keys"; every other table
# uses "firstrow". Which is right depends on what print_stats_lite returns --
# confirm the difference is intentional.
supplement_factors = (
    (department2avg_n_coau, "Avg # Author", "keys"),
    (department2avg_n_department, "Avg # Department", "firstrow"),
    (department2avg_n_country, "Avg # Country", "firstrow"),
)

for factor_by_department, xlab, neg_headers in supplement_factors:
    # Order the factor values to match the rows of `departments_select`.
    x_factor = np.array([factor_by_department[c] for c in departments_select])
    # Two tables per factor: sent=-1 first, then sent=1.
    for sent, headers in ((-1, neg_headers), (1, "firstrow")):
        df = print_stats_lite(ratio_mat_rel, departments_select, x_factor, xlab, sent=sent, use_SEM=False)
        print(tabulate(df, headers=headers, tablefmt=tablefmt))