### Step 1: Import Libraries & Load Data

In [15]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_rel

# Read the survey responses from CSV; the preview below shows the
# participant / question / phase / response columns.
likert_csv = "../data/likert_responses.csv"
df = pd.read_csv(likert_csv)
df.head()

Unnamed: 0,participant,question,phase,response
0,1,Are you currently enrolled in a university deg...,unspecified,No
1,2,Are you currently enrolled in a university deg...,unspecified,Yes
2,3,Are you currently enrolled in a university deg...,unspecified,Yes
3,4,Are you currently enrolled in a university deg...,unspecified,Yes
4,5,Are you currently enrolled in a university deg...,unspecified,Yes


### Step 2: Normalize Likert Values

In [16]:
# Two 5-point answer scales are recognised. They are declared separately and
# then merged into the single lookup table used below.
agreement_scale = {
    "1 - Strongly disagree": 1,
    "2 - Disagree": 2,
    "3 - Neither disagree or agree": 3,
    # Misspelled variant kept as its own key -- presumably it matches a typo
    # in the raw responses (TODO confirm against the CSV).
    "3 - Neither diagree or agree": 3,
    "4 - Agree": 4,
    "5 - Strongly agree": 5,
}
effectiveness_scale = {
    "1 - Very ineffective": 1,
    "2 - Ineffective": 2,
    "3 - Neither effective or ineffective": 3,
    "4 - Effective": 4,
    "5 - Very effective": 5,
}
likert_map = {**agreement_scale, **effectiveness_scale}

# Any response text that is not a key of likert_map maps to NaN ...
numeric_scores = df["response"].map(likert_map)
df["score"] = numeric_scores
# ... and those rows are removed, so only mapped answers remain in df.
df.dropna(subset=["score"], inplace=True)
# With the NaNs gone the column can be stored as integers.
df["score"] = df["score"].astype(int)
df.head()

Unnamed: 0,participant,question,phase,response,score
105,1,I felt significantly anxious or stressed. (Que...,unspecified,2 - Disagree,2
106,2,I felt significantly anxious or stressed. (Que...,unspecified,4 - Agree,4
107,3,I felt significantly anxious or stressed. (Que...,unspecified,4 - Agree,4
108,4,I felt significantly anxious or stressed. (Que...,unspecified,2 - Disagree,2
110,6,I felt significantly anxious or stressed. (Que...,unspecified,3 - Neither disagree or agree,3


### Step 3: Descriptive Statistics per Question & Phase

In [17]:
# Summarise the numeric scores separately for every (question, phase) pair:
# count, mean, std, min, quartiles and max, as produced by describe().
scores_by_question_phase = df.groupby(["question", "phase"])["score"]
desc_stats = scores_by_question_phase.describe()
desc_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
question,phase,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Did you feel disconnected from your academic or work environment? (Question adapted from Hohne et al. (2022); Siegel et al. (2022)),after,15.0,2.733333,0.593617,2.0,2.0,3.0,3.0,4.0
Did you feel disconnected from your academic or work environment? (Question adapted from Hohne et al. (2022); Siegel et al. (2022)),during,15.0,3.533333,1.125463,2.0,3.0,3.0,4.5,5.0
Did you feel disconnected from your academic or work environment? (Question adapted from Hohne et al. (2022); Siegel et al. (2022)),unspecified,15.0,2.933333,1.032796,1.0,2.0,3.0,4.0,4.0
How did you perceive the communication with peers or colleagues? (Question adapted from Siegel et al. (2022)),after,10.0,3.9,0.737865,2.0,4.0,4.0,4.0,5.0
How did you perceive the communication with peers or colleagues? (Question adapted from Siegel et al. (2022)),during,12.0,2.666667,1.230915,1.0,2.0,2.0,4.0,5.0


### Step 4: Paired t-Tests for Repeated Measures

In [18]:
from itertools import combinations

# Smallest number of participants with a score in BOTH phases for which a
# paired t-test is still reported.
MIN_PAIRED_RESPONSES = 5


def paired_phase_ttests(scores_wide, min_pairs=MIN_PAIRED_RESPONSES):
    """Run a paired t-test for every pair of phases of every question.

    Parameters
    ----------
    scores_wide : pandas.DataFrame
        Wide table indexed by participant whose columns are a two-level
        (question, phase) index holding the numeric scores.
    min_pairs : int, optional
        Phase pairs with fewer than this many participants scored in both
        phases are skipped.

    Returns
    -------
    list of dict
        One record per tested pair with the keys ``question``, ``phase_1``,
        ``phase_2``, ``t_stat`` (rounded to 3 decimals), ``p_value``
        (rounded to 4 decimals) and ``n`` (number of paired participants).
    """
    records = []
    for question in scores_wide.columns.get_level_values(0).unique():
        # Slice the question once and keep only phases that have any score.
        per_phase = scores_wide[question].dropna(axis=1, how="all")
        # combinations() yields nothing for fewer than two phases, so
        # questions asked in a single phase are skipped automatically.
        for p1, p2 in combinations(per_phase.columns, 2):
            # A paired test needs both measurements from the same
            # participant, so drop anyone missing either phase.
            paired = per_phase[[p1, p2]].dropna()
            if len(paired) < min_pairs:
                continue
            tstat, pval = ttest_rel(paired[p1], paired[p2])
            records.append({
                "question": question,
                "phase_1": p1,
                "phase_2": p2,
                "t_stat": round(tstat, 3),
                "p_value": round(pval, 4),
                "n": len(paired)
            })
    return records


# Pivot to wide format: one row per participant, one column per
# (question, phase). pivot_table averages duplicate entries by default.
wide_df = df.pivot_table(index="participant", columns=["question", "phase"], values="score")

# Only questions that exist in multiple phases produce results.
t_results = paired_phase_ttests(wide_df)

pd.DataFrame(t_results)

Unnamed: 0,question,phase_1,phase_2,t_stat,p_value,n
0,Did you feel disconnected from your academic o...,after,during,-2.567,0.0224,15
1,Did you feel disconnected from your academic o...,after,unspecified,-0.716,0.486,15
2,Did you feel disconnected from your academic o...,during,unspecified,1.382,0.1887,15
3,How did you perceive the communication with pe...,after,during,3.545,0.0063,10
4,How did you perceive the communication with pe...,after,unspecified,1.0,0.3466,9
5,How did you perceive the communication with pe...,during,unspecified,-1.551,0.1519,11
6,I feel that my practical computer science skil...,after,during,1.655,0.1202,15
7,I feel that my practical computer science skil...,after,unspecified,0.235,0.8178,15
8,I feel that my practical computer science skil...,during,unspecified,-1.835,0.0878,15
9,I felt significantly anxious or stressed. (Que...,after,during,-2.028,0.0674,12
