# Digital Eye Strain (DES) – Statistical Analysis Notebook
This notebook reproduces all statistical analyses performed for the MSc thesis comparing **digital eye strain** in patients **with** and **without** dry eye disease (DED).

## 1. Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, f_oneway, spearmanr, shapiro
import statsmodels.formula.api as smf
from statsmodels.stats.diagnostic import het_breuschpagan

## 2. Load Data

In [None]:
# Location of the thesis workbook - edit if the file lives elsewhere
file_path = '/mnt/data/Final_Adjusted_DES_Scores.xlsx'

# Read the sheet and append the derived / short-named analysis columns in one
# chained step. Keyword order fixes the order in which the columns are added.
df = pd.read_excel(file_path).assign(
    # Age relative to the fixed reference year 2025
    Age=lambda d: 2025 - d['Year of Birth'],
    # Short aliases for the verbosely named workbook columns
    DED=lambda d: d['DED (Yes/No)'],
    Weekday_Use=lambda d: d['Internet Use (Weekdays - hrs/day)'],
    # Contact-lens flag is optional: fall back to an all-NaN column when absent
    Contact_Lens_Use=lambda d: (
        d['Contact Lenses OD'] if 'Contact Lenses OD' in d.columns else np.nan
    ),
)
df.head()

## 3. Descriptive Statistics

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for the numeric study variables
descriptive_cols = ['Age', 'Weekday_Use', 'Internet Use (Weekends - hrs/day)', 'DES_Score']
df[descriptive_cols].describe()

## 4. Group Comparisons

In [None]:
# Independent-samples t-tests on DES_Score between pairs of groups.

# --- DED status ---
# NOTE(review): get_group(0) / get_group(1) presumes DED is coded 0/1 - confirm against the workbook.
ded_scores = df.groupby('DED')['DES_Score']
non_ded_sample = ded_scores.get_group(0)
ded_sample = ded_scores.get_group(1)
print('DED vs non‑DED:', ttest_ind(non_ded_sample, ded_sample))

# --- Sex ---
gender_scores = df.groupby('Gender')['DES_Score']
male_sample = gender_scores.get_group('Male')
female_sample = gender_scores.get_group('Female')
print('Male vs Female:', ttest_ind(male_sample, female_sample))

# --- Contact lenses ---
# Only run when the column holds data and both codes (0 and 1) occur.
cl_values = df['Contact_Lens_Use']
if cl_values.notna().any() and {0, 1} <= set(cl_values.unique()):
    cl_scores = df.groupby('Contact_Lens_Use')['DES_Score']
    no_lens_sample = cl_scores.get_group(0)
    lens_sample = cl_scores.get_group(1)
    print('Contacts vs No Contacts:', ttest_ind(no_lens_sample, lens_sample))

## 5. Spearman Correlations

In [None]:
# Spearman rank correlation between DES_Score and daily internet use,
# computed separately within each DED group.
usage_columns = {
    ' Weekday:': 'Internet Use (Weekdays - hrs/day)',
    ' Weekend :': 'Internet Use (Weekends - hrs/day)',
}
for ded_label, group in df.groupby('DED'):
    print(f'\nDED={ded_label}')
    for prefix, usage_col in usage_columns.items():
        print(prefix, spearmanr(group['DES_Score'], group[usage_col]))

## 6. ANOVA Tests

In [None]:
symptom_cols = ['Shoulder/Neck Pain', 'Burning Sensation', 'Red Eyes',
               'Foreign Body Sensation', 'Prickling Sensation', 'Photophobia',
               'Watery Eyes', 'Diplopia', 'Halo']

# One DES_Score sample per symptom: the rows where that symptom column equals 1.
# NOTE(review): a patient reporting several symptoms appears in several samples,
# so the groups overlap, whereas one-way ANOVA assumes independent groups -
# confirm this design is intended.
anova_data = [df.loc[df[c] == 1, 'DES_Score'] for c in symptom_cols]

# A symptom nobody reports yields an empty sample, which makes f_oneway return
# nan for the whole test; leave such samples out of the comparison.
symptom_samples = [scores for scores in anova_data if len(scores) > 0]
if len(symptom_samples) >= 2:
    print('Symptoms ANOVA:', f_oneway(*symptom_samples))
else:
    print('Symptoms ANOVA: skipped (fewer than two symptoms have any patients)')

# Age group & usage bins
# pd.cut uses right-closed intervals, so without include_lowest=True the first
# edge itself (age 18, 0 hrs/day) falls outside every bin and becomes NaN even
# though the labels ('18‑29', 'Low') say it belongs to the first bin.
df['Age_Group'] = pd.cut(df['Age'], bins=[18,29,39,49,100],
                         labels=['18‑29','30‑39','40‑49','50+'],
                         include_lowest=True)
df['Usage_Bin'] = pd.cut(df['Weekday_Use'], bins=[0,4,6,24],
                         labels=['Low','Moderate','High'],
                         include_lowest=True)

# Both columns are categorical; observed=True keeps only the categories that
# actually occur, so an unused bin cannot contribute an empty group (which
# would turn the ANOVA result into nan).
age_samples = [g['DES_Score'] for _, g in df.groupby('Age_Group', observed=True)]
usage_samples = [g['DES_Score'] for _, g in df.groupby('Usage_Bin', observed=True)]
print('Age group ANOVA:', f_oneway(*age_samples))
print('Usage ANOVA   :', f_oneway(*usage_samples))

## 7. Multiple Linear Regression

In [None]:
# Ordinary least squares: DES_Score explained by age, weekday use and DED status
regression_formula = 'DES_Score ~ Age + Weekday_Use + DED'
model = smf.ols(regression_formula, data=df).fit()
print(model.summary())

# Assumption checks on the fitted residuals
resid = model.resid

# Normality of residuals (Shapiro-Wilk)
shapiro_result = shapiro(resid)
print('Shapiro p:', shapiro_result.pvalue)

# Heteroscedasticity (Breusch-Pagan): the second returned element is the p-value reported here
bp_stat, bp_pvalue, *bp_rest = het_breuschpagan(resid, model.model.exog)
print('Breusch‑Pagan p:', bp_pvalue)

## 8. Stratified Regression

In [None]:
# Refit the model without the DED term, once per DED group
stratum_formula = 'DES_Score ~ Age + Weekday_Use'
for val, sub in df.groupby('DED'):
    print(f'\nDED={val}')
    stratum_fit = smf.ols(stratum_formula, data=sub).fit()
    print(stratum_fit.summary())

## 9. Visualisations

In [None]:
# Age against DES score; weekday screen time drives both marker colour and marker size
fig, ax = plt.subplots(figsize=(8,6))
sns.scatterplot(
    data=df,
    x='Age',
    y='DES_Score',
    hue='Weekday_Use',
    size='Weekday_Use',
    sizes=(40,200),
    palette='cool',
    ax=ax,
)
ax.set_title('Age vs DES Score – Colored by Screen Use (Weekdays)')
# Anchor the legend just outside the right edge of the axes
ax.legend(title='Screen Use (hrs)', bbox_to_anchor=(1.05,1))
plt.show()