# A.1. SAR Model Analysis of Student Well-being and Academic Performance

This Python script loads a dataset of student well-being and academic performance, parses the spatial
information describing each student's seating position, constructs a row-normalized spatial weights
matrix from those positions, and fits a regression model with the statsmodels library to analyze the
influence of various predictors on student outcomes. Note that the model is estimated by ordinary least
squares (OLS) as an approximation to a spatial autoregressive (SAR) model. The script concludes by
visualizing the model's coefficients to show the impact of each variable:

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Read the student well-being workbook into a DataFrame.
# The path is a fixed local Windows location; adjust it to where the file lives.
file_path = 'C:\\Users\\home_folder\\students_wellbeing_digital.xlsx'
df = pd.read_excel(io=file_path)

# Turn an M1 seat code into spatial (column, row) coordinates
def parse_m1(m1_str):
    """Convert an M1 seat code into a ``(col, row)`` pair of integers.

    The code must have the form ``"<a>x<b>=<col>.<row>"``. The part before
    ``=`` is presumably the size of the seating grid (verify against the data
    dictionary); only the part after ``=`` is returned.

    Returns ``(None, None)`` when *m1_str* is missing according to ``pd.isna``.
    Raises ``ValueError`` when the code does not split into exactly the
    expected integer parts.
    """
    if not pd.isna(m1_str):
        grid_spec, seat_spec = m1_str.split('=')
        # The grid part is parsed purely as validation; its values are unused.
        _grid_a, _grid_b = [int(token) for token in grid_spec.split('x')]
        seat_col, seat_row = [int(token) for token in seat_spec.split('.')]
        return seat_col, seat_row
    return None, None

# Parse every M1 code once, then split the (col, row) pairs into two columns.
# Building the columns from the parsed Series (instead of unpacking
# zip(*...)) also works when the frame has no rows.
parsed_seats = df['M1'].apply(parse_m1)
df['M1_col'] = [seat[0] for seat in parsed_seats]
df['M1_row'] = [seat[1] for seat in parsed_seats]

# Students without a seat code get the placeholder coordinate 99.
# Assign the filled column back rather than calling fillna(inplace=True) on
# df[...]: the in-place form acts on a temporary object and does not update
# df when pandas Copy-on-Write is active.
df['M1_col'] = df['M1_col'].fillna(99)
df['M1_row'] = df['M1_row'].fillna(99)

# Create spatial weights matrix
# W has one row/column per DISTINCT seat coordinate, not per student row.
# NOTE(review): because of that, W's size generally differs from len(df), so
# it cannot be multiplied with y as-is -- confirm this is the intended design.
coords = df[['M1_col', 'M1_row']].values
unique_coords = np.unique(coords, axis=0)
n = len(unique_coords)

# Pairwise Euclidean distances between all unique seats, computed in one
# vectorised step. Two seats are neighbours when their distance is exactly 1,
# i.e. they differ by one step in a single coordinate.
offsets = unique_coords[:, np.newaxis, :] - unique_coords[np.newaxis, :, :]
W = (np.linalg.norm(offsets, axis=2) == 1).astype(float)

# Row-normalise so each seat's neighbour weights sum to 1. A seat with no
# neighbour (typically the 99/99 placeholder for missing positions) keeps an
# all-zero row instead of becoming NaN through a 0/0 division.
row_sums = W.sum(axis=1, keepdims=True)
W = np.divide(W, row_sums, out=np.zeros_like(W), where=row_sums > 0)

# Build the response vector and the design matrix for the model
y = df['2G'].values

# Predictors: every numeric column except the identifier, the response and
# the seat-position columns.
non_predictors = ['ID', '2G', 'M1', 'M1_col', 'M1_row']
numeric_predictors = df.drop(columns=non_predictors).select_dtypes(include=[np.number])
X = numeric_predictors.values

# Fill missing values with the column mean, scale each column to zero mean
# and unit variance, then add the intercept column.
mean_imputer = SimpleImputer(strategy='mean')
X = mean_imputer.fit_transform(X)
scaler = StandardScaler()
X = scaler.fit_transform(X)
X = sm.add_constant(X)

# Fit the regression model.
# NOTE: this is ordinary least squares of y on the standardized predictors.
# The spatial weights matrix W is not passed to the model, so no spatial-lag
# (autoregressive) term is estimated; the "SAR" naming is kept only so the
# variable names stay compatible with the rest of the script.
# W is deliberately left untouched here: adding an intercept column to it
# would make it non-square and would not influence the fit.
sar_model = sm.OLS(y, X)
sar_results = sar_model.fit()
print(sar_results.summary())

# Collect the fitted coefficients into a table, one row per model term.
# The predictor names are recomputed with the same column selection that was
# used to build X, so their order matches the order of the coefficients.
excluded_columns = ['ID','2G','M1','M1_col','M1_row']
predictor_names = df.drop(columns=excluded_columns).select_dtypes(include=[np.number]).columns
results = pd.DataFrame({
    'Variable': ['Intercept', *predictor_names],
    'Coefficient': sar_results.params,
})

# Horizontal bar chart of the coefficients, one bar per model term
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(results['Variable'], results['Coefficient'], color='skyblue')
ax.set_xlabel('Coefficient')
ax.set_ylabel('Variable')
ax.set_title('SAR Model Coefficients')
ax.grid(True)
plt.show()

# Display results table
# A bare expression is rendered only when it is the last statement of a
# notebook cell; when this file runs as a plain script this line has no
# visible effect.
results