In [6]:
import pandas as pd

# Load the dataset into the 'data' variable
data = pd.read_csv('C:\\Users\\nicol\\Desktop\\INF2178_A3\\INF2178_A3_data.csv')

# Display the first few rows of the dataset to understand its structure
print(data.head())


   fallreadingscore  fallmathscore  fallgeneralknowledgescore  \
0             36.58          39.54                     33.822   
1             50.82          44.44                     38.147   
2             40.68          28.57                     28.108   
3             32.57          23.57                     15.404   
4             31.98          19.65                     18.727   

   springreadingscore  springmathscore  springgeneralknowledgescore  \
0               49.37            50.10                       40.533   
1               83.50            58.93                       37.409   
2               40.41            32.81                       29.312   
3               34.14            35.25                       27.382   
4               32.84            23.60                       26.977   

   totalhouseholdincome  incomeinthousands  incomegroup  
0              140000.0              140.0            3  
1              120000.0              120.0            3  
2       

In [7]:
# Basic statistical summary of the dataset
data.describe()

# Check for missing values in the dataset
data.isnull().sum()


fallreadingscore               0
fallmathscore                  0
fallgeneralknowledgescore      0
springreadingscore             0
springmathscore                0
springgeneralknowledgescore    0
totalhouseholdincome           0
incomeinthousands              0
incomegroup                    0
dtype: int64

In [8]:
from statsmodels.formula.api import ols
import statsmodels.api as sm

# ANCOVA for Reading scores
ancova_reading = ols('springreadingscore ~ incomegroup + fallgeneralknowledgescore', data=data).fit()
ancova_reading_table = sm.stats.anova_lm(ancova_reading, typ=2)
print(ancova_reading_table)


                                 sum_sq       df            F        PR(>F)
incomegroup                1.988014e+04      1.0   121.600734  3.859810e-28
fallgeneralknowledgescore  3.512812e+05      1.0  2148.679176  0.000000e+00
Residual                   1.950400e+06  11930.0          NaN           NaN


In [10]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Calculate the change in scores from fall to spring
data['reading_score_change'] = data['springreadingscore'] - data['fallreadingscore']
data['math_score_change'] = data['springmathscore'] - data['fallmathscore']

# Perform the ANCOVA for Reading scores
ancova_model_reading = ols('reading_score_change ~ C(incomegroup) + fallgeneralknowledgescore', data=data).fit()
ancova_results_reading = sm.stats.anova_lm(ancova_model_reading, typ=2)
print(ancova_results_reading)

# Perform the ANCOVA for Math scores
ancova_model_math = ols('math_score_change ~ C(incomegroup) + fallgeneralknowledgescore', data=data).fit()
ancova_results_math = sm.stats.anova_lm(ancova_model_math, typ=2)
print(ancova_results_math)


                                  sum_sq       df           F        PR(>F)
C(incomegroup)                287.485906      2.0    2.251247  1.053126e-01
fallgeneralknowledgescore   14054.124684      1.0  220.110317  2.354473e-49
Residual                   761671.036393  11929.0         NaN           NaN
                                  sum_sq       df           F         PR(>F)
C(incomegroup)                 55.879616      2.0    0.624286   5.356614e-01
fallgeneralknowledgescore   22425.932956      1.0  501.083959  9.425259e-109
Residual                   533880.499781  11929.0         NaN            NaN


In [11]:
# Perform the ANCOVA for Spring Reading scores
ancova_spring_reading = ols('springreadingscore ~ C(incomegroup) + fallgeneralknowledgescore', data=data).fit()
ancova_results_spring_reading = sm.stats.anova_lm(ancova_spring_reading, typ=2)
print(ancova_results_spring_reading)


                                 sum_sq       df            F        PR(>F)
C(incomegroup)             2.044032e+04      2.0    62.526295  9.694407e-28
fallgeneralknowledgescore  3.518276e+05      1.0  2152.459429  0.000000e+00
Residual                   1.949840e+06  11929.0          NaN           NaN


In [12]:
# Perform the ANCOVA for Spring Math scores
ancova_spring_math = ols('springmathscore ~ C(incomegroup) + fallgeneralknowledgescore', data=data).fit()
ancova_results_spring_math = sm.stats.anova_lm(ancova_spring_math, typ=2)
print(ancova_results_spring_math)


                                 sum_sq       df            F        PR(>F)
C(incomegroup)             1.281978e+04      2.0    67.934909  4.602575e-30
fallgeneralknowledgescore  4.524467e+05      1.0  4795.235313  0.000000e+00
Residual                   1.125542e+06  11929.0          NaN           NaN


In [13]:
# Perform the ANCOVA for Spring Reading scores with fall reading score as covariate
ancova_spring_reading_fall = ols('springreadingscore ~ C(incomegroup) + fallreadingscore', data=data).fit()
ancova_results_spring_reading_fall = sm.stats.anova_lm(ancova_spring_reading_fall, typ=2)
print(ancova_results_spring_reading_fall)


                        sum_sq       df             F    PR(>F)
C(incomegroup)    5.131201e+02      2.0      4.055660  0.017348
fallreadingscore  1.547042e+06      1.0  24455.397576  0.000000
Residual          7.546256e+05  11929.0           NaN       NaN


In [14]:
# Perform the ANCOVA for Spring Math scores with fall math score as covariate
ancova_spring_math_fall = ols('springmathscore ~ C(incomegroup) + fallmathscore', data=data).fit()
ancova_results_spring_math_fall = sm.stats.anova_lm(ancova_spring_math_fall, typ=2)
print(ancova_results_spring_math_fall)


                      sum_sq       df             F        PR(>F)
C(incomegroup)  1.712758e+03      2.0     18.523585  9.284861e-09
fallmathscore   1.026489e+06      1.0  22203.081238  0.000000e+00
Residual        5.514994e+05  11929.0           NaN           NaN
