In [6]:
import pandas as pd

# Load datasets into DataFrames
df1 = pd.read_csv('ca1-dataset.csv')
df2 = pd.read_csv('ca2-dataset.csv')

# Check column names
print(df1.columns)
print(df2.columns)

# Check a few rows
print(df1.head())
print(df2.head())

df1.columns = df1.columns.str.strip()
df2.columns = df2.columns.str.strip()

# Merge datasets based on UniqueID
df_merged = pd.merge(df1, df2, on='Unique-id', how='inner')

df_merged.to_csv('merged_dataset.csv', index=False)


Index(['Unique-id', 'namea', 'OffTask', 'Avgright', 'Avgbug', 'Avghelp',
       'Avgchoice', 'Avgstring', 'Avgnumber', 'Avgpoint', 'Avgpchange',
       'Avgtime', 'AvgtimeSDnormed', 'Avgtimelast3SDnormed',
       'Avgtimelast5SDnormed', 'Avgnotright', 'Avghowmanywrong-up',
       'Avghelppct-up', 'Avgwrongpct-up', 'Avgtimeperact-up',
       'AvgPrev3Count-up', 'AvgPrev5Count-up', 'Avgrecent8help',
       'Avg recent5wrong', 'Avgmanywrong-up', 'AvgasymptoteA-up',
       'AvgasymptoteB-up'],
      dtype='object')
Index(['Row', 'lesson', 'namea', 'prod', 'cell', 'right', 'bug', 'help',
       'choice', 'string', 'number', 'point', 'pknow-1', 'Pknow-2', 'pchange',
       'time', 'timeSDnormed', 'timelast3SDnormed', 'timelast5SDnormed',
       'notright', 'howmanywrong-up', 'helppct-up', 'wrongpct-up',
       'timeperact-up', 'Prev3Count-up', 'Prev5Count-up', 'recent8help',
       ' recent5wrong', 'manywrong-up', 'asymptoteA-up', 'asymptoteB-up',
       'Behaviour', 'Coder', 'Unique-id'],
 

In [23]:
#Help percentage change calculates the change in the amount of help needed from the previous activity
#this can be helpful in understanding if the amount of help needed is decreasing over the course of the questions to evaluate if the questions are increasing understandability in students
df_merged['helpPercentageChange'] = df_merged['Avghelppct-up'].diff().mean()
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,Prev3Count-up,Prev5Count-up,recent8help,recent5wrong,manywrong-up,asymptoteA-up,asymptoteB-up,Behaviour,Coder,helpPercentageChange
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,0,0,1,0,0,0,ON TASK,awagner,0.0
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,0,0,1,0,0,0,ON TASK,awagner,0.0
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,0,0,0,0,ON TASK,awagner,0.0
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,0,0,0,0,ON TASK,awagner,0.0
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,0,0,0,1,0,0,0,ON TASK,awagner,0.0


In [26]:
#Average Time taken to complete the actions that the students got correct
#this is a great way to understand how long it took a student to understand and execute the questions they are getting correct
df_merged['avgTimeCorrectAction'] = df_merged['Avgtime'] / df_merged['Avgright']
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,Prev5Count-up,recent8help,recent5wrong,manywrong-up,asymptoteA-up,asymptoteB-up,Behaviour,Coder,helpPercentageChange,avgTimeCorrectAction
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,0,1,0,0,0,ON TASK,awagner,0.0,12.0
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,0,1,0,0,0,ON TASK,awagner,0.0,12.0
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,0,0,0,ON TASK,awagner,0.0,7.5
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,0,0,0,ON TASK,awagner,0.0,7.5
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,0,0,1,0,0,0,ON TASK,awagner,0.0,25.333333


In [30]:
#Ratio of Help related actions to the total number of actions takes
#Can help us understand how often students are asking/needing help
df_merged['helpActionsRatio'] = df_merged['Avghelp'] / (df_merged['Avgright'] + df_merged['Avgbug'] + df_merged['Avghelp'])
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,recent8help,recent5wrong,manywrong-up,asymptoteA-up,asymptoteB-up,Behaviour,Coder,helpPercentageChange,avgTimeCorrectAction,helpActionsRatio
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,1,0,0,0,ON TASK,awagner,0.0,12.0,0.0
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,1,0,0,0,ON TASK,awagner,0.0,12.0,0.0
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,0,0,ON TASK,awagner,0.0,7.5,0.0
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,0,0,ON TASK,awagner,0.0,7.5,0.0
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,0,1,0,0,0,ON TASK,awagner,0.0,25.333333,0.0


In [32]:
#The percent of recent actions that students have gotten wrong
#Can help us understand how much students are answering recent questions incorrectly to understand better how to help them going forward
df_merged['percentageRecent5Wrong'] = df_merged['Avg recent5wrong'] / 5
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,recent5wrong,manywrong-up,asymptoteA-up,asymptoteB-up,Behaviour,Coder,helpPercentageChange,avgTimeCorrectAction,helpActionsRatio,percentageRecent5Wrong
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,1,0,0,0,ON TASK,awagner,0.0,12.0,0.0,0.2
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,1,0,0,0,ON TASK,awagner,0.0,12.0,0.0,0.2
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,0,ON TASK,awagner,0.0,7.5,0.0,0.0
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,0,ON TASK,awagner,0.0,7.5,0.0,0.0
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,1,0,0,0,ON TASK,awagner,0.0,25.333333,0.0,0.2


In [33]:
#the change in time per action compared to the previous action
#Can help us understand if a certain action is taking longer than another. Taking the average is better than looking at the exact time because some students may be slower workers than others and an average can guide us in understanding this better.
df_merged['avgTimeChange'] = df_merged['Avgtime'].diff().mean()
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,manywrong-up,asymptoteA-up,asymptoteB-up,Behaviour,Coder,helpPercentageChange,avgTimeCorrectAction,helpActionsRatio,percentageRecent5Wrong,avgTimeChange
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,0,0,ON TASK,awagner,0.0,12.0,0.0,0.2,-0.000573
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,0,0,ON TASK,awagner,0.0,12.0,0.0,0.2,-0.000573
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,ON TASK,awagner,0.0,7.5,0.0,0.0,-0.000573
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,0,ON TASK,awagner,0.0,7.5,0.0,0.0,-0.000573
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,0,0,0,ON TASK,awagner,0.0,25.333333,0.0,0.2,-0.000573


In [36]:
#calculating the weighted average of different action types to find the overall correctness
#this number can help us understand how students are performing overall and could possibly evaluate if the students understand the action
df_merged['overallCorrectness'] = (
    0.2 * df_merged['Avgright'] +
    0.1 * df_merged['Avgbug'] +
    0.1 * df_merged['Avghelp'] +
    0.1 * df_merged['Avgchoice'] +
    0.1 * df_merged['Avgstring'] +
    0.1 * df_merged['Avgnumber'] +
    0.1 * df_merged['Avgpoint'] +
    0.1 * df_merged['Avgpchange']
)
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,asymptoteA-up,asymptoteB-up,Behaviour,Coder,helpPercentageChange,avgTimeCorrectAction,helpActionsRatio,percentageRecent5Wrong,avgTimeChange,overallCorrectness
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,0,ON TASK,awagner,0.0,12.0,0.0,0.2,-0.000573,0.2
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,0,ON TASK,awagner,0.0,12.0,0.0,0.2,-0.000573,0.2
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,ON TASK,awagner,0.0,7.5,0.0,0.0,-0.000573,0.2
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,0,ON TASK,awagner,0.0,7.5,0.0,0.0,-0.000573,0.2
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,0,0,ON TASK,awagner,0.0,25.333333,0.0,0.2,-0.000573,0.2


In [37]:
#comparing how the overall correctness is changing for every action based on the previous action
#this can help us evaluate if students are performing better on a certain action versus another
df_merged['correctnessChange'] = df_merged['overallCorrectness'].diff().mean()
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,asymptoteB-up,Behaviour,Coder,helpPercentageChange,avgTimeCorrectAction,helpActionsRatio,percentageRecent5Wrong,avgTimeChange,overallCorrectness,correctnessChange
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,ON TASK,awagner,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0,ON TASK,awagner,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,ON TASK,awagner,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0,ON TASK,awagner,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,0,ON TASK,awagner,0.0,25.333333,0.0,0.2,-0.000573,0.2,-1.9e-05


In [38]:
#how efficient students are with their time based on the overall correctness divided by the average time they are spending
#if students are being methodical and systematic in their approach while solving an action
df_merged['timeEfficiency'] = df_merged['overallCorrectness'] / df_merged['Avgtime']
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,Behaviour,Coder,helpPercentageChange,avgTimeCorrectAction,helpActionsRatio,percentageRecent5Wrong,avgTimeChange,overallCorrectness,correctnessChange,timeEfficiency
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,ON TASK,awagner,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05,0.016667
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,ON TASK,awagner,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05,0.016667
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,ON TASK,awagner,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05,0.026667
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,ON TASK,awagner,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05,0.026667
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,ON TASK,awagner,0.0,25.333333,0.0,0.2,-0.000573,0.2,-1.9e-05,0.007895


In [40]:
#multiplying the pknow-2 and the overall correctness to understand if there is an increase in knowledge based on the actions
df_merged['knowledgeGain'] = df_merged['Pknow-2'] * df_merged['overallCorrectness']
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,Coder,helpPercentageChange,avgTimeCorrectAction,helpActionsRatio,percentageRecent5Wrong,avgTimeChange,overallCorrectness,correctnessChange,timeEfficiency,knowledgeGain
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,awagner,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05,0.016667,0.177657
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,awagner,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05,0.016667,0.177657
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,awagner,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05,0.026667,0.197693
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,awagner,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05,0.026667,0.177657
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,awagner,0.0,25.333333,0.0,0.2,-0.000573,0.2,-1.9e-05,0.007895,0.195941


In [43]:
#what is the change in recent correctness to better understand if students are gradually getting more questions correctly based on the help
df_merged['recentCorrectnessChange'] = (df_merged['Avgrecent8help'] - df_merged['Avg recent5wrong']).diff()
df_merged.head()

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,helpPercentageChange,avgTimeCorrectAction,helpActionsRatio,percentageRecent5Wrong,avgTimeChange,overallCorrectness,correctnessChange,timeEfficiency,knowledgeGain,recentCorrectnessChange
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05,0.016667,0.177657,
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05,0.016667,0.177657,0.0
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05,0.026667,0.197693,1.0
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05,0.026667,0.177657,0.0
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,0.0,25.333333,0.0,0.2,-0.000573,0.2,-1.9e-05,0.007895,0.195941,-1.0


In [44]:
display(df_merged.head())

Unnamed: 0,Unique-id,namea_x,OffTask,Avgright,Avgbug,Avghelp,Avgchoice,Avgstring,Avgnumber,Avgpoint,...,helpPercentageChange,avgTimeCorrectAction,helpActionsRatio,percentageRecent5Wrong,avgTimeChange,overallCorrectness,correctnessChange,timeEfficiency,knowledgeGain,recentCorrectnessChange
0,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05,0.016667,0.177657,
1,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZgy46jl,N,1.0,0.0,0,0,0,0,0,...,0.0,12.0,0.0,0.2,-0.000573,0.2,-1.9e-05,0.016667,0.177657,0.0
2,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05,0.026667,0.197693,1.0
3,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ77be0l,N,1.0,0.0,0,0,0,0,0,...,0.0,7.5,0.0,0.0,-0.000573,0.2,-1.9e-05,0.026667,0.177657,0.0
4,awagner-closeloop-ins_h1zaz4-03.30.2011_at_13:...,stuZ5lp7k7,N,1.0,0.0,0,0,0,0,0,...,0.0,25.333333,0.0,0.2,-0.000573,0.2,-1.9e-05,0.007895,0.195941,-1.0


In [45]:
df_merged.to_csv('merged_dataset_with_new_features.csv', index=False)

In [60]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score

# Load your data
data = pd.read_csv("merged_dataset_with_new_features.csv")

# Encode the target variable
le = LabelEncoder()
data['OffTask'] = le.fit_transform(data['OffTask'])

# Select features
selected_features = ["Avgright", "Avgbug", "Avghelp", "Avgchoice", "Avgstring", "Avgnumber", "Avgpoint", "Avgpchange", "Avgtime", "AvgtimeSDnormed", "Avgtimelast3SDnormed", "Avgtimelast5SDnormed", "Avgnotright", "Avghowmanywrong-up", "Avghelppct-up", "Avgwrongpct-up", "Avgtimeperact-up", "AvgPrev3Count-up", "AvgPrev5Count-up", "Avgrecent8help", "Avg recent5wrong", "Avgmanywrong-up", "AvgasymptoteA-up", "AvgasymptoteB-up", "helpPercentageChange", "helpActionsRatio", "percentageRecent5Wrong", "avgTimeChange", "overallCorrectness", "correctnessChange", "knowledgeGain", "recentCorrectnessChange"]
X = data[selected_features]
y = data['OffTask']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing values
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Initialize and train the classifier using imputed data
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_imputed, y_train)

# Make predictions using imputed data
y_pred = clf.predict(X_test_imputed)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
clf_report = classification_report(y_test, y_pred)
kappa_score = cohen_kappa_score(y_test, y_pred)
f1_value = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)


print("Classification Report:\n", clf_report)
print("Cohen's Kappa:", kappa_score)
print("Accuracy:", accuracy)
print("F1 Score", f1_value)
print("Precision", precision)

Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      1.00       346
           1       1.00      0.33      0.50         3

    accuracy                           0.99       349
   macro avg       1.00      0.67      0.75       349
weighted avg       0.99      0.99      0.99       349

Cohen's Kappa: 0.49784172661870496
Accuracy: 0.994269340974212
F1 Score 0.5
Precision 1.0


In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score

# Load your data
data = pd.read_csv("merged_dataset_with_new_features.csv")

# Encode the target variable
le = LabelEncoder()
data['OffTask'] = le.fit_transform(data['OffTask'])

# Select features
selected_features = ["Avgright", "Avgbug", "Avghelp", "Avgchoice", "Avgstring", "Avgnumber", "Avgpoint", "Avgpchange", "Avgtime", "AvgtimeSDnormed", "Avgtimelast3SDnormed", "Avgtimelast5SDnormed", "Avgnotright", "Avghowmanywrong-up", "Avghelppct-up", "Avgwrongpct-up", "Avgtimeperact-up", "AvgPrev3Count-up", "AvgPrev5Count-up", "Avgrecent8help", "Avg recent5wrong", "Avgmanywrong-up", "AvgasymptoteA-up", "AvgasymptoteB-up", "helpPercentageChange", "helpActionsRatio", "percentageRecent5Wrong", "avgTimeChange", "overallCorrectness", "correctnessChange", "knowledgeGain", "recentCorrectnessChange"]
X = data[selected_features]
y = data['OffTask']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing values
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Initialize and train the classifier using imputed data
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train_imputed, y_train)

# Make predictions using imputed data
y_pred = clf.predict(X_test_imputed)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
clf_report = classification_report(y_test, y_pred)
kappa_score = cohen_kappa_score(y_test, y_pred)
f1_value = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)


print("Classification Report:\n", clf_report)
print("Cohen's Kappa:", kappa_score)
print("Accuracy:", accuracy)
print("F1 Score", f1_value)
print("Precision", precision)


Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.98       346
           1       0.00      0.00      0.00         3

    accuracy                           0.97       349
   macro avg       0.50      0.49      0.49       349
weighted avg       0.98      0.97      0.98       349

Cohen's Kappa: -0.012661566868900076
Accuracy: 0.9684813753581661
F1 Score 0.0
Precision 0.0
