In [1]:
import numpy as np
import pandas as pd
import math
from itertools import combinations

In [2]:
student_counts = pd.read_csv("../Data/student_counts.csv")
pair_counts = pd.read_csv("../Data/module_pairs.csv")
assignments = pd.read_csv("../Data/Assignments.csv")

In [3]:
assignments["Start Date"] = pd.to_datetime(assignments["Start Date"])
assignments["Due Date"] = pd.to_datetime(assignments["Due Date"])
assignments = assignments.sort_values(by=["Due Date"], na_position="last")

In [4]:
# Stress Score calculation = [weight(level)*1] + [weight(i_or_g)*2] + [weight(type)*3] + [weight(gap)*4] + weightage/100
# then standardize it to range 0 to 10

In [5]:
# how important/stressful each of these factors are to students
# sum to 10?
# level: 1
# i_or_g: 2
# assignment_type: 3
# gap: 4

In [6]:
# how stressful each level within the factors are
weights = {
    "level": {
        "level_1k": 0.1,
        "level_2k": 0.2,
        "level_3k": 0.3,
        "level_4k": 0.4
    },
    "i_or_g": {
        "I": 0.3,
        "G": 0.4,
        "I&G": 0.3
    },
    "type": {
        "Presentation": 0.1,
        "Project": 0.25,
        "Participation": 0.05,
        "Quiz": 0.1,
        "Assignment": 0.15,
        "Exam": 0.35
    },
    "gap": {
        "One Week": 0.4,
        "Two Weeks": 0.3,
        "More Than Two Weeks": 0.15,
        "Others": 0.15 # assume weight to be 0.15 if gap is not known eg. start date is NA
    }
}

# Individual Stress Score Per Assignment

### Assumptions

In [7]:
# max_weightage = by_date["Weightage"].max()
# max weightage is 70 based on the data
# we will assume that weightage for any graded components will not exceed 70% based on department's guidelines

max_weightage = 70

# max_stress is assuming that this is the maximum possible stress for a student,so we use the highest weights for each factor
max_stress = max_weightage/100* 0.4*1 + 0.4*2 + 0.35*3 + 0.4*4

def normalized_score(score):
    return (score)/(max_stress)*10

In [8]:
def get_gap(due_date, start_date):
    if pd.isnull(start_date):
        return "Others"
    else:
        gap = (due_date - start_date).days
        if gap <= 7:
            return "One Week"
        elif gap <= 14:
            return "Two Weeks"
        else:
            return "More Than Two Weeks"

### Input is a df, output is a df

In [9]:
def indiv_score_df(df):
    for i in range(0, len(df)):
        weightage = df.loc[i,"Weightage"]
        assignment_type = df.loc[i,"Type"]
        i_g = df.loc[i,"Group or Individual"]
        level = df.loc[i,"Level"]
        gap = get_gap(df.loc[i,"Due Date"], df.loc[i,"Start Date"])
        stress_score = weightage/100*weights['type'][assignment_type] * 3 + weights['i_or_g'][i_g] * 2 + weights['level'][level] * 1 + weights['gap'][gap] * 4
        normalized_stress = normalized_score(stress_score)
        df.loc[i, "Stress"] = normalized_stress
    return df

In [10]:
example = indiv_score_df(assignments)
example

Unnamed: 0,Module Code,Semester,Name,Weightage,Type,Group or Individual,Start Date,Due Date,Level,Stress
4,DSA2101,2020,Quiz 1,5.0,Quiz,I,2021-01-12,2021-01-19,level_2k,6.474531
5,DSA2101,2020,Assignment 1,2.5,Assignment,I,2021-01-19,2021-01-26,level_2k,6.464477
6,DSA2101,2020,Assignment 2,2.5,Assignment,I,2021-02-09,2021-02-16,level_2k,6.464477
9,DSA2101,2020,Take-Home Midterms,30.0,Assignment,I,2021-03-02,2021-03-06,level_2k,6.796247
7,DSA2101,2020,Assignment 3,2.5,Assignment,I,NaT,2021-03-23,level_2k,3.783512
...,...,...,...,...,...,...,...,...,...,...
88,ST4253,2210,Participation,10.0,Participation,I,NaT,NaT,level_4k,4.329759
89,ST4253,2210,Tutorial Presentation,10.0,Presentation,I,NaT,NaT,level_4k,4.369973
101,ST4248,2220,Presentation,10.0,Presentation,G,NaT,NaT,level_4k,4.906166
103,ST3247,2220,Tutorial Attendance,10.0,Participation,I,NaT,NaT,level_3k,4.061662


### Input is the assignment details, output is the stress score

In [11]:
def indiv_score(weightage, assignment_type, i_g, level, start_date=None, due_date=None):
    gap = get_gap(due_date, start_date)
    stress_score = weightage/100*weights['type'][assignment_type] * 3 + weights['i_or_g'][i_g] * 2 + weights['level'][level] * 1 + weights['gap'][gap] * 4
    normalized_stress = normalized_score(stress_score)
    return normalized_stress

In [12]:
example2 = indiv_score(7.5, "Assignment", "I", "level_3k")
example2

4.111930294906166

# Pairwise Stress Score Calculations for Clashes

## For >= 2 clashes

In [13]:
def get_pairwise_stress(data, date, module):
    index = data[data['Due Date'] == date].index.values[0]
    stress_list = data.loc[index,"Stress"]
    if (data.loc[index, 'Module 1'] == module):
        if (math.isnan(data.loc[index, 'Module 1 Count']) | math.isnan(data.loc[index, 'Pair Count'])):
            pairwise_stress = data['Pair Count'].mean()/data['Module 1 Count'].mean()*sum(stress_list)+stress_list[0]
        else:
            pairwise_stress = data.loc[index, 'Pair Count']/data.loc[index, 'Module 1 Count']*sum(stress_list)+stress_list[0]
    else:
        if (math.isnan(data.loc[index, 'Module 2 Count']) | math.isnan(data.loc[index, 'Pair Count'])):
            pairwise_stress = data['Pair Count'].mean()/data['Module 2 Count'].mean()*sum(stress_list)+stress_list[1]
        else:
            pairwise_stress = data.loc[index, 'Pair Count']/data.loc[index, 'Module 2 Count']*sum(stress_list)+stress_list[1]
    return pairwise_stress

In [14]:
def clash_pairs(df):
    indiv_df = indiv_score_df(df)
    indiv_df.groupby('Due Date').filter(lambda x: len(x) >= 2)

    dates = list(indiv_df['Due Date'].unique())

    data = []
    for date in dates:
        assignments = list(indiv_df[indiv_df['Due Date']==date].index)
        assignment_combinations = list(combinations(assignments, 2))
        for comb in assignment_combinations:
            assignment1 = comb[0]
            assignment2 = comb[1]
            scores = indiv_df.loc[indiv_df.index.isin([assignment1, assignment2]),'Stress'].tolist()
            modules = indiv_df.loc[indiv_df.index.isin([assignment1, assignment2]),'Module Code'].tolist()
            semester = indiv_df.loc[assignment1, "Semester"].tolist()
            data.append({'Due Date': date, 'Semester': semester, 'Module Code': modules, 'Assignment Index': [assignment1, assignment2], 'Stress': scores})

    output_df = pd.DataFrame(data)
    output_df[['Module 1', 'Module 2']] = pd.DataFrame(output_df['Module Code'].tolist(), index=output_df.index)
    output_df.drop('Module Code', axis=1, inplace=True)
    output_df = output_df.merge(pair_counts, how='left', on=['Semester', 'Module 1', 'Module 2'])
    output_df = pd.merge(output_df, student_counts, left_on=['Semester', 'Module 1'], right_on=['Semester', 'Module Code'], how = 'left')
    output_df = pd.merge(output_df, student_counts, left_on=['Semester', 'Module 2'], right_on=['Semester', 'Module Code'], how = 'left')
    output_df.rename(columns={"Count_x":"Pair Count", "Count_y":"Module 1 Count", "Count":"Module 2"}, inplace=True)
    output_df.drop(["Module Code_x", "Module Code_y", "Level_x", "Level_y"], axis=1, inplace=True)
    
    for i in range(0, len(output_df)):
        date = output_df.loc[i,"Due Date"]
        module1 = output_df.loc[i,"Module 1"]
        module2 = output_df.loc[i,"Module 2"]
        output_df.loc[i, "Module 1 Pair Stress"] = get_pairwise_stress(output_df, date, module1)
        output_df.loc[i, "Module 2 Pair Stress"] = get_pairwise_stress(output_df, date, module2)
    return output_df

In [15]:
test1 = clash_pairs(assignments)
test1

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().