In [2]:
import numpy as np
import pandas as pd
import math
from itertools import combinations
from datetime import datetime, timedelta

In [3]:
student_counts = pd.read_csv("../Data/student_counts.csv")
pair_counts = pd.read_csv("../Data/module_pairs.csv")
assignments = pd.read_csv("../Data/Assignments.csv")

In [5]:
instructors = pd.read_csv("../Data/instructors.csv")
instructors = instructors.drop(['Days and Times'], axis = 1)
instructors = instructors.drop_duplicates()

In [6]:
assignments["Start Date"] = pd.to_datetime(assignments["Start Date"])
assignments["Due Date"] = pd.to_datetime(assignments["Due Date"])
assignments = assignments.sort_values(by=["Due Date"], na_position="last")

In [5]:
# Stress Score calculation = [weight(level)*1] + [weight(i_or_g)*2] + [weight(type)*3] + [weight(gap)*4] + weightage/100
# then standardize it to range 0 to 10

In [6]:
# how important/stressful each of these factors are to students
# sum to 10?
# level: 1
# i_or_g: 2
# assignment_type: 3
# gap: 4

In [7]:
# how stressful each level within the factors are
weights = {
    "level": {
        "level_1k": 0.1,
        "level_2k": 0.2,
        "level_3k": 0.3,
        "level_4k": 0.4
    },
    "i_or_g": {
        "I": 0.3,
        "G": 0.4,
        "I&G": 0.3
    },
    "type": {
        "Presentation": 0.1,
        "Project": 0.25,
        "Participation": 0.05,
        "Quiz": 0.1,
        "Assignment": 0.15,
        "Exam": 0.35
    },
    "gap": {
        "One Week": 0.4,
        "Two Weeks": 0.3,
        "More Than Two Weeks": 0.15,
        "Others": 0.15 # assume weight to be 0.15 if gap is not known eg. start date is NA
    }
}

# Individual Stress Score Calculation (Per Assignment)

### Assumptions & Helper Functions

In [8]:
# max_weightage = by_date["Weightage"].max()
# max weightage is 70 based on the data
# we will assume that weightage for any graded components will not exceed 70% based on department's guidelines

max_weightage = 70

# max_stress is assuming that this is the maximum possible stress for a student,so we use the highest weights for each factor
# Stress Score calculation = [weight(level)*1] + [weight(i_or_g)*2] + [weight(type)*3] + [weight(gap)*4] + weightage/100
max_stress = max_weightage/100 + 0.4*1 + 0.4*2 + 0.35*3 + 0.4*4

def normalized_score(score):
    return (score)/(max_stress)*10

In [9]:
def get_gap(due_date, start_date):
    if pd.isnull(start_date):
        return "Others"
    else:
        gap = (due_date - start_date).days
        if gap <= 7:
            return "One Week"
        elif gap <= 14:
            return "Two Weeks"
        else:
            return "More Than Two Weeks"

### Single function that takes in assignment details, outputs a stress score

In [10]:
def indiv_score(weightage, assignment_type, i_g, level, start_date=None, due_date=None):
    gap = get_gap(due_date, start_date)
    stress_score = weightage/100 + weights['type'][assignment_type] * 3 + weights['i_or_g'][i_g] * 2 + weights['level'][level] * 1 + weights['gap'][gap] * 4
    normalized_stress = normalized_score(stress_score)
    return normalized_stress

In [11]:
example2 = indiv_score(7.5, "Assignment", "I", "level_3k", None, None)
example2

4.45054945054945

### Single function that takes in a df, outputs a df with stress score column

In [14]:
def indiv_score_df(df):
    for i in range(0, len(df)):
        weightage = df.loc[i,"Weightage"]
        assignment_type = df.loc[i,"Type"]
        i_g = df.loc[i,"Group or Individual"]
        level = df.loc[i,"Level"]
        start_date = df.loc[i,"Start Date"]
        due_date = df.loc[i,"Due Date"]
        
        stress_score = indiv_score(weightage, assignment_type, i_g, level, start_date, due_date)
        df.loc[i, "Stress"] = stress_score
    return df

In [15]:
indiv_scores = indiv_score_df(assignments)
indiv_scores

Unnamed: 0,Module Code,Semester,Name,Weightage,Type,Group or Individual,Start Date,Due Date,Level,Stress
4,DSA2101,2020,Quiz 1,5.0,Quiz,I,2021-01-12,2021-01-19,level_2k,6.043956
5,DSA2101,2020,Assignment 1,2.5,Assignment,I,2021-01-19,2021-01-26,level_2k,6.318681
6,DSA2101,2020,Assignment 2,2.5,Assignment,I,2021-02-09,2021-02-16,level_2k,6.318681
9,DSA2101,2020,Take-Home Midterms,30.0,Assignment,I,2021-03-02,2021-03-06,level_2k,6.923077
7,DSA2101,2020,Assignment 3,2.5,Assignment,I,NaT,2021-03-23,level_2k,4.120879
...,...,...,...,...,...,...,...,...,...,...
88,ST4253,2210,Participation,10.0,Participation,I,NaT,NaT,level_4k,4.065934
89,ST4253,2210,Tutorial Presentation,10.0,Presentation,I,NaT,NaT,level_4k,4.395604
101,ST4248,2220,Presentation,10.0,Presentation,G,NaT,NaT,level_4k,4.835165
103,ST3247,2220,Tutorial Attendance,10.0,Participation,I,NaT,NaT,level_3k,3.846154


# Pairwise Stress Score Calculations for Clashes

## For >= 2 clashes

In [17]:
def get_pairwise_stress(pair_cnt, class_size, clash_basescore):
    pairwise_stress = (pair_cnt/class_size) * clash_basescore
    
    return pairwise_stress

In [18]:
# returns all clash pairs on the specified date
def clash_pairs(df, date):
    data = []
    idx = list(df.index)
    assignment_combinations = list(combinations(idx, 2))
    
    # if no clashes
    if not assignment_combinations:
        return None
    else:
        for comb in assignment_combinations:
            assignment1 = comb[0]
            assignment2 = comb[1]
            scores = df.loc[df.index.isin([assignment1, assignment2]),'Stress'].tolist()
            modules = df.loc[df.index.isin([assignment1, assignment2]),'Module Code'].tolist()
            semester1 = df.loc[assignment1, "Semester"]
            semester2 = df.loc[assignment2, "Semester"]
            data.append({'Due Date': date, 'Semester': semester1, 'Module Code': modules, 'Assignment Index': [assignment1, assignment2], 'Stress': scores})

        df1 = pd.DataFrame(data)
        df1[['Module 1', 'Module 2']] = pd.DataFrame(df1['Module Code'].tolist(), index=df1.index)
        df1.drop('Module Code', axis=1, inplace=True)
    
        return df1

# Daily Stress Score Calculation

In [19]:
instructor_scores = indiv_scores.merge(instructors, how='left', on = ['Module Code', 'Semester'])
instructor_scores

Unnamed: 0,Module Code,Semester,Name,Weightage,Type,Group or Individual,Start Date,Due Date,Level,Stress,Instructor
0,DSA2101,2020,Quiz 1,5.0,Quiz,I,2021-01-12,2021-01-19,level_2k,6.043956,s/o Gopal Vikneswaran
1,DSA2101,2020,Assignment 1,2.5,Assignment,I,2021-01-19,2021-01-26,level_2k,6.318681,s/o Gopal Vikneswaran
2,DSA2101,2020,Assignment 2,2.5,Assignment,I,2021-02-09,2021-02-16,level_2k,6.318681,s/o Gopal Vikneswaran
3,DSA2101,2020,Take-Home Midterms,30.0,Assignment,I,2021-03-02,2021-03-06,level_2k,6.923077,s/o Gopal Vikneswaran
4,DSA2101,2020,Assignment 3,2.5,Assignment,I,NaT,2021-03-23,level_2k,4.120879,s/o Gopal Vikneswaran
...,...,...,...,...,...,...,...,...,...,...,...
113,ST4253,2210,Participation,10.0,Participation,I,NaT,NaT,level_4k,4.065934,Jialiang Li
114,ST4253,2210,Tutorial Presentation,10.0,Presentation,I,NaT,NaT,level_4k,4.395604,Jialiang Li
115,ST4248,2220,Presentation,10.0,Presentation,G,NaT,NaT,level_4k,4.835165,Ching Hway Lim
116,ST3247,2220,Tutorial Attendance,10.0,Participation,I,NaT,NaT,level_3k,3.846154,Ching Hway Lim


### Given an instructor name and date, get a df with a stress score for that day

In [20]:
# convert a date to its semester code
# to filter out assignments with incorrect due date (due date not in the semester)
def date_to_sem(date):
    year = date.year
    if date.month < 6:
        sem = str(year-1)[-2:] + '20'
    else:
        sem = str(year)[-2:] + '10'
    return int(sem)

In [22]:
# input = { instructor_scores : indiv_scores with additional instructor column,
#                 instructor :  the instructor logged in,
#                 date       :  date to calculate score for }
# output =  { daily_stress : stress score for that day, to be displayed to the given instructor }
def daily_stress_score(instructor_scores, instructor, date):
    # Filter by date
    df = instructor_scores[instructor_scores['Due Date'] == date]
    df = df.drop_duplicates(subset=df.columns.difference(['Instructor']))
    
    # Check that semester is correct
    sem = date_to_sem(date)
    df = df[df['Semester'] == sem]
    
    # if no assignments on that day
    if df.empty:
        return 0
    
    # if there are assignments on that day
    else:
        # check for clashes
        if clash_pairs(df, date) is None:
            return df['Stress'].tolist()[0]

        # if clashes
        else:
            # Get all clash pairs
            df1 = clash_pairs(df, date)

            # Filter clash pairs by instructor to get assignments affected for the instructor
            module_list = list(instructor_scores[instructor_scores['Instructor'] == instructor]['Module Code'].unique())
            df1 = df1[(df1['Module 1'].isin(module_list)) | (df1['Module 2'].isin(module_list))]
            

            # Get pairwise scores
            df1 = df1.merge(pair_counts, how='left', on=['Semester', 'Module 1', 'Module 2'])

            df1 = pd.merge(df1, student_counts, left_on=['Semester', 'Module 1'], right_on=['Semester', 'Module Code'], how = 'left')

            df1 = pd.merge(df1, student_counts, left_on=['Semester', 'Module 2'], right_on=['Semester', 'Module Code'], how = 'left')


            df1.rename(columns={"Count_y":"Module 1 Count", "Count":"Module 2 Count", "Count_x":"Pair Count"}, inplace=True)
            df1.drop(["Level_x", "Level_y", "Module Code_x", "Module Code_y"], axis=1, inplace=True)

            daily_score = 0
            if df1.loc[0, "Module 1"] in module_list:
                daily_score = df1.loc[0,"Stress"][0]
            else:
                daily_score = df1.loc[0,"Stress"][1]
            for i in range(0, len(df1)):
                pair_cnt = df1.loc[i, "Pair Count"]
                if df1.loc[i, 'Module 1'] in module_list:
                    class_size = df1.loc[i, "Module 1 Count"]
                    clash_basescore = df1.loc[i, "Stress"][1]
                else:
                    class_size = df1.loc[i, "Module 2 Count"] 
                    clash_basescore = df1.loc[i, "Stress"][0]

                pairwise_score = get_pairwise_stress(pair_cnt, class_size, clash_basescore) 
                daily_score += pairwise_score
                
            return daily_score

In [23]:
daily_stress_score(instructor_scores, 'Ching Hway Lim', pd.to_datetime('2023-02-07'))

4.45054945054945

In [24]:
daily_stress_score(instructor_scores, "Alexandre Hoang Thiery", pd.to_datetime("2023-04-14"))

10.339578454332552

## Deadline Extension Optimization (for the next 7 days)

### Helper functions

In [25]:
def seven_day_range(date):

    days = []
    current_date = date

    for i in range(0,7):
        days.append(current_date)
        current_date += timedelta(days=1)
    
    return days

In [26]:
seven_day_range(pd.to_datetime('2023-04-10'))

[Timestamp('2023-04-10 00:00:00'),
 Timestamp('2023-04-11 00:00:00'),
 Timestamp('2023-04-12 00:00:00'),
 Timestamp('2023-04-13 00:00:00'),
 Timestamp('2023-04-14 00:00:00'),
 Timestamp('2023-04-15 00:00:00'),
 Timestamp('2023-04-16 00:00:00')]

In [27]:
# assignment input is [module code, semester, name, weightage, type, group or individual, start date, due date, level]

def daily_stress_score_modified(instructor_scores, instructor, date, assignment):
    module_code = assignment[0]
    semester = assignment[1]
    name = assignment[2]
    weightage = assignment[3]
    assignment_type = assignment[4]
    i_or_g = assignment[5]
    start_date = assignment[6]
    due_date = date
    level = assignment[8]
    stress = indiv_score(weightage, assignment_type, i_or_g, level, start_date, due_date)
    
    # Filter by date
    df = instructor_scores[instructor_scores['Due Date'] == date]
    df = df.drop_duplicates(subset=df.columns.difference(['Instructor']))
    row = {'Module Code':module_code, 'Semester':semester, 'Name': name, 'Weightage':weightage, 'Type':assignment_type, 'Group or Individual': i_or_g, 'Start Date':start_date, 'Due Date':due_date, 'Level':level, 'Stress':stress, "Instructor":instructor}
    df = df.append(row, ignore_index=True)
    
    # Check that semester is correct
    sem = date_to_sem(date)
    df = df[df['Semester'] == sem]
    
    # if no assignments on that day
    if df.empty:
        return 0
    
    # if there are assignments on that day
    else:
        # check for clashes
        if clash_pairs(df, date) is None:
            return df['Stress'].tolist()[0]

        # if clashes
        else:
            # Get all clash pairs
            df1 = clash_pairs(df, date)

            # Filter clash pairs by instructor to get assignments affected for the instructor
            module_list = list(instructor_scores[instructor_scores['Instructor'] == instructor]['Module Code'].unique())
            df1 = df1[(df1['Module 1'].isin(module_list)) | (df1['Module 2'].isin(module_list))]
            

            # Get pairwise scores
            df1 = df1.merge(pair_counts, how='left', on=['Semester', 'Module 1', 'Module 2'])

            df1 = pd.merge(df1, student_counts, left_on=['Semester', 'Module 1'], right_on=['Semester', 'Module Code'], how = 'left')

            df1 = pd.merge(df1, student_counts, left_on=['Semester', 'Module 2'], right_on=['Semester', 'Module Code'], how = 'left')


            df1.rename(columns={"Count_y":"Module 1 Count", "Count":"Module 2 Count", "Count_x":"Pair Count"}, inplace=True)
            df1.drop(["Level_x", "Level_y", "Module Code_x", "Module Code_y"], axis=1, inplace=True)

            daily_score = 0
            if df1.loc[0, "Module 1"] in module_list:
                daily_score = df1.loc[0,"Stress"][0]
            else:
                daily_score = df1.loc[0,"Stress"][1]
            for i in range(0, len(df1)):
                pair_cnt = df1.loc[i, "Pair Count"]
                if df1.loc[i, 'Module 1'] in module_list:
                    class_size = df1.loc[i, "Module 1 Count"]
                    clash_basescore = df1.loc[i, "Stress"][1]
                else:
                    class_size = df1.loc[i, "Module 2 Count"] 
                    clash_basescore = df1.loc[i, "Stress"][0]

                pairwise_score = get_pairwise_stress(pair_cnt, class_size, clash_basescore) 
                daily_score += pairwise_score
                
            return daily_score

### Next 7 Days Function

In [28]:
def next_7_days(instructor_scores, instructor, assignment):
    due_date = assignment[7]
    days = seven_day_range(due_date)
    
    data = []
    for d in days:
        date = d
        daily_score = daily_stress_score_modified(instructor_scores, instructor, date, assignment)
        data.append({'Date':date,'Daily Stress Score':daily_score})
        
    daily_scores_df = pd.DataFrame(data)
    return daily_scores_df

In [29]:
assignment_eg = ["ST4248", 2220, "Final Report", 20, "Project", "G", None, pd.to_datetime("2023-04-10"), "level_4k"]

In [30]:
next_7_days(instructor_scores, "Ching Hway Lim", assignment_eg)

  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)


Unnamed: 0,Date,Daily Stress Score
0,2023-04-10,12.087912
1,2023-04-11,6.043956
2,2023-04-12,6.043956
3,2023-04-13,6.043956
4,2023-04-14,25.132932
5,2023-04-15,6.043956
6,2023-04-16,6.043956
