In [1]:
import pandas as pd
import numpy as np
import math
import random

# Inputs

In [2]:
# Enrolment table used by every scheduler below (presumably one row per
# student/course registration — TODO confirm against the data file).
# The Excel reader is kept for reference as an alternative source.
# inCsv = pd.read_excel("../data/InputData.xlsx")
inCsv = pd.read_csv("../data/newSampleInput.csv")

# Room name -> number (presumably seat capacity — TODO confirm).
inRooms = {
    "Armes 200": 100,
    "Armes 111": 35,
    "Armes 204": 85,
    "Armes 201": 52,
    "Armes 205": 52,
    "Armes 208": 85,
}

# Fraction of a course's students expected to defer; multiplied by the
# enrolment count and rounded up wherever expected deferrals are computed.
deferralRate = 0.1

# Number of exam slots per day; index_to_timeslot() splits a flat slot index
# into (day, period) using this value.
examsPerDay = 3
# Period index -> display name. index_to_timeslot() looks up
# slotIndex % examsPerDay, so with examsPerDay = 3 only keys 0-2 are used and
# "Night" is reached only if examsPerDay is raised to 4.
slotNames = {
    0: "Morning",
    1: "Afternoon",
    2: "Evening",
    3: "Night"
}

# Helper Functions

In [3]:
def index_to_timeslot(slotIndex,examsPerDay, slotNames):
    """Convert a flat slot index into a human-readable "Day: D, Period" label.

    Args:
        slotIndex: Zero-based index of the slot across the whole exam period.
        examsPerDay: Number of slots that make up one day.
        slotNames: Mapping from period index within a day to its display name.

    Returns:
        A string of the form "Day: <day>, <period name>", where the day is
        zero-based.
    """
    dayNumber = math.floor(slotIndex / examsPerDay)
    periodLabel = slotNames[slotIndex % examsPerDay]
    return f"Day: {dayNumber}, {periodLabel}"

In [210]:

def create_JSON_course_data(df, studentHeader, courseHeader):
    """Summarise an enrolment table into one record per course.

    Course codes are parsed by length: 8 characters means a 4-letter subject
    followed by the number (e.g. "COMP4050"), 7 characters means a 3-letter
    subject (e.g. "SCI1002"). The first digit after the subject is taken as
    the course year. The subject "MBIO" is reported as "BIOL" (the
    "subjectYear" field keeps the original prefix).

    Args:
        df: DataFrame holding at least the two columns named below.
        studentHeader: Name of the column identifying the student.
        courseHeader: Name of the column holding the course code.

    Returns:
        dict mapping course code -> {
            "students": number of distinct students in the course,
            "conflicts": other courses taken by this course's students; a
                course appears once per shared student, so duplicates are
                expected,
            "year": int, "subject": str, "subjectYear": str,
            "numConflicts": len(conflicts), duplicates included,
        }

    Raises:
        ValueError: if a course code is neither 7 nor 8 characters long.
            (Previously this case printed a message and then failed with an
            unrelated KeyError.)
    """
    # One pass over the table: student -> that student's distinct courses in
    # row order. This replaces re-filtering the whole frame for every
    # (course, student) pair and yields the same conflicts in the same order.
    coursesByStudent = (
        df.groupby(studentHeader, sort=False)[courseHeader].unique().to_dict()
    )

    retData = {}

    for course in df[courseHeader].unique():
        studentsInCourse = df[df[courseHeader] == course][studentHeader].unique()

        if len(course) == 8:
            subjectLen = 4
        elif len(course) == 7:
            subjectLen = 3
        else:
            raise ValueError(
                f"Unrecognised course code {course!r}: expected 7 or 8 characters"
            )

        subj = course[:subjectLen].upper()
        if subj == "MBIO":
            # Only reachable for 4-letter subjects; a 3-letter prefix can never match.
            subj = "BIOL"

        conflicts = []
        for student in studentsInCourse:
            # .get() covers students missing from the grouped mapping (e.g. NaN ids).
            conflicts.extend(
                other for other in coursesByStudent.get(student, ()) if other != course
            )

        retData[course] = {
            "students": len(studentsInCourse),
            "conflicts": conflicts,
            "year": int(course[subjectLen]),
            "subject": subj,
            "subjectYear": course[:subjectLen + 1].upper(),
            "numConflicts": len(conflicts),
        }

    return retData

In [211]:
def dict_to_df_no_rooms(scheduleDict, courseDict, deferralRate):
    """Flatten a schedule dict into a DataFrame with one row per course.

    Args:
        scheduleDict: Mapping of time-slot label -> dict with a 'courses' list.
        courseDict: Mapping of course name -> dict with a 'students' count.
        deferralRate: Fraction of students expected to defer.

    Returns:
        DataFrame with columns "Time Slot", "Courses" and
        "Expected Deferrals" (students * deferralRate, rounded up).
    """
    slotRows = []
    for slotLabel, slotInfo in scheduleDict.items():
        slotRows.append((slotLabel, slotInfo['courses']))

    # One row per slot with a list of courses, then one row per course.
    perSlot = pd.DataFrame(slotRows, columns=["Time Slot", "Courses"])
    retDf = perSlot.explode("Courses").reset_index(drop=True)

    retDf['Expected Deferrals'] = retDf['Courses'].apply(
        lambda courseName: math.ceil(courseDict[courseName]['students']*deferralRate)
    )
    return retDf

# Global Variables

In [212]:
# Per-course summary (student count, conflict list, year, subject) keyed by
# course code, built from the "PIDM" (student) and "COURSE_IDENTIFICATION"
# columns of the input table.
coursesJSON = create_JSON_course_data(inCsv, "PIDM", "COURSE_IDENTIFICATION")

In [213]:
# Same entries as coursesJSON, re-ordered so the course with the most students
# comes first. The schedulers below iterate this dict in order, so larger
# courses are placed before smaller ones.
sortedCourses = dict(sorted(coursesJSON.items(), key=lambda item: item[1]['students'], reverse=True))

In [214]:
# Rooms re-ordered by their numeric value, largest first.
rooms = dict(sorted(inRooms.items(), key=lambda item: item[1], reverse=True))

# Algorithms To Test

- [Done]        Algo 1 - Graph Coloring
- Algo 2 - ??
- Algo 3 - Recursive (Check every possible combination) ??

Then perform analysis — which one is the most optimal:
- least conflicts
- uses number of rooms correctly
- shortest schedule
- Quickest to run

# Graph Coloring Scheduler

### Helper Functions

In [215]:
def json_to_adj_list(courseDict, nameIndex):
    """Translate per-course conflict lists into an index-based adjacency list.

    Args:
        courseDict: Mapping of course name -> dict with a 'conflicts' list of
            course names.
        nameIndex: Mapping of course name -> vertex index.

    Returns:
        A list with one entry per course in courseDict; entry i holds the
        vertex indices of the conflicts of the course whose index is i.
        Duplicate conflicts are kept as duplicate entries.
    """
    adjList = [[] for _ in range(len(courseDict))]

    for courseName, courseInfo in courseDict.items():
        neighbours = adjList[nameIndex[courseName]]
        for conflictName in courseInfo['conflicts']:
            neighbours.append(nameIndex[conflictName])

    return adjList

In [216]:
def greedy_colouring(adjList):
    """Greedily colour a graph given as an adjacency list.

    Vertices are visited in index order. Each vertex receives the smallest
    non-negative integer not already assigned to one of its previously
    coloured neighbours, so vertex 0 always gets colour 0.

    Args:
        adjList: List where entry v is an iterable of the vertex indices
            adjacent to v. Duplicate neighbours are allowed.

    Returns:
        A list of the same length as adjList; entry v is the colour (time-slot
        index) of vertex v. An empty graph yields an empty list (previously
        this raised IndexError).
    """
    numVertices = len(adjList)
    result = [-1]*numVertices

    for vertex in range(numVertices):
        # Colours already taken by neighbours that have been coloured so far;
        # neighbours still at -1 impose no constraint yet.
        usedSlots = {result[i] for i in adjList[vertex] if result[i] != -1}

        timeSlot = 0
        while timeSlot in usedSlots:
            timeSlot += 1

        result[vertex] = timeSlot

    return result

In [217]:
def color_to_schedule(graphColor, indexName, courseDict, deferralRate):
    """Group coloured vertices into a schedule keyed by time-slot label.

    Relies on the notebook-level examsPerDay and slotNames to turn each
    colour into a label via index_to_timeslot().

    Args:
        graphColor: List where entry v is the colour (slot index) of vertex v.
        indexName: Mapping of vertex index -> course name.
        courseDict: Mapping of course name -> dict with a 'students' count.
        deferralRate: Fraction of students expected to defer.

    Returns:
        dict mapping time-slot label -> {'courses': [...], 'numStudents': n},
        where n is the sum over the slot's courses of
        ceil(students * deferralRate).
    """
    retDict = {}
    for vertex, colour in enumerate(graphColor):
        timeSlot = index_to_timeslot(colour, examsPerDay, slotNames)
        courseName = indexName[vertex]
        expected = math.ceil(courseDict[courseName]['students']*deferralRate)

        if timeSlot not in retDict:
            retDict[timeSlot] = {'courses': [courseName], 'numStudents': expected}
            continue

        slotEntry = retDict[timeSlot]
        slotEntry['courses'].append(courseName)
        slotEntry['numStudents'] += expected
    return retDict

### Main Function

In [218]:
def graph_coloring_schedule(courseDict, deferralRate):
    """Build a schedule by greedy graph colouring of the conflict graph.

    Courses become vertices numbered in courseDict's iteration order, and
    conflicts become edges. Each colour produced by greedy_colouring() is
    treated as a time-slot index.

    Args:
        courseDict: Mapping of course name -> dict with 'conflicts' and
            'students'.
        deferralRate: Fraction of students expected to defer.

    Returns:
        The schedule dict produced by color_to_schedule().
    """
    indexName = dict(enumerate(courseDict))
    nameIndex = {name: index for index, name in indexName.items()}

    adjacency = json_to_adj_list(courseDict, nameIndex)
    colours = greedy_colouring(adjacency)

    return color_to_schedule(colours, indexName, courseDict, deferralRate)

### Running & Exporting Data

In [26]:
# Run the graph-colouring scheduler on the size-sorted courses, flatten the
# result to one row per (time slot, course) with expected deferrals, and write
# it to an Excel sheet named "Schedule".
gcSchedule = graph_coloring_schedule(sortedCourses, deferralRate)
# NOTE: despite its name, gcDict is the DataFrame returned by dict_to_df_no_rooms().
gcDict = dict_to_df_no_rooms(gcSchedule, coursesJSON, deferralRate)
gcDict.to_excel('../data/Outputs/New Sample Graph Coloring.xlsx', index=False, sheet_name='Schedule')

### Notes

The graph coloring algorithm solves the conflict problem and produces a schedule with no conflicts.
- Is it conflict free? Yes
- Is it the most optimal one? Probably not
- Does it work? Yes
- Is it quick? From start to finish, the code runs in under 1 minute


# Greedy Knapsack Scheduler

In [251]:
def box_score(inCourse, selectedBox, coursesJSON):
    """Score how suitable a box is for placing inCourse (higher is better).

    If inCourse is not in the box's 'conflicts' list, the box scores 1000.
    Otherwise the score is the sum of a penalty for every scheduled course
    that lists inCourse among its conflicts:

        base penalty                 -10
        same subject                 x 4
        same year                    x year ** 4
        then                         + 5 * |year difference|

    Args:
        inCourse: Name of the course to be placed.
        selectedBox: Box dict with 'scheduled' (course names already placed)
            and 'conflicts' (course names that conflict with the box).
        coursesJSON: Mapping of course name -> dict with 'conflicts',
            'subject' and 'year'.

    Returns:
        1000 for a conflict-free box, otherwise the accumulated penalty over
        all conflicting scheduled courses (0 if none of them matches).
        Previously only the penalty of the last conflicting course was
        returned, and the call failed when no scheduled course matched.
    """
    if inCourse not in selectedBox['conflicts']:
        return 1000

    score = 0
    for course in selectedBox['scheduled']:
        scheduledInfo = coursesJSON[course]
        if inCourse not in scheduledInfo['conflicts']:
            continue

        incomingInfo = coursesJSON[inCourse]
        courseScore = -10
        if incomingInfo['subject'] == scheduledInfo['subject']:
            courseScore = courseScore*4
        if incomingInfo['year'] == scheduledInfo['year']:
            courseScore = courseScore*math.pow(incomingInfo['year'], 4)
        yearDiff = abs(incomingInfo['year'] - scheduledInfo['year'])
        courseScore += 5 * yearDiff

        score += courseScore

    # Return the total over every conflicting course, not just the last one.
    return score

In [252]:
def find_random_max_box(boxScores):
    """Pick a box with the highest score, choosing at random among ties.

    Args:
        boxScores: Mapping of box id -> score.

    Returns:
        The id of one of the top-scoring boxes.
    """
    bestScore = max(boxScores.values())

    # Candidates keep the dict's iteration order so random.choice sees the
    # same sequence for the same input.
    topBoxes = []
    for boxId, value in boxScores.items():
        if value == bestScore:
            topBoxes.append(boxId)

    return random.choice(topBoxes)


In [253]:
def add_conflicts_in_box(courses, box):
    """Record which scheduled courses in a box conflict with another one in it.

    Mutates box in place: every course that is both scheduled in the box and
    listed as a conflict of another scheduled course is appended once to
    box['conflictsInSchedule'], and box['numConflicts'] is incremented for
    each course appended.

    Args:
        courses: Mapping of course name -> dict with a 'conflicts' list.
        box: Box dict with 'scheduled', 'conflictsInSchedule' and
            'numConflicts'.
    """
    scheduled = box['scheduled']
    recorded = box['conflictsInSchedule']

    for scheduledCourse in scheduled:
        for other in courses[scheduledCourse]['conflicts']:
            if other not in scheduled or other in recorded:
                continue
            recorded.append(other)
            box['numConflicts'] += 1

In [254]:
def fill_boxes(courses, numBoxes):
    """Greedily distribute courses over a fixed number of boxes (time slots).

    Courses are taken in the iteration order of `courses`. Each course goes to
    the box with the highest box_score(), with ties broken at random by
    find_random_max_box(), so results vary between runs unless the `random`
    module is seeded.

    Args:
        courses: Mapping of course name -> dict with at least 'conflicts',
            'subject' and 'year'.
        numBoxes: Number of boxes to create. With no boxes and at least one
            course, the max-score lookup has nothing to choose from and fails.

    Returns:
        dict mapping "Box 1".."Box N" -> {
            'scheduled': courses placed in the box,
            'conflicts': union of the conflicts of the scheduled courses,
            'conflictsInSchedule': scheduled courses that conflict with
                another scheduled course in the same box,
            'numConflicts': len(conflictsInSchedule),
        }
    """
    schedule = {
        f'Box {i+1}': {'scheduled': [], 'conflicts': [], 'conflictsInSchedule': [], 'numConflicts': 0}
        for i in range(numBoxes)
    }

    for course_id, course_info in courses.items():
        boxScores = {box_id: box_score(course_id, box, courses) for box_id, box in schedule.items()}

        targetBox = schedule[find_random_max_box(boxScores)]

        targetBox['scheduled'].append(course_id)
        targetBox['conflicts'] = list(set(targetBox['conflicts']) | set(course_info['conflicts']))

    for box in schedule.values():
        # Use the `courses` argument rather than the notebook-level
        # `sortedCourses`, so the function works for any input it is given.
        add_conflicts_in_box(courses, box)
    return schedule

In [255]:
# Distribute the size-sorted courses over 15 boxes. The box count is
# hard-coded here — TODO(review): promote it to a named value in the Inputs cell.
# Ties between boxes are broken with random.choice and no seed is set, so the
# assignment can differ from run to run.
filledBoxes = fill_boxes(sortedCourses, 15)

In [256]:
# Display the full box assignment (scheduled courses and conflicts per box).
filledBoxes

{'Box 1': {'scheduled': ['MBIO1220',
   'STAT2150',
   'SCI3990',
   'MATH2720',
   'COMP4050',
   'BIOL4542',
   'MATH2160',
   'COMP4360',
   'BIOL2600',
   'CHEM3700',
   'SCI1002',
   'BIOL7100',
   'SCI4980',
   'BIOL4220',
   'BIOL2892',
   'BIOL4554',
   'COMP4060',
   'COMP7210',
   'STAT7270',
   'CHEM4610',
   'BIOL4556',
   'MATH7260',
   'MATH3360',
   'SCI3002',
   'PHYS7400',
   'MATH8210'],
  'conflicts': ['CHEM1110',
   'MBIO3032',
   'CHEM2100',
   'CHEM1018',
   'MATH2740',
   'COMP3040',
   'MATH2130',
   'MATH1520',
   'MBIO4700',
   'CHEM2730',
   'PHYS2260',
   'COMP2160',
   'COMP4510',
   'BIOL2200',
   'MBIO7020',
   'MBIO1220',
   'BIOL3542',
   'BIOL3360',
   'BIOL3350',
   'MBIO7010',
   'BIOL1020',
   'MATH4260',
   'COMP4300',
   'COMP3190',
   'MATH2160',
   'BIOL2410',
   'BIOL2600',
   'MBIO4540',
   'BIOL4890',
   'COMP3020',
   'STAT7260',
   'PHYS4386',
   'MATH1018',
   'CHEM3320',
   'MBIO3282',
   'MBIO1010',
   'BIOL2520',
   'COMP4050',
   'STAT