In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter


def preprocess(filename):
    df = pd.read_excel(filename, header=None)
    df= df.replace(to_replace = {'Business Analytics':'c1', 
                             'Cloud Computing' : 'c2',
                             'Machine Learning' : 'c3',
                             'Data Analytics': 'c4',
                             'Optimization': 'c5',
                             'Stochastic': 'c6',
                             'Simulation': 'c7',
                             'Computational Discrete Optimization': 'c8'
                             },
               value = None)

    # extract group 1 and group 2 data from combined df
    df_group1 = df[df.iloc[:,3]=="Even (0, 2, 4, 6, 8)"].drop(columns=range(20,40)).replace(to_replace={'Even (0, 2, 4, 6, 8)':'Group1'})
    df_group2 = df[df.iloc[:,3]=="Odd (1, 3, 5, 7, 9)"].drop(columns=range(4,20)).replace(to_replace={'Odd (1, 3, 5, 7, 9)':'Group2'})

    # set colnames
    df_group1_colnames = ['Timestamp','Student','Gender','Group',
                          'c1','c2','c3','c4','c5','c6','c7','c8', # bids on courses
                          'R1','R2','R3','R4','R5','R6','R7','R8'  # ranks
                          ]
    df_group2_colnames = ['Timestamp','Student','Gender','Group',
                          'R1','R2','R3','R4','R5','R6','R7','R8', # ranks
                          'c1','c2','c3','c4','c5','c6','c7','c8', # bids on courses
                          't1','t2','t3','t4'                      # bids on time slots
                          ]

    df_group1.columns = df_group1_colnames
    df_group2.columns = df_group2_colnames

    # change datatype of bids from str to int
    df_group1 = df_group1.apply(pd.to_numeric, downcast='integer', errors='ignore')
    df_group2 = df_group2.apply(pd.to_numeric, downcast='integer', errors='ignore')

    # index each student using their UNI
    df_group1.index = df_group1.Student.str[:6]
    df_group1.index.name = 'UNI'
    df_group2.index = df_group2.Student.str[:6]
    df_group2.index.name = 'UNI'

    # check bid criteria is met
    df_group1['CourseBidCriteria'] = (df_group1.loc[:,'c1':'c8'].sum(axis=1) == 100)
    df_group2['CourseBidCriteria'] = (df_group2.loc[:,'c1':'c8'].sum(axis=1) == 100)
    df_group2['TimeBidCriteria'] = (df_group2.loc[:,'t1':'t4'].sum(axis=1) == 100)

    return df_group1, df_group2

def get_pref(df, sc=False):
    '''
    Returns a dictionary of students' preferences, with student UNI as the key
    sc=False gives all courses, sc=True gives only semi-core courses
    '''
    pref_dict = {}
    if sc:
        for UNI, row in df.loc[:,'R1':'R8'].iterrows():
            sc_list = []
            for c in row.values:
                if c in ['c1','c3','c5','c6']: # if course is semi-core
                    sc_list.append(c)
            pref_dict[UNI] = sc_list

    else:  
        for UNI, row in df.loc[:,'R1':'R8'].iterrows():
            pref_dict[UNI] = list(row.values)

    return pref_dict

def modified_bid(df, seed = 42):
    '''
    Adds a random real number x drawn from uniform distribution for each student-course pair
    Modifies each positive bid b>0 as b'=b+x
    Returns a modified bid matrix
    '''
    np.random.seed(seed)
    df_ = df.loc[:,'c1':'c8']
    X = np.random.uniform(size=df_.shape)
    mod_bids = df_ + X
    # mod_bids[mod_bids < 1] = 0
    return mod_bids

filename = "StudentForm (Combined) (Responses).xlsx"
(df_group1, df_group2) = preprocess(filename)

In [2]:
sc = ['c1','c3','c5','c6']
course = ['c1','c2','c3','c4','c5','c6','c7','c8']
hd = ['c1','c2','c3','c4']
ld = ['c5','c6','c7','c8']

# Generate course capacities
def capacity(df):
    cap = {c: 0 for c in course}
    
    # 4 semi-core courses take at least len(df) people
    capOfSC = round(len(df)/4)
    lastSC = len(df) - 3*capOfSC
    for c in sc:
        cap[c] += capOfSC
    cap[sc[0]] = lastSC
    
    remainTotalSeats = 3*len(df) - 3*capOfSC - lastSC
    capOfAll = round(remainTotalSeats/8)
    for c in course:
        cap[c] += capOfAll

    ldCap = {c: 11 for c in ld}
    cap.update(ldCap)

    return cap

# Test 2

In [3]:
def newCap(c, k, bid_sc_k):
    a = 0
    if bid_sc_k.get(c):
        a = len(bid_sc_k.get(c))
    return k - a

# assign 1 semi-core
def assignSC(df):
    stop = False
    r = 0                             # starts with 0 (first round)
    cap = capacity(df)                # init capacity
    pref_sc = get_pref(df, sc=True)   # get group 1's pref list of sc
    pref_sc_z = {}                    # pref to be updated each round
    rejected = pref_sc.keys()
    
    while not stop:
        # get first(round#) sc course on each one's pref list
        pref_sc_r = {u: x[r] for (u,x) in pref_sc.items() if u in rejected}
        # modify pref list
        pref_sc_z.update(pref_sc_r)   # here because later updating bid list would be the same

        # rank students for each sc by bidding
        bid_sc = {c: sorted([((modified_bid(df)).loc[u,c], u) 
                      for u in pref_sc_z.keys() 
                      if pref_sc_z[u] == c], reverse=True) 
                  for c in sc}

        # keep top k students
        bid_sc_k = {c: s[:cap[c]] for (c, s) in bid_sc.items()}
        
        # find the list of unmatched student unis
        rejected = [i[1] for l in [s[cap[c]:] for (c, s) in bid_sc.items()] for i in l]

        if rejected: # not empty
            r += 1
        else:
            stop = True
    
    # update capacity
    cap = {c: newCap(c, k, bid_sc_k) for (c, k) in cap.items()}
    
    return bid_sc_k, cap

In [4]:
def courseToStudentView(courseView):
    courseViewUni = {c: [s[1] for s in courseView[c]] for c in courseView.keys()}
    studenView = {u: [] for l in courseViewUni.values() for u in l}
    for c in courseViewUni.keys():
        for u in courseViewUni[c]:
            studenView[u].append(c)
    return studenView

def resolveTimeConflict(df, assignCourseView):
    # update preference list
    updatedPref = get_pref(df)
    
    # time conflit course pair
    coursePair = [['c1', 'c4'], ['c2', 'c8'], ['c3', 'c5'], ['c6', 'c7']]
    assignStudentView = courseToStudentView(assignCourseView)
    for u in assignStudentView.keys():
        assignedSC = assignStudentView[u]
        for pair in coursePair:
            if assignedSC in pair:
                updatedPref[u] = [i for i in updatedPref[u] if i not in pair]
                
    return updatedPref

In [5]:
(assignCourseView, cap) = assignSC(df_group1)
updatedPref = resolveTimeConflict(df_group1, assignCourseView)

In [6]:
# general assignment (2 courses default)
def assign(df, pref, courseNum=2):
    stop = False
    rejected = pref.keys()
    r = 0
    nextProposeQuota = {u: courseNum for u in rejected}
    propose = {u: [] for u in pref.keys()}
    
    while not stop:
        # first propose to 2 courses
        # then propose to quota courses
        newPropose = {u: x[r:r+nextProposeQuota[u]] for (u,x) in pref.items() if u in set(rejected)}
        # update propose
        for u in newPropose.keys():
            propose[u].extend(newPropose[u])

        # index bids
        bid1 = {c: [((modified_bid(df)).loc[u,c], u) for u in propose.keys() if propose[u][0] == c] 
               for c in course}
        bid2 = {c: [((modified_bid(df)).loc[u,c], u) for u in propose.keys() if propose[u][1] == c]
               for c in course}
        bid = {c: sorted(l + bid2[c], reverse=True) for (c,l) in bid1.items()}
        
        # keep top k students
        bid_k = {c: s[:cap[c]] for (c, s) in bid.items()}
        
        # find the list of unmatched student unis
        rejected = [i[1] for l in [s[cap[c]:] for (c, s) in bid.items()] for i in l]
        
        propose = {u: [] for u in pref.keys()}
        for c in bid_k.keys():
            for (_, u) in bid_k[c]:
                propose[u].append(c)
                

        if rejected: # not empty
            r += 1
            nextProposeQuota = Counter(rejected)
        else:
            stop = True

    
    return bid_k

In [7]:
a = assign(df_group1, updatedPref)
a

{'c1': [],
 'c2': [(100.70807257779605, 'zs2440'),
  (65.52475643163224, 'xt2230'),
  (49.81546142845483, 'atc214')],
 'c3': [(95.03438852111522, 'mz2776'),
  (45.19967378215836, 'sjl222'),
  (45.19967378215836, 'sjl222')],
 'c4': [(50.77513282336111, 'tg2718'),
  (25.80219698075404, 'jy3026'),
  (1.7290071680409873, 'atc214')],
 'c5': [(15.30461376917337, 'zl2856'),
  (15.30461376917337, 'zl2856'),
  (10.939498941564189, 'tg2718'),
  (1.0745506436797707, 'jy3026'),
  (0.8324426408004217, 'zs2440')],
 'c6': [(1.662522284353982, 'mz2776')],
 'c7': [(20.729606178338063, 'wx2226'),
  (20.729606178338063, 'wx2226'),
  (7.0580836121682, 'qt2131')],
 'c8': [(72.86617614577493, 'qt2131'),
  (35.36636184329369, 'xt2230'),
  (0.3567533266935893, 'rg3266'),
  (0.3567533266935893, 'rg3266')]}

In [8]:
b = courseToStudentView(a)
b

{'zs2440': ['c2', 'c5'],
 'xt2230': ['c2', 'c8'],
 'atc214': ['c2', 'c4'],
 'mz2776': ['c3', 'c6'],
 'sjl222': ['c3', 'c3'],
 'tg2718': ['c4', 'c5'],
 'jy3026': ['c4', 'c5'],
 'zl2856': ['c5', 'c5'],
 'wx2226': ['c7', 'c7'],
 'qt2131': ['c7', 'c8'],
 'rg3266': ['c8', 'c8']}