Make sure the required packages are installed
(e.g., pip install pyyaml)

In [None]:
import urllib
import yaml
import pandas as pd
import random
import numpy as np
from opcode import haslocal
from pprint import pprint


In [None]:
# Load the student dataset. index_col=False tells pandas not to promote the
# first CSV column to the index, so every column stays available as data.
url = '../Data/datasetA_with_roommate_pref.csv'
student_df = pd.read_csv(filepath_or_buffer=url, index_col=False)

In [None]:
# Load the room inventory. index_col=False tells pandas not to promote the
# first CSV column to the index, so rows get a default 0..n-1 index.
url = '../Data/final_room_data.csv'
rooms_df = pd.read_csv(filepath_or_buffer=url, index_col=False)

In [None]:
# Order the students into the priority hierarchy of the V1 diagram, i.e.
# establish the "dictatorship" used by the serial-dictatorship allocation.

# Numeric rank for each student_year string so it can be used as a sort key
# (sorted ascending below, so a lower number is served earlier).
student_year_preference = {
    'Postdoctoral studies': 1,
    'Doctorate': 2,
    'Masters': 3,
    'Freshman': 4,
    'Senior': 5,
    'Junior': 6,
    'Sophomore': 7,
}

# 1 for the graduate-level years, 0 for the undergraduate years.
is_grad_student = {
    'Postdoctoral studies': 1,
    'Doctorate': 1,
    'Masters': 1,
    'Freshman': 0,
    'Senior': 0,
    'Junior': 0,
    'Sophomore': 0,
}

# NOTE: a student_year value missing from these tables maps to NaN, which
# sort_values places last by default.
student_df['student_year_int'] = student_df['student_year'].map(student_year_preference)
student_df['is_grad_student'] = student_df['student_year'].map(is_grad_student)

# Sort keys, most significant first:
#   is_grad_student    descending -> graduate students first
#   accessibility_need descending -> higher accessibility need first
#   max_price          ascending  -> lower budget first
#   student_year_int   ascending  -> lower rank number first
dictatorship = ['is_grad_student', 'accessibility_need', 'max_price', 'student_year_int']
student_df = (
    student_df
    .sort_values(by=dictatorship, ascending=[False, False, True, True])
    .set_index('RUID')
)

# RUID is the index from here on, so it has to be written with the index;
# with index=False the dump would contain no student identifier at all.
student_df.to_csv('tempStudentDf.csv', index=True)

In [None]:
# Bookkeeping containers; neither is read by the cells visible in this
# notebook section (presumably kept for other cells -- verify before removing).
allocating = dict()
# Number of rooms listed under each hall_id.
available_rooms = rooms_df['hall_id'].value_counts().to_dict()

# Add the availability flag: 1 = free, 0 = already allocated.
# Assign the whole column instead of slicing a hard-coded 0:5330 label range,
# so every room is flagged no matter how many rows the inventory holds
# (rows past the hard-coded bound would otherwise be NaN and never offered).
rooms_df['is_available'] = 1

In [None]:
# Single-pass character table for the stringified hall-id list:
# spaces become commas; '[', ']' and single quotes are removed.
_HALL_ID_TRANSLATION = str.maketrans(' ', ',', "[]'")

# Per-student record keyed by RUID (the index of student_df), holding only the
# fields the allocation step needs.
# NOTE: two adjacent separators (e.g. ", ") still produce an empty-string
# entry in preferred_hall_ids, exactly as the chained replace/split did.
student_data = {}
for RUID, row in student_df.iterrows():
    hall_ids = row['preferred_hall_ids'].translate(_HALL_ID_TRANSLATION).split(',')
    student_data[RUID] = {
        "preferred_hall_ids": hall_ids,
        "student_year": row['student_year'],
        "accessibility_need": row['accessibility_need'],
        "low_income_status": row['low_income_status'],
        "max_price": row['max_price'],
    }

In [None]:
# Room count per $1000 price band: band i covers (i-1)*1000 < price <= i*1000.
for i in range(13):
    band_top = i * 1000
    band_bottom = band_top - 1000
    in_band = (rooms_df['price'] > band_bottom) & (rooms_df['price'] <= band_top)
    print("price: $", band_top, "-- # rooms: ", len(rooms_df[in_band]))

price: $ 0 -- # rooms:  0
price: $ 1000 -- # rooms:  0
price: $ 2000 -- # rooms:  0
price: $ 3000 -- # rooms:  0
price: $ 4000 -- # rooms:  0
price: $ 5000 -- # rooms:  167
price: $ 6000 -- # rooms:  1145
price: $ 7000 -- # rooms:  0
price: $ 8000 -- # rooms:  976
price: $ 9000 -- # rooms:  1043
price: $ 10000 -- # rooms:  1435
price: $ 11000 -- # rooms:  180
price: $ 12000 -- # rooms:  384


In [None]:
# Serial-dictatorship allocation: visit students in the order of student_data
# and give each one the first still-available room that satisfies their
# constraints, trying their preferred halls in order before any other hall.

_UNDERGRAD_YEARS = ("Freshman", "Sophomore", "Junior", "Senior")
_UNDERGRAD_CONTRACT = "Undergraduate Academic Year (Two Semesters)"
_GRAD_CONTRACT = "Graduate Full Calendar Year – 12 Month Contracts (Two semesters and all breaks)"


def _eligible_rooms(preferences, housing_types, hall_id=None):
    """Return the rows of rooms_df this student could be given right now.

    A room qualifies when it is still available, its has_accessibility_ramps
    value is at least the student's accessibility_need, its price does not
    exceed the student's max_price, and its contract type is one of
    housing_types. When hall_id is given, only rooms in that hall qualify.
    """
    mask = (
        (rooms_df['is_available'] == 1)
        & (rooms_df['has_accessibility_ramps'] >= preferences['accessibility_need'])
        & (rooms_df['price'] <= preferences['max_price'])
        & rooms_df['room_contract_type'].isin(housing_types)
    )
    if hall_id is not None:
        mask = mask & (rooms_df['hall_id'] == hall_id)
    return rooms_df.loc[mask]


result = dict()                 # RUID -> [hall_id, building_id, room_id]
non_assigned_students = dict()  # RUID -> that student's preference record

assigned = 0
not_assigned = 0

for ruid, preferences in student_data.items():
    # The allowed contract types depend only on the student, so decide them
    # once per student. Doing it inside the preference loop made the fallback
    # query below depend on that loop having executed at least once.
    if preferences['student_year'] in _UNDERGRAD_YEARS:
        housing_types = [_UNDERGRAD_CONTRACT, _GRAD_CONTRACT]
    else:
        housing_types = [_GRAD_CONTRACT]

    # Try the preferred halls in the order the student listed them.
    available_rooms_df = pd.DataFrame()
    for pref in preferences['preferred_hall_ids']:
        available_rooms_df = _eligible_rooms(preferences, housing_types, hall_id=pref)
        if not available_rooms_df.empty:
            break

    # No preferred hall has a suitable room: fall back to any hall.
    if available_rooms_df.empty:
        available_rooms_df = _eligible_rooms(preferences, housing_types)

    if available_rooms_df.empty:
        non_assigned_students[ruid] = preferences
        not_assigned += 1
        continue

    # Take the first matching row (not a random one) and mark it as taken so
    # later students cannot receive the same room.
    allocated_room = available_rooms_df.iloc[0]
    rooms_df.loc[allocated_room.name, 'is_available'] = 0
    result[ruid] = [
        allocated_room['hall_id'],
        allocated_room['building_id'],
        allocated_room['room_id'],
    ]
    assigned += 1

# Record the students who could not be placed, then report both totals.
df = pd.DataFrame(list(non_assigned_students.items()), columns=['RUID', 'preferences'])
df.to_csv('non_assigned_students.csv')
print(assigned)
print(not_assigned)


5263
67


In [None]:
# Persist the allocation: one row per student, with the assigned room stored
# as the [hall_id, building_id, room_id] list built during allocation.
allocation_rows = list(result.items())
df = pd.DataFrame(data=allocation_rows, columns=['RUID', 'Assigned Rooms'])
df.to_csv(path_or_buf='allocated.csv')

In [None]:
# Sanity check: no (hall, building, room) triple may be assigned twice.
# The two printed numbers match when every assigned room is unique.
check_set = set()
for rooms in result.values():
    room_key = '_'.join(str(rooms[part]) for part in range(3))
    if room_key in check_set:
        print("already contains ", room_key)
    check_set.add(room_key)

# One entry was visited per assigned student.
counter = len(result)
print(counter)

print(len(check_set))

5263
5263
