In [8]:
# File locations used throughout the notebook:
#   input_file     - raw course details read by clean_large_json
#   output_file    - cleaned copy; the cleaning cells below read and overwrite it in place
#   classroom_json - classroom records; also rewritten in place by the classroom cleaning cells
input_file = '../data/details.json'
output_file = '../data/details_cleaned.json'
classroom_json = '../data/classroom_data.json'
import numpy as np
import json
from datetime import datetime

<h1>Data Cleaning</h1>

In [None]:
def clean_large_json(input_file, output_file, keys_to_remove):
    """Copy a JSON document to another file with the given keys removed at every depth.

    Args:
        input_file: Path of the UTF-8 JSON file to read.
        output_file: Path the pruned document is written to (4-space indent,
            non-ASCII characters written as-is).
        keys_to_remove: Dictionary keys to delete wherever they occur.
    """
    def strip_keys(node, unwanted):
        # Lists are walked element by element; scalars are left untouched.
        if isinstance(node, list):
            for element in node:
                strip_keys(element, unwanted)
            return
        if not isinstance(node, dict):
            return
        for name in unwanted:
            node.pop(name, None)
        for child in node.values():
            strip_keys(child, unwanted)

    with open(input_file, 'r', encoding='utf-8') as source:
        document = json.load(source)
    strip_keys(document, keys_to_remove)
    with open(output_file, 'w', encoding='utf-8') as target:
        json.dump(document, target, indent=4, ensure_ascii=False)
# Keys stripped from every level of the raw details file; the result is written to output_file,
# which the later cleaning cells then rewrite in place.
keys_to_remove = ["additionalLinks", "bookstore", "cfg", "catalog_descr", "materials", "enrollment_information", "reserve_caps", "messages", "notes"]
clean_large_json(input_file, output_file, keys_to_remove)


In [None]:
def remove_class_capacity_999():
    """Rewrite ``output_file`` without any object whose ``class_capacity`` is 999.

    Every JSON object, at any depth, whose ``class_capacity`` equals ``"999"``
    or ``999`` is dropped from its parent. The file is read and then
    overwritten in place.

    Note: ``None`` is used internally as the "drop this" marker, so keys whose
    value is JSON ``null`` and ``null`` list elements are removed as well.
    """
    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    def remove_capacity_999(obj):
        if isinstance(obj, dict):
            if obj.get("class_capacity") in ("999", 999):
                return None
            new_obj = {}
            for key, value in obj.items():
                result = remove_capacity_999(value)
                if result is not None:
                    new_obj[key] = result
            return new_obj
        if isinstance(obj, list):
            # Recurse once per element and reuse the result; recursing twice
            # per element would double the work at every nesting level.
            kept = []
            for item in obj:
                result = remove_capacity_999(item)
                if result is not None:
                    kept.append(result)
            return kept
        return obj

    cleaned_data = remove_capacity_999(data)

    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(cleaned_data, outfile, indent=4, ensure_ascii=False)
# Apply the capacity-999 filter; rewrites output_file in place.
remove_class_capacity_999()


In [None]:
def remove_online_instruction_mode():
    """Rewrite ``output_file`` without any object whose ``instruction_mode`` is ``'Online'``.

    Matching JSON objects are dropped from their parent at any depth. The file
    is read and then overwritten in place.

    Note: ``None`` is used internally as the "drop this" marker, so keys whose
    value is JSON ``null`` and ``null`` list elements are removed as well.
    """
    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    def remove_online_mode(obj):
        if isinstance(obj, dict):
            if obj.get("instruction_mode") == 'Online':
                return None
            new_obj = {}
            for key, value in obj.items():
                result = remove_online_mode(value)
                if result is not None:
                    new_obj[key] = result
            return new_obj
        if isinstance(obj, list):
            # Recurse once per element and reuse the result; recursing twice
            # per element would double the work at every nesting level.
            kept = []
            for item in obj:
                result = remove_online_mode(item)
                if result is not None:
                    kept.append(result)
            return kept
        return obj

    cleaned_data = remove_online_mode(data)

    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(cleaned_data, outfile, indent=4, ensure_ascii=False)
# Apply the online-section filter; rewrites output_file in place.
remove_online_instruction_mode()


In [None]:
def remove_tba_instructors():
    """Remove placeholder instructors named "To Be Announced" from every meeting.

    Walks the whole document in ``output_file``; wherever an object has a
    ``meetings`` list, each meeting's ``instructors`` list is filtered.
    Meetings themselves are kept even if their instructor list ends up empty.
    The file is overwritten in place.
    """
    def clean_instructors(node):
        if isinstance(node, list):
            for element in node:
                clean_instructors(element)
        elif isinstance(node, dict):
            for meeting in node.get("meetings", ()):
                if "instructors" not in meeting:
                    continue
                named = []
                for person in meeting["instructors"]:
                    if person.get("name") != "To Be Announced":
                        named.append(person)
                meeting["instructors"] = named
            for child in node.values():
                clean_instructors(child)

    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    clean_instructors(data)
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(data, outfile, indent=4, ensure_ascii=False)
# Strip "To Be Announced" instructors; rewrites output_file in place.
remove_tba_instructors()


In [None]:
def clean_empty_instructors_tba_meets_and_empty_times():
    """Rewrite ``output_file`` without objects that carry no usable schedule.

    A JSON object (at any depth) is dropped from its parent when any of these holds:
      * it has an ``instructors`` key whose value is an empty list,
      * its ``meets`` value is ``"TBA"``,
      * its ``meeting_time_start`` or ``meeting_time_end`` is the empty string.

    Note: ``None`` is used internally as the "drop this" marker, so keys whose
    value is JSON ``null`` and ``null`` list elements are removed as well.
    """
    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    def clean_data(obj):
        if isinstance(obj, dict):
            if "instructors" in obj and isinstance(obj["instructors"], list) and not obj["instructors"]:
                return None
            if obj.get("meets") == "TBA":
                return None
            if "meeting_time_start" in obj and obj["meeting_time_start"] == "":
                return None
            if "meeting_time_end" in obj and obj["meeting_time_end"] == "":
                return None
            new_obj = {}
            for key, value in obj.items():
                result = clean_data(value)
                if result is not None:
                    new_obj[key] = result
            return new_obj
        if isinstance(obj, list):
            # Recurse once per element and reuse the result; recursing twice
            # per element would double the work at every nesting level.
            kept = []
            for item in obj:
                result = clean_data(item)
                if result is not None:
                    kept.append(result)
            return kept
        return obj

    cleaned_data = clean_data(data)
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(cleaned_data, outfile, indent=4, ensure_ascii=False)
# Drop meetings without instructors, with "TBA" meets, or with empty times; rewrites output_file in place.
clean_empty_instructors_tba_meets_and_empty_times()


In [None]:
def clean_classroom_names():
    """Rewrite ``classroom_json`` without entries whose ``Name`` contains a slash.

    Any JSON object (at any depth) whose ``Name`` is a string containing ``'/'``
    is dropped from its parent. The file is overwritten in place.

    Notes:
      * A later cell defines another function with this same name; after that
        cell runs, this definition is shadowed.
      * ``None`` is used internally as the "drop this" marker, so keys whose
        value is JSON ``null`` and ``null`` list elements are removed as well.
    """
    with open(classroom_json, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    def remove_invalid_classrooms(obj):
        if isinstance(obj, dict):
            name = obj.get("Name")
            # Only test strings, so a null or numeric Name cannot raise TypeError.
            if isinstance(name, str) and '/' in name:
                return None
            new_obj = {}
            for key, value in obj.items():
                result = remove_invalid_classrooms(value)
                if result is not None:
                    new_obj[key] = result
            return new_obj
        if isinstance(obj, list):
            # Recurse once per element and reuse the result; recursing twice
            # per element would double the work at every nesting level.
            kept = []
            for item in obj:
                result = remove_invalid_classrooms(item)
                if result is not None:
                    kept.append(result)
            return kept
        return obj

    cleaned_data = remove_invalid_classrooms(data)
    with open(classroom_json, 'w', encoding='utf-8') as outfile:
        json.dump(cleaned_data, outfile, indent=4, ensure_ascii=False)
# Drop classrooms whose Name contains '/'; rewrites classroom_json in place.
clean_classroom_names()


In [None]:
import re
def clean_classroom_names():
    """Normalise every ``Name`` value in ``classroom_json`` in place.

    For each object (at any depth) that has a ``Name``: hyphens become spaces,
    the text matched by the pattern ``\\(.*$`` (an opening parenthesis through
    the end of the name) is removed, and surrounding whitespace is trimmed.

    Note: this reuses the name of the function defined in the previous cell
    and therefore replaces it in the kernel namespace.
    """
    def process_classroom_names(node):
        if isinstance(node, dict):
            if "Name" in node:
                dehyphenated = node["Name"].replace('-', ' ')
                node["Name"] = re.sub(r'\(.*$', '', dehyphenated).strip()
            for child in node.values():
                process_classroom_names(child)
        elif isinstance(node, list):
            for element in node:
                process_classroom_names(element)

    with open(classroom_json, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    process_classroom_names(data)
    with open(classroom_json, 'w', encoding='utf-8') as outfile:
        json.dump(data, outfile, indent=4, ensure_ascii=False)
# Normalise classroom names (hyphens to spaces, parenthesised suffix removed); rewrites classroom_json in place.
clean_classroom_names()


In [None]:
def clean_none_and_no_room_classrooms():
    """Drop every course that has at least one meeting without a usable room.

    A meeting counts as unusable when its ``room`` is missing/``None`` or equals
    ``"NO ROOM"``. Courses without any meetings are kept. ``output_file`` is
    expected to hold a list of course objects and is overwritten in place.
    """
    unusable_rooms = [None, "NO ROOM"]

    def has_unusable_room(course):
        for meeting in course.get("section_info", {}).get("meetings", []):
            if meeting.get("room") in unusable_rooms:
                return True
        return False

    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    cleaned_data = [course for course in data if not has_unusable_room(course)]
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(cleaned_data, outfile, indent=4, ensure_ascii=False)
# Drop courses that have a meeting without a room; rewrites output_file in place.
clean_none_and_no_room_classrooms()


In [None]:
def clean_empty_meetings():
    """Keep only the courses that still have at least one meeting.

    ``output_file`` is expected to hold a list of course objects; a course is
    kept when ``section_info.meetings`` exists and is non-empty. The file is
    overwritten in place.
    """
    def meetings_of(course):
        return course.get("section_info", {}).get("meetings", [])

    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    cleaned_data = [course for course in data if meetings_of(course)]
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(cleaned_data, outfile, indent=4, ensure_ascii=False)
# Drop courses left without meetings; rewrites output_file in place.
clean_empty_meetings()


In [74]:
import json
# Remove meetings (and similar-class meeting patterns) held in excluded buildings.
# The file is opened as UTF-8 explicitly: the earlier cleaning cells write it as
# UTF-8 with non-ASCII characters unescaped, so the locale default is not safe.
with open(output_file, 'r', encoding='utf-8') as file:
    data = json.load(file)
# Building codes to exclude; the classroom filter in the next cell reuses this list.
remove_bldg_cd = ["ALB", "CTC", "INS", "SPH", "XBG", "MED", "FAB", "FCB", "FCC", "HAW", "GDS", "FPH", "EVN"]
for item in data:
    if 'section_info' in item and 'meetings' in item['section_info']:
        item['section_info']['meetings'] = [
            meeting for meeting in item['section_info']['meetings']
            if meeting.get('bldg_cd') not in remove_bldg_cd
        ]
    if 'similar_classes' in item:
        for similar_class in item['similar_classes']:
            if 'meeting_patterns' in similar_class:
                similar_class['meeting_patterns'] = [
                    pattern for pattern in similar_class['meeting_patterns']
                    if pattern.get('bldg_cd') not in remove_bldg_cd
                ]
# NOTE(review): a course whose meetings were all in excluded buildings keeps an
# empty "meetings" list here; confirm whether clean_empty_meetings should run again.
with open(output_file, 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=4)


In [75]:
import json
# Drop classrooms whose Name starts with one of the excluded building codes
# (remove_bldg_cd is defined in the previous cell).
# The file is opened as UTF-8 explicitly because the classroom cleaning cells
# write it as UTF-8 with non-ASCII characters unescaped.
with open(classroom_json, 'r', encoding='utf-8') as file:
    classrooms = json.load(file)
# str.startswith accepts a tuple of prefixes, which replaces the inner any() loop.
# NOTE(review): this is a raw prefix match, so a name such as "INSX 1" would also
# match the code "INS" — confirm that cannot occur in the data.
excluded_prefixes = tuple(remove_bldg_cd)
cleaned_classrooms = [
    classroom for classroom in classrooms
    if not classroom.get('Name', '').startswith(excluded_prefixes)
]
with open(classroom_json, 'w', encoding='utf-8') as file:
    json.dump(cleaned_classrooms, file, indent=4)


In [26]:
def clean_schedule(professor_schedule):
    """Merge duplicate time slots in each professor's schedule.

    Args:
        professor_schedule: Mapping of key -> iterable of entries whose first
            three items are ``(start, end, count)``.

    Returns:
        A new dict with the same keys. Entries sharing the same
        ``(start, end)`` are combined into one tuple whose third item is the
        sum of their counts; first-seen order of the slots is preserved.
    """
    cleaned_schedule = {}
    for key, entries in professor_schedule.items():
        totals = {}
        for entry in entries:
            slot = (entry[0], entry[1])
            if slot not in totals:
                totals[slot] = entry[2]
                continue
            totals[slot] += entry[2]
        cleaned_schedule[key] = [
            (slot[0], slot[1], total) for slot, total in totals.items()
        ]
    return cleaned_schedule

In [105]:
import json

# Load the classroom list (UTF-8 explicitly, matching how the cleaning cells write it).
with open(classroom_json, 'r', encoding='utf-8') as file:
    classrooms = json.load(file)

# Drop every classroom entry whose "Classroom Tag" contains "Medical Campus".
# NOTE(review): this is a direct `in` test on the tag value, whereas the
# Fenway Campus cell checks each tag for a substring — confirm which one
# matches the actual shape of "Classroom Tag".
cleaned_classrooms = [
    classroom for classroom in classrooms
    if not ("Details" in classroom and
            "Classroom Tag" in classroom["Details"] and
            "Medical Campus" in classroom["Details"]["Classroom Tag"])
]

# Write the filtered list back to the same file.
with open(classroom_json, 'w', encoding='utf-8') as file:
    json.dump(cleaned_classrooms, file, indent=4)


In [107]:
import json

# Load the classroom list (UTF-8 explicitly, matching how the cleaning cells write it).
with open(classroom_json, 'r', encoding='utf-8') as file:
    classrooms = json.load(file)

# Drop every classroom entry that has a tag containing "Fenway Campus".
# NOTE(review): this iterates over "Classroom Tag" and does a substring test on
# each element, unlike the Medical Campus cell — confirm the tag value is a list.
cleaned_classrooms = [
    classroom for classroom in classrooms
    if not ("Details" in classroom and
            "Classroom Tag" in classroom["Details"] and
            any("Fenway Campus" in tag for tag in classroom["Details"]["Classroom Tag"]))
]

# Write the filtered list back to the same file.
with open(classroom_json, 'w', encoding='utf-8') as file:
    json.dump(cleaned_classrooms, file, indent=4)


<h1>Debugger, ignore this part</h1>

In [None]:
def count_and_print_top_ten_class_capacities():
    """Print the first ten ``class_capacity`` values found and how many equal 999.

    Walks the whole document in ``output_file`` and collects every
    ``class_capacity`` that converts to ``int``, in traversal order. Values that
    cannot be converted (non-numeric strings, ``null``) are skipped. Despite the
    name, the values printed are the first ten encountered, not the ten largest.
    """
    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    capacities = []
    capacity_999_count = 0

    def find_class_capacities(obj):
        nonlocal capacity_999_count
        if isinstance(obj, dict):
            # Record class_capacity when present and numeric.
            if "class_capacity" in obj:
                try:
                    capacity = int(obj["class_capacity"])
                except (ValueError, TypeError):
                    # TypeError covers null / non-scalar values, which int() rejects.
                    pass
                else:
                    capacities.append(capacity)
                    if capacity == 999:
                        capacity_999_count += 1
            for value in obj.values():
                find_class_capacities(value)
        elif isinstance(obj, list):
            for item in obj:
                find_class_capacities(item)

    find_class_capacities(data)

    print("前十节课的 class_capacity:", capacities[:10])
    print("class_capacity 为 999 的数量:", capacity_999_count)
# Debug check: after cleaning, the count of 999 capacities printed here shows whether any remain.
count_and_print_top_ten_class_capacities()


In [None]:
def count_unique_instruction_modes():
    """Print the number of distinct ``instruction_mode`` values in ``output_file`` and the values themselves."""
    instruction_modes = set()

    def find_instruction_modes(node):
        if isinstance(node, list):
            for element in node:
                find_instruction_modes(element)
            return
        if not isinstance(node, dict):
            return
        if "instruction_mode" in node:
            instruction_modes.add(node["instruction_mode"])
        for child in node.values():
            find_instruction_modes(child)

    with open(output_file, 'r', encoding='utf-8') as infile:
        find_instruction_modes(json.load(infile))

    print("不同的 instruction_mode 数量:", len(instruction_modes))
    print("不同的 instruction_mode 值:", instruction_modes)
# Debug check: list the instruction modes still present in output_file.
count_unique_instruction_modes()


In [None]:
def display_sample_professor_schedule(professor_schedule, sample_size=10):
    """Print the schedule entries of professor ids ``0 .. sample_size - 1``.

    Args:
        professor_schedule: Mapping of professor id -> iterable of
            ``(start_time, end_time, capacity)`` triples.
        sample_size: Number of consecutive ids, starting at 0, to print.
    """
    print(f"Displaying schedule for the first {sample_size} professors:")
    for professor_id in range(sample_size):
        if professor_id not in professor_schedule:
            print(f"\nProfessor ID {professor_id}: No schedule available")
            continue
        print(f"\nProfessor ID {professor_id}:")
        for entry in professor_schedule[professor_id]:
            start_time, end_time, capacity = entry
            line = (f"  Start Time (in 5-min units): {start_time}, "
                    f"End Time (in 5-min units): {end_time}, "
                    f"Capacity: {capacity}")
            print(line)
# NOTE(review): in the visible notebook, professor_schedule is only assigned in the
# "professor_schedule" section further down, so this cell needs that cell to have run first.
display_sample_professor_schedule(professor_schedule)


In [None]:
def decode_time(value):
    """Turn a week-relative 5-minute slot index into "<weekday> HH:MM".

    Slot 0 is Monday 00:00 and each day spans 288 slots (24 * 60 // 5). The
    weekday is rendered with its Chinese label.
    """
    slots_per_day = 24 * 60 // 5
    day, slot_in_day = divmod(value, slots_per_day)
    hours, minutes = divmod(slot_in_day * 5, 60)
    day_mapping = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]
    return f"{day_mapping[day]} {hours:02}:{minutes:02}"
# Example: slot 978 decodes to Thursday 09:30 (printed with the Chinese weekday label).
print(decode_time(978))

In [None]:
def find_courses_for_instructor(instructor_name):
    """Collect, print and return every object whose meetings list the given instructor.

    Walks the document in ``output_file``. An object with a ``meetings`` list is
    collected when any meeting has an instructor whose ``name`` equals
    ``instructor_name``; a collected object is not searched any deeper.

    Args:
        instructor_name: Exact instructor name to look for.

    Returns:
        The list of matching objects, in traversal order.
    """
    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    instructor_courses = []

    def taught_by_target(node):
        return any(
            person.get("name") == instructor_name
            for meeting in node["meetings"]
            if "instructors" in meeting
            for person in meeting["instructors"]
        )

    def search_for_instructor(node):
        if isinstance(node, list):
            for element in node:
                search_for_instructor(element)
            return
        if not isinstance(node, dict):
            return
        if "meetings" in node and taught_by_target(node):
            instructor_courses.append(node)
            return
        for child in node.values():
            search_for_instructor(child)

    search_for_instructor(data)

    print(f" '{instructor_name}' ")
    for course in instructor_courses:
        print(json.dumps(course, indent=4, ensure_ascii=False))

    return instructor_courses
# Debug lookup; as the cell's last expression, the returned list is also echoed as cell output.
find_courses_for_instructor("Min Ye")


In [None]:
def find_room(keyword):
    """Collect, print and return every meeting whose ``room`` contains ``keyword``.

    Walks the document in ``output_file`` and inspects the ``meetings`` list of
    every object that has one.

    Args:
        keyword: Substring to look for in each meeting's ``room``.

    Returns:
        The list of matching meeting objects, in traversal order.
    """
    rooms_with_keyword = []

    def search_meetings(node):
        if isinstance(node, list):
            for element in node:
                search_meetings(element)
            return
        if not isinstance(node, dict):
            return
        for meeting in node.get("meetings", ()):
            if "room" in meeting and keyword in meeting["room"]:
                rooms_with_keyword.append(meeting)
        for child in node.values():
            search_meetings(child)

    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    search_meetings(data)
    print(f"包含 '{keyword}' 的房间信息:", rooms_with_keyword)
    return rooms_with_keyword
# Debug lookup: meetings whose room string contains "CFA 154".
rooms = find_room("CFA 154")


In [101]:
import numpy as np

# Debug check: which building prefixes are involved in walking costs above 4000.0.
# NOTE(review): walking_cost and classroom_mapping are assigned further down in the
# visible notebook, so this cell needs those cells to have run first.

# Example: walking_cost is assumed to be a 2D float matrix.
# walking_cost = np.array([...]) # replace with your actual matrix

# Index pairs whose cost is greater than 4000.0.
indices = np.argwhere(walking_cost > 4000.0)

# Invert classroom_mapping (name -> id) into id -> name.
index_to_classroom = dict((idx, name) for name, idx in classroom_mapping.items())

# Translate the index pairs into classroom-name pairs.
classroom_pairs = []
for i, j in indices:
    classroom_pairs.append((index_to_classroom[i], index_to_classroom[j]))

# Example classroom_pairs result:
# classroom_pairs = [('ABG 101', 'AGG C205'), ('ABG 101', 'AGG G105'), ('ABG 101', 'AGG G171'), ...]  # your data

# Keep the first three letters of each name and de-duplicate the pairs.
unique_pairs = list({(a[:3], b[:3]) for a, b in classroom_pairs})
print("Unique classroom prefix pairs:", unique_pairs)

from collections import Counter

# All prefixes: first members of every pair, followed by second members.
first_prefixes = [pair[0][:3] for pair in classroom_pairs]
second_prefixes = [pair[1][:3] for pair in classroom_pairs]
prefixes = first_prefixes + second_prefixes

# Count how often each prefix occurs, most frequent first.
prefix_counts = Counter(prefixes)
sorted_prefix_counts = prefix_counts.most_common()

print("Prefix frequencies sorted from high to low:", sorted_prefix_counts)


Unique classroom prefix pairs: []
Prefix frequencies sorted from high to low: []


<h1>Data Processing</h1>

<h2>capacities: int[]</h2>

In [121]:
def extract_capacity_from_additional_info():
    """Return every integer ``AdditionalInfo.Capacity`` found in ``classroom_json``.

    Walks the whole document; for each object whose ``AdditionalInfo`` is a dict
    with a ``Capacity`` that converts to ``int``, the value is appended in
    traversal order. Missing, null or non-numeric capacities are skipped.

    NOTE(review): because unparseable entries are skipped, a value's position in
    the returned list is not guaranteed to match the ids in ``classroom_mapping``
    (the saved outputs show 'AGG C205' in classroom_mapping but not in
    name_capacity_dict) — confirm consumers do not index this list by classroom id.

    Returns:
        list[int]: The capacities found.
    """
    with open(classroom_json, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    capacities = []

    def find_capacity(obj):
        if isinstance(obj, dict):
            info = obj.get("AdditionalInfo")
            # Require a dict so a null or string AdditionalInfo cannot raise.
            if isinstance(info, dict) and "Capacity" in info:
                try:
                    capacities.append(int(info["Capacity"]))
                except (ValueError, TypeError):
                    # TypeError covers a null / non-scalar Capacity.
                    pass

            for value in obj.values():
                find_capacity(value)
        elif isinstance(obj, list):
            for item in obj:
                find_capacity(item)

    find_capacity(data)

    return capacities
# Flat list of classroom capacities, in the order they appear in classroom_json.
capacities = extract_capacity_from_additional_info()

<h2>name_capacity_dict: dict</h2>

In [122]:
def extract_name_capacity_dict():
    """Return a mapping of classroom ``Name`` -> integer capacity from ``classroom_json``.

    Walks the whole document; an object contributes an entry when it has a
    ``Name`` and its ``AdditionalInfo`` is a dict with a ``Capacity`` that
    converts to ``int``. Classrooms with a missing, null or non-numeric
    capacity are left out; a repeated name keeps the last value seen.

    Returns:
        dict: Classroom name -> capacity.
    """
    with open(classroom_json, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    name_capacity_dict = {}

    def find_name_capacity(obj):
        if isinstance(obj, dict):
            info = obj.get("AdditionalInfo")
            # Require a dict so a null or string AdditionalInfo cannot raise.
            if "Name" in obj and isinstance(info, dict) and "Capacity" in info:
                try:
                    name_capacity_dict[obj["Name"]] = int(info["Capacity"])
                except (ValueError, TypeError):
                    # TypeError covers a null / non-scalar Capacity.
                    pass
            for value in obj.values():
                find_name_capacity(value)
        elif isinstance(obj, list):
            for item in obj:
                find_name_capacity(item)

    find_name_capacity(data)
    return name_capacity_dict
# Classroom name -> capacity, for the classrooms that have a parseable capacity.
name_capacity_dict = extract_name_capacity_dict()

<h2>professor_mapping: dict</h2>

In [123]:
def extract_professor_mapping():
    """Assign a consecutive integer id to every distinct instructor name in ``output_file``.

    Walks the whole document; for each object with a ``meetings`` list, the
    ``name`` of every instructor in every meeting is registered the first time
    it is seen. Ids start at 0 and follow first-seen order. Missing or empty
    names are ignored.

    NOTE(review): the saved output shows the name '-' receiving an id, which
    looks like a placeholder rather than a person — confirm whether it should
    be filtered.

    Returns:
        dict: Instructor name -> id.
    """
    professor_mapping = {}

    def register(name):
        # The next free id always equals the number of names registered so far.
        if name and name not in professor_mapping:
            professor_mapping[name] = len(professor_mapping)

    def find_instructors(node):
        if isinstance(node, list):
            for element in node:
                find_instructors(element)
            return
        if not isinstance(node, dict):
            return
        for meeting in node.get("meetings", ()):
            if "instructors" in meeting:
                for instructor in meeting["instructors"]:
                    register(instructor.get("name"))
        for child in node.values():
            find_instructors(child)

    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    find_instructors(data)
    return professor_mapping
# Instructor name -> integer id, in first-seen order.
professor_mapping = extract_professor_mapping()


<h2>professor_schedule: dict</h2>

In [124]:
def build_professor_schedule():
    """Build each professor's weekly teaching slots from ``output_file``.

    For every object that has both ``meetings`` and ``class_availability``, each
    meeting that carries ``instructors``, ``days``, ``meeting_time_start`` and
    ``meeting_time_end`` contributes one ``(start, end, capacity)`` tuple per
    instructor known to ``professor_mapping`` and per recognised day code.

    Times are 5-minute slot indices counted from Monday 00:00 (288 slots per
    day); ``days`` is read as consecutive two-letter codes (``Mo`` .. ``Su``)
    and unknown codes are ignored. ``capacity`` is the section's
    ``class_capacity`` converted to ``int``, or ``None`` when absent.

    Returns:
        dict: Professor id -> list of ``(start, end, capacity)`` tuples.
    """
    slots_per_day = 24 * 60 // 5
    day_mapping = {
        "Mo": 0, "Tu": 1, "We": 2, "Th": 3, "Fr": 4, "Sa": 5, "Su": 6
    }
    required_keys = ("instructors", "days", "meeting_time_start", "meeting_time_end")
    professor_schedule = {}

    def parse_time(time_str, day):
        parsed = datetime.strptime(time_str, "%I:%M%p")
        return (parsed.hour * 60 + parsed.minute) // 5 + day * slots_per_day

    def day_indices(days_str):
        codes = (days_str[i:i + 2] for i in range(0, len(days_str), 2))
        return [day_mapping[code] for code in codes if code in day_mapping]

    def record_section(section):
        capacity = section["class_availability"].get("class_capacity")
        if capacity is not None:
            capacity = int(capacity)

        for meeting in section["meetings"]:
            if not all(key in meeting for key in required_keys):
                continue
            days = day_indices(meeting["days"])
            for instructor in meeting["instructors"]:
                professor_id = professor_mapping.get(instructor.get("name"))
                if professor_id is None:
                    continue
                # A known professor gets an entry even when no day code is recognised.
                slots = professor_schedule.setdefault(professor_id, [])
                for day in days:
                    start_time = parse_time(meeting["meeting_time_start"], day)
                    end_time = parse_time(meeting["meeting_time_end"], day)
                    slots.append((start_time, end_time, capacity))

    def find_meetings(node):
        if isinstance(node, dict):
            if "meetings" in node and "class_availability" in node:
                record_section(node)
            for child in node.values():
                find_meetings(child)
        elif isinstance(node, list):
            for element in node:
                find_meetings(element)

    with open(output_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    find_meetings(data)
    return professor_schedule
# Build the raw schedule, then merge entries that share the same (start, end) slot
# by summing their capacities (see clean_schedule).
professor_schedule = build_professor_schedule()
professor_schedule = clean_schedule(professor_schedule)

<h2>classroom_mapping: dict</h2>

In [125]:
def create_classroom_mapping():
    """Assign a consecutive integer id to every distinct ``Name`` in ``classroom_json``.

    Walks the whole document; every object with a ``Name`` key registers that
    name the first time it is seen. Ids start at 0 and follow first-seen order.

    Returns:
        dict: Classroom name -> id.
    """
    classroom_mapping = {}

    def find_classrooms(node):
        if isinstance(node, list):
            for element in node:
                find_classrooms(element)
            return
        if not isinstance(node, dict):
            return
        if "Name" in node:
            classroom_name = node["Name"]
            if classroom_name not in classroom_mapping:
                # The next free id always equals the number of names registered so far.
                classroom_mapping[classroom_name] = len(classroom_mapping)
        for child in node.values():
            find_classrooms(child)

    with open(classroom_json, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    find_classrooms(data)
    return classroom_mapping
# Classroom name -> integer id, in first-seen order.
classroom_mapping = create_classroom_mapping()

<h2>professor_courses: int[][][]</h2>

In [126]:
# Dimensions of the occupancy tensor: N professors x M classrooms x T weekly 5-minute slots.
N = len(professor_mapping)
M = len(classroom_mapping)
T = 7 * 24 * 60 // 5
# NOTE(review): this allocates N * M * T integers up front and only ever stores 0/1;
# a smaller dtype may be enough — confirm with the consumers of the export before changing.
professor_courses = np.zeros((N, M, T), dtype=int)
def parse_time_to_5_min_units(time_str):
    """Convert a 12-hour clock string in ``%I:%M%p`` form (e.g. "9:30AM") to 5-minute slots since midnight."""
    parsed = datetime.strptime(time_str, "%I:%M%p")
    return (parsed.hour * 60 + parsed.minute) // 5
# Mark professor_courses[professor, room, slot] = 1 for every 5-minute slot in which
# a known professor teaches in a known classroom.
day_mapping = {"Mo": 0, "Tu": 1, "We": 2, "Th": 3, "Fr": 4, "Sa": 5, "Su": 6}
slots_per_day = 24 * 60 // 5
with open(output_file, 'r', encoding='utf-8') as infile:
    data = json.load(infile)
for course in data:
    section_info = course.get("section_info", {})
    meetings = section_info.get("meetings", [])
    for obj in meetings:
        # The classroom name is taken to be the last two whitespace-separated tokens
        # of the room string; `or ""` keeps a null room from crashing .split().
        # NOTE(review): the saved output shows unmatched names such as
        # 'Health Ctr/Underserved', so this heuristic does not fit every room string.
        room_parts = (obj.get("room") or "").split()
        room_name = " ".join(room_parts[-2:]) if len(room_parts) >= 2 else None
        if room_name in [None, "NO ROOM"]:
            continue
        # The room id does not depend on the instructor, so look it up once per meeting.
        room_id = classroom_mapping.get(room_name)
        for instructor in obj.get("instructors", []):
            professor_name = instructor.get("name")
            professor_id = professor_mapping.get(professor_name)
            if professor_id is None:
                print(f"教授 '{professor_name}' 未找到对应的 ID")
            if room_id is None:
                print(f"Room '{room_name}' can't find ID")
            if professor_id is None or room_id is None:
                continue
            days_str = obj.get("days", "")
            start_time = parse_time_to_5_min_units(obj["meeting_time_start"])
            end_time = parse_time_to_5_min_units(obj["meeting_time_end"])
            # `days` is read as consecutive two-letter codes; unknown codes are skipped.
            for i in range(0, len(days_str), 2):
                day = day_mapping.get(days_str[i:i+2])
                if day is None:
                    continue
                day_offset = day * slots_per_day
                # One slice assignment covers the half-open slot range [start, end).
                professor_courses[professor_id, room_id, start_time + day_offset:end_time + day_offset] = 1


Room 'REL 404' can't find ID
Room 'CGS 427' can't find ID
Room 'WED 411' can't find ID
Room 'EVN 201' can't find ID
Room 'CFA 354' can't find ID
Room 'CFA 352' can't find ID
Room 'Health Ctr/Underserved' can't find ID
Room 'Health Ctr/Underserved' can't find ID
Room 'LAW 508' can't find ID
Room 'LAW 508' can't find ID
Room 'SAR 236' can't find ID
Room 'CGS 427' can't find ID
Room 'YAW 419' can't find ID
Room 'HAR 658' can't find ID
Room 'REL 404' can't find ID
Room 'PHO 207' can't find ID
Room 'Medical Center' can't find ID
Room 'Medical Center' can't find ID
Room 'CFA 352' can't find ID
Room 'LAW 513' can't find ID
Room 'LSE 904' can't find ID
Room 'MCH 102' can't find ID
Room 'CGS 417' can't find ID
Room 'HAR 419' can't find ID
Room 'HAR 419' can't find ID
Room 'PLS 512' can't find ID
Room 'LAW 203' can't find ID
Room 'LAW 420' can't find ID
Room 'LAW 420' can't find ID
Room 'STH 541' can't find ID
Room 'PHO 207' can't find ID
Room 'Auburn Hospital' can't find ID
Room 'Auburn Hospita

<h2>walking_cost: float[][], time cost matrix</h2>

In [114]:
import pandas as pd
# Build the classroom-to-classroom walking cost matrix from building-to-building distances.
# Each classroom's building is the first whitespace-separated token of its name.
b2b_distance = pd.read_csv("../data/b2b_walking_distance.csv")
# NOTE(review): `buildings` is not used below and keeps only the last classroom id
# per building; kept in case another cell reads it.
buildings = {name.split()[0]: idx for name, idx in classroom_mapping.items()}
num_classrooms = len(classroom_mapping)
walking_cost = np.full((num_classrooms, num_classrooms), np.inf)

# Group classroom ids by building once, instead of rescanning every classroom
# pair for each lookup. Assumes classroom_mapping ids are 0..n-1 in insertion
# order, which is how create_classroom_mapping assigns them.
rooms_by_building = {}
for classroom_name, classroom_idx in classroom_mapping.items():
    rooms_by_building.setdefault(classroom_name.split()[0], []).append(classroom_idx)

# Zero cost from a classroom to itself and between classrooms of the same building.
np.fill_diagonal(walking_cost, 0)
for room_ids in rooms_by_building.values():
    walking_cost[np.ix_(room_ids, room_ids)] = 0

# Apply each building pair's distance symmetrically to all of their classrooms;
# later rows overwrite earlier ones for the same pair.
for _, row in b2b_distance.iterrows():
    building_a, building_b, distance = row['abbreviationA'], row['abbreviationB'], row['distance']
    rooms_a = rooms_by_building.get(building_a)
    rooms_b = rooms_by_building.get(building_b)
    if not rooms_a or not rooms_b:
        continue
    walking_cost[np.ix_(rooms_a, rooms_b)] = distance
    walking_cost[np.ix_(rooms_b, rooms_a)] = distance

# Report classroom pairs for which the CSV provided no distance.
inf_positions = np.where(walking_cost == np.inf)
if inf_positions[0].size > 0:
    for i, j in zip(inf_positions[0], inf_positions[1]):
        print(f"Inf found at walking_cost[{i}][{j}]")
else:
    print("No Inf values found in walking_cost matrix.")

No Inf values found in walking_cost matrix.


<h1>Visualization</h1>

In [115]:
# Inspect the classroom name -> id mapping (displays the whole dict).
classroom_mapping

{'AGG C205': 0,
 'AGG G105': 1,
 'AGG G171': 2,
 'BAB 121': 3,
 'BAB 140': 4,
 'BAB 141': 5,
 'BAB 148': 6,
 'BCN 115': 7,
 'BCN 208': 8,
 'BRB 113': 9,
 'BRB 121': 10,
 'BRB 122': 11,
 'BRB B25': 12,
 'CAS 114A': 13,
 'CAS 114B': 14,
 'CAS 116': 15,
 'CAS 201': 16,
 'CAS 203': 17,
 'CAS 204A': 18,
 'CAS 204B': 19,
 'CAS 208': 20,
 'CAS 211': 21,
 'CAS 212': 22,
 'CAS 213': 23,
 'CAS 214': 24,
 'CAS 216': 25,
 'CAS 218': 26,
 'CAS 220': 27,
 'CAS 222': 28,
 'CAS 223': 29,
 'CAS 224': 30,
 'CAS 225': 31,
 'CAS 226': 32,
 'CAS 227': 33,
 'CAS 228': 34,
 'CAS 229': 35,
 'CAS 233': 36,
 'CAS 235': 37,
 'CAS 237': 38,
 'CAS 303A': 39,
 'CAS 306': 40,
 'CAS 310': 41,
 'CAS 312': 42,
 'CAS 313': 43,
 'CAS 314': 44,
 'CAS 315': 45,
 'CAS 316': 46,
 'CAS 318': 47,
 'CAS 320': 48,
 'CAS 322': 49,
 'CAS 323A': 50,
 'CAS 323B': 51,
 'CAS 324': 52,
 'CAS 325': 53,
 'CAS 326': 54,
 'CAS 327': 55,
 'CAS 330': 56,
 'CAS 335': 57,
 'CAS 415': 58,
 'CAS 424': 59,
 'CAS 425': 60,
 'CAS 426': 61,
 'CAS 42

In [116]:
# Inspect the classroom name -> capacity mapping (displays the whole dict).
name_capacity_dict

{'AGG G171': 105,
 'BAB 121': 50,
 'BAB 140': 90,
 'BAB 141': 63,
 'BAB 148': 60,
 'BCN 115': 120,
 'BCN 208': 110,
 'BRB 113': 48,
 'BRB 121': 30,
 'BRB 122': 35,
 'BRB B25': 24,
 'CAS 114A': 17,
 'CAS 114B': 17,
 'CAS 116': 40,
 'CAS 201': 40,
 'CAS 203': 50,
 'CAS 204A': 38,
 'CAS 204B': 34,
 'CAS 208': 35,
 'CAS 211': 112,
 'CAS 212': 22,
 'CAS 213': 51,
 'CAS 214': 35,
 'CAS 216': 63,
 'CAS 218': 35,
 'CAS 220': 32,
 'CAS 222': 36,
 'CAS 224': 133,
 'CAS 226': 58,
 'CAS 303A': 20,
 'CAS 306': 45,
 'CAS 310': 20,
 'CAS 312': 20,
 'CAS 313': 112,
 'CAS 314': 20,
 'CAS 315': 54,
 'CAS 316': 20,
 'CAS 318': 20,
 'CAS 320': 28,
 'CAS 322': 24,
 'CAS 323A': 20,
 'CAS 323B': 18,
 'CAS 324': 39,
 'CAS 325': 24,
 'CAS 326': 57,
 'CAS 327': 38,
 'CAS 330': 24,
 'CAS 335': 30,
 'CAS 415': 16,
 'CAS 424': 18,
 'CAS 425': 20,
 'CAS 426': 48,
 'CAS 427': 18,
 'CAS 430': 25,
 'CAS 502': 40,
 'CAS 521': 16,
 'CAS 522': 208,
 'CAS 530': 20,
 'CAS 534': 18,
 'CAS 538': 18,
 'CAS B06A': 38,
 'CAS B0

In [117]:
# Inspect the instructor name -> id mapping (displays the whole dict).
professor_mapping

{'Min Ye': 0,
 'Sorcha Martin': 1,
 'Max Anzede': 2,
 'Roberto Tron': 3,
 'Libang Wang': 4,
 'Kathleen Corriveau': 5,
 'Ava Greene': 6,
 'Aiman Abilova': 7,
 'Assaf Kfoury': 8,
 'William Letizia': 9,
 'Diana Lobel': 10,
 'Nilay Kafali': 11,
 'Joshua Benton': 12,
 'John McGinnis': 13,
 'Mark Stanley': 14,
 'Manuel Ramirez': 15,
 'Alina Ene': 16,
 'Tiago Januario': 17,
 'Patrice Oppliger': 18,
 'Katelyn Bird': 19,
 'Joseph Russo': 20,
 'Sandra Buerger': 21,
 'Bjorn Persson': 22,
 'Rebecca Gebert': 23,
 'Christine Hamel': 24,
 'Pipier Smith-Mumford': 25,
 'Tanima Chatterjee': 26,
 'Christine Papadakis-Kanaris': 27,
 'Hongwei Xi': 28,
 'Yi Grace Ji': 29,
 'Ara Sarkissian': 30,
 'Edward Kearns': 31,
 'Rachel Mesch': 32,
 'Jerome Mertz': 33,
 'Sree Kumar Valath Bhuan Das': 34,
 'Sally Sedgwick': 35,
 'Ronald Czik': 36,
 'Ken Chung': 37,
 '-': 38,
 'Gregg Jaeger': 39,
 'Brian Kellum': 40,
 'Jeffrey Leonard': 41,
 'Rebecca Roesler': 42,
 'Lorenzo Sanchez-Gatt': 43,
 'Lance Galletti': 44,
 'Pet

In [118]:
# Inspect the merged schedule: professor id -> list of (start, end, capacity) tuples.
professor_schedule

{0: [(1014, 1047, 23), (402, 417, 46), (978, 993, 46)],
 1: [(150, 168, 31),
  (96, 117, 90),
  (672, 693, 90),
  (122, 143, 31),
  (186, 204, 30),
  (168, 186, 29),
  (978, 993, 32)],
 2: [(737, 747, 25), (750, 760, 25)],
 3: [(474, 495, 50), (1050, 1071, 50)],
 4: [(187, 197, 10), (763, 773, 10)],
 5: [(384, 417, 30)],
 6: [(1274, 1284, 25), (1248, 1258, 25), (1261, 1271, 25)],
 7: [(148, 158, 26), (161, 171, 26), (135, 145, 26)],
 8: [(402, 417, 35), (978, 993, 35), (750, 760, 35)],
 9: [(402, 417, 35), (978, 993, 35)],
 10: [(161, 171, 14),
  (737, 747, 14),
  (1313, 1323, 14),
  (456, 471, 14),
  (1032, 1047, 14)],
 11: [(161, 171, 75),
  (737, 747, 75),
  (1313, 1323, 76),
  (122, 132, 40),
  (698, 708, 40),
  (1274, 1284, 40),
  (135, 145, 76),
  (711, 721, 76),
  (1287, 1297, 76)],
 12: [(222, 255, 15)],
 13: [(1265, 1298, 20),
  (198, 213, 22),
  (774, 789, 22),
  (113, 146, 24),
  (798, 831, 22),
  (222, 255, 22)],
 14: [(698, 719, 9),
  (438, 471, 30),
  (750, 783, 20),
  (6

In [119]:
# Inspect the classroom name -> id mapping again (same display as the earlier cell in this section).
classroom_mapping

{'AGG C205': 0,
 'AGG G105': 1,
 'AGG G171': 2,
 'BAB 121': 3,
 'BAB 140': 4,
 'BAB 141': 5,
 'BAB 148': 6,
 'BCN 115': 7,
 'BCN 208': 8,
 'BRB 113': 9,
 'BRB 121': 10,
 'BRB 122': 11,
 'BRB B25': 12,
 'CAS 114A': 13,
 'CAS 114B': 14,
 'CAS 116': 15,
 'CAS 201': 16,
 'CAS 203': 17,
 'CAS 204A': 18,
 'CAS 204B': 19,
 'CAS 208': 20,
 'CAS 211': 21,
 'CAS 212': 22,
 'CAS 213': 23,
 'CAS 214': 24,
 'CAS 216': 25,
 'CAS 218': 26,
 'CAS 220': 27,
 'CAS 222': 28,
 'CAS 223': 29,
 'CAS 224': 30,
 'CAS 225': 31,
 'CAS 226': 32,
 'CAS 227': 33,
 'CAS 228': 34,
 'CAS 229': 35,
 'CAS 233': 36,
 'CAS 235': 37,
 'CAS 237': 38,
 'CAS 303A': 39,
 'CAS 306': 40,
 'CAS 310': 41,
 'CAS 312': 42,
 'CAS 313': 43,
 'CAS 314': 44,
 'CAS 315': 45,
 'CAS 316': 46,
 'CAS 318': 47,
 'CAS 320': 48,
 'CAS 322': 49,
 'CAS 323A': 50,
 'CAS 323B': 51,
 'CAS 324': 52,
 'CAS 325': 53,
 'CAS 326': 54,
 'CAS 327': 55,
 'CAS 330': 56,
 'CAS 335': 57,
 'CAS 415': 58,
 'CAS 424': 59,
 'CAS 425': 60,
 'CAS 426': 61,
 'CAS 42

In [38]:
# Inspect the professor x classroom x time-slot occupancy array (numpy abbreviates the display).
professor_courses

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [120]:
# Inspect the classroom-to-classroom walking cost matrix (numpy abbreviates the display).
walking_cost

array([[   0.  ,    0.  ,    0.  , ..., 1805.07, 1805.07, 1805.07],
       [   0.  ,    0.  ,    0.  , ..., 1805.07, 1805.07, 1805.07],
       [   0.  ,    0.  ,    0.  , ..., 1805.07, 1805.07, 1805.07],
       ...,
       [1805.07, 1805.07, 1805.07, ...,    0.  ,    0.  ,    0.  ],
       [1805.07, 1805.07, 1805.07, ...,    0.  ,    0.  ,    0.  ],
       [1805.07, 1805.07, 1805.07, ...,    0.  ,    0.  ,    0.  ]])

<h1>Export</h1>

In [127]:
import pickle
# Bundle the processed structures and pickle them into the current working directory.
# NOTE(review): this writes "data_export.pkl", while the loading cell below reads
# "../data/exported_data.pkl" — confirm which path is intended.
# NOTE(review): name_capacity_dict is not part of the export; only the flat
# capacities list is.
data_to_export = dict(
    professor_schedule=professor_schedule,
    professor_mapping=professor_mapping,
    classroom_mapping=classroom_mapping,
    professor_courses=professor_courses,
    capacities=capacities,
    walking_cost=walking_cost,
)

with open("data_export.pkl", "wb") as file:
    pickle.dump(data_to_export, file)


In [39]:
import pickle

# Load the pkl file.
# NOTE(review): this path differs from the "data_export.pkl" written by the export
# cell, and the saved output shows different keys (e.g. 'capacities_top10'), so this
# appears to read a different file — confirm which one is intended.
# pickle.load can execute arbitrary code; only load files from a trusted source.
# This also rebinds the global name `data` used by earlier cells.
with open("../data/exported_data.pkl", "rb") as file:
    data = pickle.load(file)

# Show the loaded contents.
print(data)


{'capacities_top10': [269, 12, 12, 12, 12, 16, 12, 8, 105, 320], 'professor_courses_top5': {1: [(150, 168, 31), (96, 117, 90), (672, 693, 90), (122, 143, 31), (186, 204, 30), (168, 186, 29), (978, 993, 32)], 2: [(180, 202, 21), (156, 178, 31), (1272, 1294, 55)], 4: [(474, 495, 50), (1050, 1071, 50)], 5: [(187, 197, 10), (763, 773, 10)], 6: [(384, 417, 30)]}, 'walking_cost_df_top10':           ABG 101  ABG 301A  ABG 406  ABG 408  ABG 409  ABG 409A  ABG 410  \
ABG 101      0.00      0.00     0.00     0.00     0.00      0.00     0.00   
ABG 301A     0.00      0.00     0.00     0.00     0.00      0.00     0.00   
ABG 406      0.00      0.00     0.00     0.00     0.00      0.00     0.00   
ABG 408      0.00      0.00     0.00     0.00     0.00      0.00     0.00   
ABG 409      0.00      0.00     0.00     0.00     0.00      0.00     0.00   
ABG 409A     0.00      0.00     0.00     0.00     0.00      0.00     0.00   
ABG 410      0.00      0.00     0.00     0.00     0.00      0.00     0.00  