In [1]:
import pandas as pd
import numpy as np
import json, requests

In [2]:
mappings = pd.read_csv("cleaned_mappings.csv")
mappings.head()

Unnamed: 0,Faculty,Partner University,PU Module 1,PU Module 1 Title,PU Mod1 Credits,PU Module 2,PU Module 2 Title,PU Mod2 Credits,NUS Module 1,NUS Module 1 Title,NUS Mod1 Credits,NUS Module 2,NUS Module 2 Title,NUS Mod2 Credits,Pre Approved?
0,Faculty of Arts & Social Sci,The Hong Kong Polytechnic University,CBS241,Elementary Chinese II (for Non-Chinese speakin...,1.0,,,,LAC2731,Department Exchange Module,3.0,,,,Y
1,Faculty of Arts & Social Sci,The Hong Kong Polytechnic University,CC2C08,Mutual Impressions of China and the West,3.0,,,,PS2238,Int'l Politics of NE Asia,4.0,,,,Y
2,Faculty of Arts & Social Sci,Hong Kong University of Science & Technology,LANG1120,Chinese for Non-Chinese Language Background St...,1.0,,,,LAC1731,Department exchange module,3.0,,,,Y
3,Faculty of Arts & Social Sci,City University of Hong Kong,AIS3126,International Political Economy,3.0,,,,PS3238,Int'l Political Economy,4.0,,,,Y
4,Faculty of Arts & Social Sci,City University of Hong Kong,GE2210,China: A Socio-Political Transformation,3.0,,,,PS2248,Chinese Politics,4.0,,,,Y


In [3]:
with open('equivalentModuleMappings.json','r') as f:
    equivalent_module_mappings = json.load(f)

In [7]:
def get_equivalent_modules(modules, equivalent_module_mappings=equivalent_module_mappings):
    output = set()
    for module in modules:
        output.update(set(equivalent_module_mappings[module]))
    return output

### Note:
This list is created manually. I could not think of any fast/efficient way to determine if 2 schools
are the same entity

In [8]:
equivalent_schools_mapping = {
    'Aalto University': ['Aalto University, Helsinki'],
    'Aarhus School of Business': ['Aarhus University'],
    'Cornell University': ['Cornell Univ Coll of Agriculture & Life Sciences', 'Cornell Univ Coll of Human Ecology'],
    'Georgetown University': ['Georgetown University Law Center','Georgetown University, Washington D.C.'],
    'Humboldt University of Berlin': ['Humboldt-Universitaet zu Berlin'],
    'Imperial College London': ['Imperial College Business School'],
    'Leiden University': ['Leiden University Medical Center (LUMC)'],
    'University College London': ['University College London, University of London'],
    'Universite Catholique De Louvain': ['Universite Catholique de Louvain'],
}
schools = list(equivalent_schools_mapping.keys())
for school in schools:
    for equiv in equivalent_schools_mapping[school]:
        equivalent_schools_mapping[equiv] = equivalent_schools_mapping[school]

def get_equivalent_schools(schools, equivalent_schools_mapping=equivalent_schools_mapping):
    output = set()
    for school in schools:
        output.update(set(equivalent_schools_mapping[school]))
    return output

In [6]:
def get_department(title):
    end_index = 0
    for i, char in enumerate(title):
        if not char.isalpha():
            end_index = i
            break
    return title[:end_index]
    
def department_filter(departments, mappings=mappings):
    if len(departments) == 0:
        return mappings

    filter_function = lambda title : (type(title) == str) and (get_department(title) in departments)
    return mappings[mappings['NUS Module 1'].map(filter_function) | mappings['NUS Module 2'].map(filter_function)]


def module_filter(modules, mappings=mappings):
    if len(modules) == 0:
        return mappings
    
    equivalent_modules = get_equivalent_modules(modules)
    filter_function = lambda title: (title in equivalent_modules) or (title[-1] in equivalent_modules)
    return mappings[mappings['NUS Module 1'].map(filter_function) | mappings['NUS Module 2'].map(filter_function)]

def school_filter(schools, mappings=mappings):
    if len(schools) == 0:
        return mappings
    
    equivalent_schools = get_equivalent_schools(schools)
    filter_function = lambda school: school in equivalent_schools
    return mappings[mappings['Partner University'].map(filter_function)]