In [1]:
import requests
import json
from collections import defaultdict

## Keys for filtering

In [2]:
# Course-record fields kept for the sidebar data; used as the `which_keys`
# filter when get_course_basic_filter builds sidebar_info.
sidebar_keys = {
    # identifier
    "subject_id",
    # descriptive text
    "title", "description",
    # term-offering flags
    "offered_fall", "offered_spring", "offered_summer", "offered_IAP",
    # remaining catalog attributes
    "gir_attribute", "instructors", "total_units", "level",
    "prerequisites", "corequisites",
}

# Course-record fields kept for the graph data; used as the `which_keys`
# filter before convert_list_graph_info is applied to each record.
graph_keys = {
    "subject_id",
    "prerequisites", "corequisites",
    "related_subjects",
    "gir_attribute",
}

## API requests

In [3]:
def filter_dictionary(all_info_dict, which_keys):
    """
    Indexes course records by subject id, keeping only the requested fields.
    Inputs:
        all_info_dict, an iterable of Dictionaries (parsed JSON course records);
            every record must have a 'subject_id' entry.
        which_keys, a collection of str field names to keep (tested with `in`).
    Outputs:
        a new Dictionary mapping each record's 'subject_id' to a Dictionary that
            holds only the fields of that record whose names are in which_keys.
    """
    filtered_by_subject = {}

    for record in all_info_dict:
        subject_id = record['subject_id']

        kept_fields = {}
        for field, value in record.items():
            if field in which_keys:
                kept_fields[field] = value

        # A later record with the same subject id replaces the earlier one.
        filtered_by_subject[subject_id] = kept_fields

    return filtered_by_subject

def convert_list_graph_info(raw_graph_info):
  """
  Converts one graph-filtered course record into its list-based form.
  Inputs:
    raw_graph_info, a Dictionary, one value of filter_dictionary applied with graph_keys.
  Outputs:
    a new Dictionary with 'subject_id', 'prerequisites' and 'corequisites'
    (Lists produced by get_prereqs / get_coreqs), 'related_subjects'
    (a copy of the input list, or [] when absent) and, only when the input
    has it, 'gir_attribute'.
  """
  converted = {}
  converted["subject_id"] = raw_graph_info['subject_id']
  converted["prerequisites"] = get_prereqs(raw_graph_info)
  # TODO (carried over from the original): GIR parsing of corequisites is still pending.
  converted["corequisites"] = get_coreqs(raw_graph_info)

  if 'related_subjects' in raw_graph_info:
    converted["related_subjects"] = raw_graph_info['related_subjects'][:]
  else:
    converted["related_subjects"] = []

  # The GIR attribute is optional and is copied through unchanged.
  if 'gir_attribute' in raw_graph_info:
    converted['gir_attribute'] = raw_graph_info['gir_attribute']

  return converted

def get_course_basic_filter(request_url, timeout=60):
    """
    Requests and returns sidebar/graph information to be stored in database.
    Inputs:
        request_url, a str, the API url to request from
        timeout, seconds handed to requests.get as its timeout (default 60),
            so a stalled connection raises instead of hanging the notebook.
    Outputs:
        sidebar_info, graph_info,
            Dictionaries keyed by subject id that specify data to be stored on database.
            graph_info values have already been passed through convert_list_graph_info.
        gir_attr_to_class_info,
            a Dict (str -> List of str) of GIR attributes (without "GIR:")
                to the decimal str names of the classes that fulfill that GIR.
    Raises:
        requests.HTTPError when the server answers with an error status;
        other requests exceptions (e.g. on timeout) propagate unchanged.
    """

    # Got help from this tutorial on how to make request
    # https://realpython.com/api-integration-in-python/
    response = requests.get(request_url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than trying to parse an error page as course data.
    response.raise_for_status()
    all_info = response.json()

    sidebar_info = filter_dictionary(all_info, sidebar_keys)
    graph_info = {
        subject_id: convert_list_graph_info(raw_info)
        for subject_id, raw_info in filter_dictionary(all_info, graph_keys).items()
    }

    gir_attr_to_class_info = gir_attr_to_classes(all_info)

    return sidebar_info, graph_info, gir_attr_to_class_info


def gen_usable_graph_info(graph_info, gir_attr_to_class_dict):
    """
    Applies GIR pooling to the requisites and then computes after subjects.
    Inputs:
        graph_info, the graph output of get_course_basic_filter
              (raw prereqs, coreqs already converted to list)
        gir_attr_to_class_dict, a Dict of str GIR names to the str names of classes that fulfill them.
    Outputs:
        the graph_info Dict returned by gen_after_subjects, i.e. with:
            (1) requisites rewritten by gir_redistribute_reqs, so GIR classes and
                references to them carry individual class names rather than the GIR attribute;
            (2) an 'after_subjects' entry added to every class.
    """
    # Order matters: after subjects must be derived from the already-pooled requisites.
    pooled_graph_info = gir_redistribute_reqs(graph_info, gir_attr_to_class_dict)

    # gen_after_subjects also performs the GIR redistribution of after subjects.
    return gen_after_subjects(pooled_graph_info, gir_attr_to_class_dict)

## GIR parsing and redistribution

In [4]:
def gir_attr_to_classes(all_girs):
    """
    Groups class names by the GIR attribute they carry.
    Input:
        all_girs, an iterable of Dictionaries (course records); records may
            or may not have a 'gir_attribute' entry.
    Output:
        a Dictionary,
            keys: (str), the GIR attribute
            values: (List of str), the 'subject_id' of every record with that
                attribute, in input order
        Records without 'gir_attribute', and the attributes LAB, LAB2 and REST,
        are left out.
    """
    ignored_attrs = {"LAB", "LAB2", "REST"}
    classes_by_attr = {}

    for course in all_girs:
        # Some records have no GIR attribute at all (e.g. graduate classes such as 1.001).
        if 'gir_attribute' not in course:
            continue

        attr = course['gir_attribute']
        if attr not in ignored_attrs:
            classes_by_attr.setdefault(attr, []).append(course['subject_id'])

    return classes_by_attr

def _pool_reqs(graph_info, attr_to_class):
    
    """
    Inputs:
        graph_info and attr_to_class,
            the graph and GIR-related result of get_course_basic_filter.
    Outputs:
        a Dict,
            keys (str): The GIR attribute
            values: a Dict,
                keys (str): type of requisite
                values (Set): the requisies associated with this "pool" of GIR attribute.
    """

    req_types = ['prerequisites', 'corequisites']

    gir_pooled_reqs = {gir_attr : {req_type : set() for req_type in req_types}
                     for gir_attr in attr_to_class}

    for gir_attr, gir_classes in attr_to_class.items():
    #Note that GIR:ATTR is the form of the GIR in the pre/coreqs.
        for gir_class in gir_classes:
            for req_type in req_types:
                gir_pooled_reqs[gir_attr][req_type] |= set(graph_info[gir_class][req_type])
    
    return gir_pooled_reqs
  
def gir_redistribute_reqs(graph_info, attr_to_class):
  """
  Rewrites requisites so that GIR groups are pooled and "GIR:ATTR" references
  are replaced by the concrete classes of that attribute.
  Inputs:
    graph_info, a Dict of class name -> Dict with List entries
        'prerequisites' and 'corequisites' (modified in place);
    attr_to_class, a Dict, output of gir_attr_to_classes
  Outputs:
    the same graph_info object, with the pre/coreq entries updated.
  """
  req_types = ('prerequisites', 'corequisites')

  # STEP 1: union the requisites of all classes sharing a GIR attribute.
  pooled_reqs = _pool_reqs(graph_info, attr_to_class)

  # STEP 2: every class of a GIR group receives its own copy of the group's pool.
  for gir_attr, gir_classes in attr_to_class.items():
    for gir_class in gir_classes:
      gir_class_info = graph_info[gir_class]
      for req_type in req_types:
        gir_class_info[req_type] = list(pooled_reqs[gir_attr][req_type])

  # STEP 3: in every class (GIR or not), swap each "GIR:ATTR" requisite for the
  # classes that satisfy ATTR. Plain requisites keep their order and come first.
  gir_placeholders = {f'GIR:{gir_attr}' for gir_attr in pooled_reqs}

  for class_info in graph_info.values():
    for req_type in req_types:
      plain_reqs = []
      placeholders_found = []
      for req in class_info[req_type]:
        if req in gir_placeholders:
          placeholders_found.append(req)
        else:
          plain_reqs.append(req)

      for placeholder in placeholders_found:
        gir_attr = placeholder.split("GIR:")[1]
        plain_reqs.extend(attr_to_class[gir_attr])

      class_info[req_type] = plain_reqs

  return graph_info

def _gir_redistribute_after_subjects(graph_info, after_subjects, attr_to_class):
  """
  Gives pooled after subjects of gir attr to the GIR-type classes.
  Inputs:
      all intermediate results of after_subjects.
      This function should not be called independently.
  Outputs:
      graph_info, the modified graph information with GIR pooling applied to after subjects.
  """
  
  for gir_attr, gir_classes in attr_to_class.items():
    #Note that GIR:ATTR is the form of the GIR in the pre/coreqs.
    formal_gir_attr = f"GIR:{gir_attr}"
    for gir_class in gir_classes:
        graph_info[gir_class]['after_subjects'] = sorted(list(after_subjects[formal_gir_attr]))

  return graph_info
   
def all_gir_attr_to_classes():
  """
  Queries the Fireroad API for all GIR courses (no inputs) and groups them.
  Returns the Dictionary built by gir_attr_to_classes from the response:
    keys: the GIR attributes (str)
    values: the decimal names of the classes, List of str
      (LAB, LAB2 and REST are ignored by gir_attr_to_classes).
  """
  response = requests.get("https://fireroad.mit.edu/api/courses/all?gir=any")
  return gir_attr_to_classes(response.json())

## Pre, coreq, after subject parsing and computation

In [5]:
import string

def parse_and_or(class_info_str):
  """
  Splits a requisite statement into its individual elements.
  The characters ',', '(', ')' and '/' all act as separators; every piece is
  stripped of surrounding whitespace, and empty or whitespace-only pieces are
  dropped. See 6.036 for a statement combining commas, GIRs and "/" alternatives.
  Returns a List of str in their original order.
  """
  # Map the other three separators onto '/' so a single split does the job.
  to_slash = str.maketrans(",()", "///")
  pieces = class_info_str.translate(to_slash).split('/')

  return [piece.strip() for piece in pieces if piece and not piece.isspace()]

def get_prereqs(class_info):
  """
  Lists the prerequisites of a class, flattening AND / OR groupings.
  Parameters:
    class_info, a Dictionary (one course record); its 'prerequisites' entry,
        when present, is the raw prerequisite statement (str).
  Returns:
    a List of str: the elements of the statement as split by parse_and_or,
    without the embedded corequisite elements, which look like
    "''Coreq: 6.009''" in e.g. "(6.0001/''Coreq: 6.009'')".
    [] when the record has no 'prerequisites'.
  """
  if 'prerequisites' not in class_info:
    return []

  coreq_marker = "''Coreq: "

  # "or" alternatives and parenthesised groups are treated like standalone prerequisites.
  return [
      element
      for element in parse_and_or(class_info['prerequisites'])
      if coreq_marker not in element
  ]

def get_coreqs(class_info):
  """
  Lists the corequisites of a class as a List of str without duplicates.
  Two sources are combined:
    - corequisites embedded in the 'prerequisites' statement as "''Coreq: X''"
      (see 6.036 data for an example);
    - the elements of the 'corequisites' statement itself.
  Either entry may be missing from class_info. The result goes through a set,
  so its order is not defined.
  """
  coreq_marker = "''Coreq: "

  embedded_coreqs = []
  if 'prerequisites' in class_info:
    for element in parse_and_or(class_info['prerequisites']):
      if coreq_marker in element:
        # Keep what follows the marker and drop the surrounding quote characters.
        embedded_coreqs.append(element.split(coreq_marker)[1].strip("'"))

  if 'corequisites' in class_info:
    pure_coreqs = parse_and_or(class_info['corequisites'])
  else:
    pure_coreqs = []

  return list(set(embedded_coreqs + pure_coreqs))

In [6]:
def gen_after_subjects(all_graph_info, attr_to_class):
  """
  Adds an 'after_subjects' List to every class of all_graph_info (in place).
  Inputs:
   all_graph_info, a Dict of class name -> class info Dict with List entries
       'prerequisites' and 'corequisites' (convert_list_graph_info output per course).
   attr_to_class, the information of which classes satisfy which GIR.
  Outputs:
   the graph info returned by _gir_redistribute_after_subjects, which then
   overwrites the entry of the GIR classes.
  """
  # requisite name -> classes that list it as a pre- or corequisite
  after_subjects = defaultdict(set)

  for subj, subj_info in all_graph_info.items():
    required_first = subj_info['corequisites'] + subj_info['prerequisites']
    for required in required_first:
      after_subjects[required].add(subj)

  for subj, subj_info in all_graph_info.items():
    subj_info['after_subjects'] = list(after_subjects[subj])

  return _gir_redistribute_after_subjects(all_graph_info, after_subjects, attr_to_class)

## Tests

In [12]:
def get_class_test_data(data_reqs, as_list = False):
  """
  Fetches one course record per class id from the Fireroad lookup endpoint.
  Inputs:
    data_reqs, an iterable of str class ids
    as_list, when True return only the records, as a List
  Outputs:
    a Dict of class id -> parsed JSON record, or the List of its values.
  """
  records_by_id = {}
  for class_id in data_reqs:
    response = requests.get(f"https://fireroad.mit.edu/api/courses/lookup/{class_id}")
    records_by_id[class_id] = response.json()

  if as_list:
    return list(records_by_id.values())
  return records_by_id


In [10]:
def test_gir_attr_to_classes():
  """
  Checks gir_attr_to_classes on live records of five classes: the result must
  hold exactly the CHEM, PHY1 and PHY2 groups listed below.
  """
  course_records = get_class_test_data(
      ["1.00", "8.01", "8.02", "5.111", "3.091"],
      as_list = True
  )

  expected = {
      "CHEM": ["5.111", "3.091"],
      "PHY1": ["8.01"],
      "PHY2": ["8.02"]
  }

  assert expected == gir_attr_to_classes(course_records)
  

def test_get_neighbor_class_funcs():
  """
  Checks get_prereqs and get_coreqs against live Fireroad records.
  Results are compared order-insensitively (both sides are sorted), since
  get_coreqs builds its result through a set.
  """

  data_reqs = ["6.036", "6.006", "1.053"]
  class_test_data = get_class_test_data(data_reqs)

  expected = {
      "6.036":{
          'prerequisites': ["GIR:CAL2", "6.0001", "6.01"],
          'corequisites': []
      },
      "6.006":{
          'prerequisites': ["6.0001", "6.042"],
          'corequisites': ["6.009"]
      },
      "1.053":{
          'prerequisites': ["GIR:PHY2"],
          'corequisites': ["2.087", "18.03"]
      }
  }

  for class_id, expected_results in expected.items():
    actual_results = {
        'prerequisites': get_prereqs(class_test_data[class_id]),
        'corequisites': get_coreqs(class_test_data[class_id])
    }

    # One assertion per requisite type, so a failure names the class and field.
    # (The original wrapped a single all(...) in a loop whose variable was unused,
    # repeating the identical check.)
    for req_key, expected_reqs in expected_results.items():
      assert sorted(actual_results[req_key]) == sorted(expected_reqs), (
          f"{class_id} {req_key}: actual {actual_results[req_key]}, expected {expected_reqs}"
      )

def test_filter_dictionary():
    """
    Checks filter_dictionary on two in-memory records with three key
    selections: all data fields, no fields, and a single field.
    """
    raw_data = [
      {"subject_id": "6.042", "a": 1, "b": 2, "c": 3},
      {"subject_id": "6.009", "a": -1, "b": -2, "c": -3}
    ]

    # (keys to keep, expected filtered result)
    cases = [
        (['a', 'b', 'c'],
         {"6.042": {"a": 1, "b": 2, "c": 3},
          "6.009": {"a": -1, "b": -2, "c": -3}}),
        ([],
         {"6.042": {},
          "6.009": {}}),
        (['a'],
         {"6.042": {"a": 1},
          "6.009": {"a": -1}}),
    ]

    expected = [wanted for _, wanted in cases]
    actual = [filter_dictionary(raw_data, keys) for keys, _ in cases]

    assert expected == actual

In [13]:
#1/12: For request: https://realpython.com/api-integration-in-python/
def test_graph_info():
  """
  End-to-end check of gen_usable_graph_info on live Fireroad records of six classes.

  The requisite and after-subject lists are built through sets in the pipeline
  (list(set(...)) in get_coreqs, gen_after_subjects and gir_redistribute_reqs),
  so their order varies with string hash randomization. Those three fields are
  therefore sorted on both sides before comparing; 'related_subjects' and the
  scalar fields are still compared exactly.
  """

  def normalized(graph):
    # Copy of `graph` with the set-derived list fields put into sorted order.
    unordered_fields = ('prerequisites', 'corequisites', 'after_subjects')
    return {
        class_id: {
            field: sorted(value) if field in unordered_fields else value
            for field, value in class_info.items()
        }
        for class_id, class_info in graph.items()
    }

  data_reqs = ["18.01", "6.042", "21W.747", "6.0001", "6.009", "6.006"]
  class_test_data = get_class_test_data(data_reqs, as_list = True)

  converted_data = {k : convert_list_graph_info(v) for k, v in filter_dictionary(class_test_data, graph_keys).items()}
  gir_attr_to_class_dict = gir_attr_to_classes(class_test_data)

  actual = normalized(gen_usable_graph_info(converted_data, gir_attr_to_class_dict))

  expected = normalized({
      "18.01": {
        "subject_id": "18.01",
        "prerequisites": [],
        "corequisites": [],
        "gir_attribute": "CAL1",
        "related_subjects": ["18.02", "18.112", "18.1121", "18.952", "18.04", "18.306", "18.303", "18.022", "18.676", "18.100P"],
        #Above related subjects: https://fireroad.mit.edu/api/courses/lookup/18.01?full=true
        "after_subjects": []
      },
      "6.042":{
        "gir_attribute": "REST",
        "subject_id": "6.042",
        "prerequisites": ["18.01"],
        "corequisites": [],
        "related_subjects": ["18.062", "6.853", "6.254", "6.838", "6.047", "6.440", "6.436", "6.0001", "16.858", "9.07"],
        #Above related subjects: https://fireroad.mit.edu/api/courses/lookup/6.042?full=true
        "after_subjects": ["6.006"]
      },
      "21W.747":{
        "subject_id": "21W.747",
        "prerequisites": [],
        "corequisites": [],
        "related_subjects": ["21W.013", "21W.746", "21W.825", "21W.773", "21W.036", "21W.790", "21W.890", "21W.042", "21W.014", "21W.794"],
        #Above related subjects: https://fireroad.mit.edu/api/courses/lookup/21W.747?full=true
        "after_subjects": []
      },
      "6.0001":{
        "subject_id": "6.0001",
        "prerequisites": [],
        "corequisites": [],
        "related_subjects": ["6.251", "6.336", "6.031", "6.802", "6.874", "6.401", "6.481", "6.037", "6.178", "6.851"],
        #Above related subjects: https://fireroad.mit.edu/api/courses/lookup/6.0001?full=true
        "after_subjects": ["6.009", "6.006"]
      },
      "6.009":{
        "gir_attribute": "LAB",
        "subject_id": "6.009",
        "prerequisites": ["6.0001"],
        "corequisites": [],
        "related_subjects": ["6.9021", "6.031", "6.170", "6.902", "6.025", "6.525", "6.320", "6.302", "6.267", "2.7231"],
        #Above related subjects: https://fireroad.mit.edu/api/courses/lookup/6.009?full=true
        "after_subjects": ["6.006"]
      },
       "6.006":{
        "subject_id": "6.006",
        "prerequisites": ["6.042", "6.0001"],
        "corequisites": ["6.009"],
        "related_subjects": ["6.850", "6.854", "6.251", "6.851", "6.856", "6.0001", "6.047", "6.008", "6.046", "6.267"],
        #Above related subjects: https://fireroad.mit.edu/api/courses/lookup/6.006?full=true
        "after_subjects": []
      },
  })

  # Print every differing field before asserting, to make failures readable.
  # .get() keeps an unexpected class or field from raising KeyError here and
  # hiding the real assertion below.
  for class_id, actual_info in actual.items():
    expected_info = expected.get(class_id, {})
    for key, actual_value in actual_info.items():

      if key not in expected_info or actual_value != expected_info[key]:
        print(f'For {class_id}, {key}')
        print('\tactual', actual_value)
        print('\texpected', expected_info.get(key))

  assert expected == actual

def run_tests():
    """
    Calls every test function of this notebook, in a fixed order.
    Nothing is caught: the first failing test stops the run with its exception.
    """
    suite = (
        test_filter_dictionary,
        test_get_neighbor_class_funcs,
        test_gir_attr_to_classes,
        test_graph_info,
    )

    for run_one in suite:
        run_one()

# Run the whole suite. Exceptions raised by a test propagate out of run_tests(),
# so the message below is printed only when every test returned normally.
run_tests()
print("Tests passed.")

Tests passed.


## Main

In [7]:
def gen_usable_info(save_path, request_url = "https://fireroad.mit.edu/api/courses/all?full=True"):
  """
  The "main" method -> takes in a request url,
  does all relevant computations to get the sidebar and graph information, and
  saves them as JSON inside the directory save_path (a str):
      <save_path>/sidebar_info.json
      <save_path>/graph_info.json
  The directory is created when it does not exist yet.

  (Previously save_path was only used in the final message and both files were
  written to the current working directory.)
  """
  # Local import so this function does not rely on the `import os` of a later cell.
  import os

  sidebar_info, raw_graph_info, gir_attr_to_class_info = get_course_basic_filter(request_url)
  graph_info = gen_usable_graph_info(raw_graph_info, gir_attr_to_class_info)

  # 1/13: JSON tutorial
  # https://appdividend.com/2019/04/15/how-to-convert-python-dictionary-to-json-tutorial-with-example/

  os.makedirs(save_path, exist_ok=True)

  with open(os.path.join(save_path, "sidebar_info.json"), "w") as sidebar_f:
    json.dump(sidebar_info, sidebar_f)

  with open(os.path.join(save_path, "graph_info.json"), "w") as graph_f:
    json.dump(graph_info, graph_f)

  print(f'Wrote graph and sidebar info to destination {save_path}.')

In [8]:
import os

# Directory passed to gen_usable_info as its save_path argument.
DATA_SAVE_PATH = './data'
# Create the directory (and any missing parents) on first run.
if not os.path.exists(DATA_SAVE_PATH):
    os.makedirs(DATA_SAVE_PATH)
    
# Fetch the course data with the default request URL and write the JSON files.
gen_usable_info(DATA_SAVE_PATH)

Wrote graph and sidebar info to destination ./data.
