### Introduction

I am currently trying to develop an NLP for class recommendations. I first need to scrape the UIUC course explorer, make that into a SQL database. Then I can get into then fun stuff with ML.

In [40]:
import requests
import json
import pandas as pd

# 🔹 Replace with your actual API URL
api_url = "https://waf.cs.illinois.edu/discovery/grade_disparity_between_sections_at_uiuc/data/out_full.json"

# 🔹 Headers (copy from DevTools if needed)
headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "https://courses.illinois.edu/",
    "Accept": "application/json"
}

# 🔹 Fetch JSON data
response = requests.get(api_url, headers=headers)

# 🔹 Parse JSON
if response.status_code == 200:
    data = response.json()
else:
    print(f"Error {response.status_code}: Failed to fetch data")
    exit()


In [41]:
import json

# Print the first course entry to see the structure
print(json.dumps(data[:3], indent=4)) 

[
    {
        "course": "AAS 100",
        "instructors": [
            {
                "instructor": "All Sections",
                "countGPA": 2339,
                "avgGPA": 3.466451474989311,
                "gpa_1_8": 4,
                "gpa_1_6": 4,
                "topQuartGPA": 4,
                "medianGPA": 4,
                "bottomQuartGPA": 3,
                "gpa_5_6": 3,
                "gpa_7_8": 2.67,
                "gpa_top": 4,
                "gpa_bottom": 0,
                "stddevDiff": 99999,
                "sections": 87,
                "course": "AAS 100",
                "Course Title": "Intro Asian American Studies",
                "Course Subject": "AAS",
                "gpaDist": [
                    1596,
                    487,
                    150,
                    32,
                    74
                ]
            },
            {
                "instructor": "Shin, J.",
                "countGPA": 94,
                "avgGPA": 

In [42]:
# 🔹 Extract Course Details
course_dict = {}  # Using a dictionary to avoid duplicates

for course in data:
    course_code = course.get("course", "Unknown Course")

    # Extract from first instructor entry (since it's the same for all)
    course_title = course["instructors"][0].get("Course Title", "No Title") if course.get("instructors") else "No Title"
    course_subject = course["instructors"][0].get("Course Subject", "Unknown Subject") if course.get("instructors") else "Unknown Subject"

    # Aggregate all instructors
    instructors = []
    median_gpa = "N/A"

    for instructor_data in course.get("instructors", []):
        instructor = instructor_data.get("instructor", "No Instructor")
        instructors.append(instructor)
        median_gpa = instructor_data.get("medianGPA", median_gpa)  # Take last instructor's GPA

    instructor_names = ", ".join(instructors) if instructors else "No Instructor"

    # Store unique courses in dictionary
    course_dict[course_code] = {
        "Course Code": course_code,
        "Title": course_title,
        "Subject": course_subject,
        "Instructors": instructor_names,
        "Median GPA": median_gpa
    }

In [43]:
# 🔹 Convert to DataFrame and Save
df = pd.DataFrame(course_dict.values())  # Extract dictionary values
df.to_csv("uiuc_courses.csv", index=False)

print(f"The length of the dataframe created is {len(df)}")
print(df.head())

The length of the dataframe created is 3490
  Course Code                          Title Subject  \
0     AAS 100   Intro Asian American Studies     AAS   
1     AAS 105       Intro to Arab Am Studies     AAS   
2     AAS 120  Intro to Asian Am Pop Culture     AAS   
3     AAS 200           U.S. Race and Empire     AAS   
4     AAS 215   US Citizenship Comparatively     AAS   

                                         Instructors  Median GPA  
0  All Sections, Shin, J., Rosado-Torres, A., Hor...        3.33  
1                                         Sharif, L.        4.00  
2                All Sections, Tabares, L., Park, D.        3.67  
3  All Sections, Sawada, E., Tabares, L., Park, D...        3.67  
4  All Sections, Kashani, M., Kwon, S., Park, D.,...        3.33  
