In [36]:
import pandas as pd
# Load the Spring 2026 schedule export. The crawl cell further down looks
# courses up in this frame by Subject/Number and copies their details instead
# of downloading them again.
schedule = pd.read_csv("../data/2026-sp.csv")

# Preview the first record to see which columns the export provides.
schedule.loc[0]

Year                                                                 2026
Term                                                               Spring
YearTerm                                                          2026-sp
Subject                                                               AAS
Number                                                                100
Name                                         Intro Asian American Studies
Description             Interdisciplinary introduction to the basic co...
Credit Hours                                                     3 hours.
Section Info                                                          NaN
Degree Attributes       Social & Beh Sci - Soc Sci, and Cultural Studi...
Schedule Information                                                  NaN
CRN                                                                 30107
Section                                                               AD1
Status Code                           

In [19]:
# Prefix -> namespace URI mapping handed to pd.read_xml so that XPath
# expressions can address the "ns2:"-qualified root elements.
ns = {
    "ns2": "http://rest.cis.illinois.edu",
}

In [20]:
# Download the Spring 2026 term document and build one row per child element
# of <subjects>; the rendered frame has id / href / subject columns.
allSubjects = pd.read_xml(
    "https://courses.illinois.edu/cisapp/explorer/catalog/2026/spring.xml",
    xpath="/ns2:term/subjects/*",
    namespaces=ns,
)
allSubjects

Unnamed: 0,id,href,subject
0,AAS,https://courses.illinois.edu/cisapp/explorer/c...,Asian American Studies
1,ABE,https://courses.illinois.edu/cisapp/explorer/c...,Agricultural and Biological Engineering
2,ACCY,https://courses.illinois.edu/cisapp/explorer/c...,Accountancy
3,ACE,https://courses.illinois.edu/cisapp/explorer/c...,Agricultural and Consumer Economics
4,ACES,https://courses.illinois.edu/cisapp/explorer/c...,"Agricultural, Consumer and Environmental Sciences"
...,...,...,...
180,VCM,https://courses.illinois.edu/cisapp/explorer/c...,Veterinary Clinical Medicine
181,VM,https://courses.illinois.edu/cisapp/explorer/c...,Veterinary Medicine Courses
182,WGGP,https://courses.illinois.edu/cisapp/explorer/c...,Women and Gender in Global Perspectives Program
183,WLOF,https://courses.illinois.edu/cisapp/explorer/c...,Wolof


In [48]:
import requests
import xml.etree.ElementTree as ET
import time

# Child tag of a course XML document -> column name used in the output rows.
COURSE_XML_COLUMNS = {
  "description": "Description",
  "creditHours": "Credit Hours",
  "courseSectionInformation": "Section Info",
  "sectionDegreeAttributes": "Degree Attributes",
  "classScheduleInformation": "Schedule Information",
}
# Columns copied from `schedule` when the course is already present there.
CACHED_COLUMNS = ["Name", "Description", "Credit Hours", "Section Info", "Degree Attributes", "Schedule Information"]
REQUEST_DELAY_SECONDS = 1    # pause after every HTTP request
REQUEST_TIMEOUT_SECONDS = 30  # give up on a stalled request instead of hanging


def _fetch_course_fields(courseHref):
  """Download one course XML document and extract its detail columns.

  Args:
    courseHref: URL of the course document, taken from the subject listing.

  Returns:
    dict mapping each column in COURSE_XML_COLUMNS to the text of the
    matching direct child of the root element, or None when that child is
    absent.

  Raises:
    requests.HTTPError: the server answered with an error status.
    requests.Timeout: no response within REQUEST_TIMEOUT_SECONDS.
    xml.etree.ElementTree.ParseError: the body is not well-formed XML.
  """
  response = requests.get(courseHref, timeout=REQUEST_TIMEOUT_SECONDS)
  # Fail loudly on an error page rather than trying to parse it as a course.
  response.raise_for_status()
  root = ET.fromstring(response.text)

  fields = {}
  for tag, column in COURSE_XML_COLUMNS.items():
    el = root.find(tag)
    fields[column] = el.text if el is not None else None
  return fields


# Walk every subject, list its courses, and collect one record per course.
# Details come from `schedule` when the course is already there; otherwise the
# course document is downloaded.
rows = []
for i, subjectRow in allSubjects.iterrows():
  subject = subjectRow["id"]  # ex: "CS"
  subjectHref = subjectRow["href"]
  # Report progress only after `subject` is bound: printing first raises
  # NameError on a fresh kernel and otherwise shows the previous subject.
  print(f"{i}: {subject}")
  courses = pd.read_xml(subjectHref, xpath="/ns2:subject/courses/*", namespaces=ns)
  time.sleep(REQUEST_DELAY_SECONDS)

  for _, courseRow in courses.iterrows():
    number = courseRow["id"]
    courseHref = courseRow["href"]
    courseName = courseRow["course"]

    # Plain dict per record; pd.DataFrame(rows) accepts a list of dicts.
    row = {
      "Year": 2026,
      "Term": "Spring",
      "YearTerm": "2026-sp",
      "Subject": subject,
      "Number": number,
      "Name": courseName,
    }

    cachedRows = schedule[ (schedule.Subject == subject) & (schedule.Number == number) ]
    if len(cachedRows) == 0:
      print(f"Fetching: {subject} {number}")
      row.update(_fetch_course_fields(courseHref))
      time.sleep(REQUEST_DELAY_SECONDS)
    else:
      print(f"Cached: {subject} {number}")
      # Every matching schedule row belongs to the same course; take the first.
      cache = cachedRows.iloc[0]
      for key in CACHED_COLUMNS:
        row[key] = cache[key]

    rows.append(row)

0: AAS
Cached: AAS 100
Fetching: AAS 105
Fetching: AAS 120
Fetching: AAS 199
Cached: AAS 200
Cached: AAS 201
Fetching: AAS 203
Fetching: AAS 211
Cached: AAS 215
Cached: AAS 246
Fetching: AAS 258
Fetching: AAS 260
Fetching: AAS 275
Fetching: AAS 281
Fetching: AAS 282
Cached: AAS 283
Cached: AAS 286
Cached: AAS 287
Cached: AAS 288
Fetching: AAS 290
Fetching: AAS 291
Cached: AAS 297
Fetching: AAS 299
Cached: AAS 300
Cached: AAS 310
Fetching: AAS 315
Fetching: AAS 317
Fetching: AAS 343
Fetching: AAS 346
Cached: AAS 355
Cached: AAS 357
Fetching: AAS 365
Cached: AAS 370
Fetching: AAS 375
Cached: AAS 390
Fetching: AAS 395
Fetching: AAS 400
Fetching: AAS 402
Fetching: AAS 435
Fetching: AAS 464
Cached: AAS 465
Fetching: AAS 479
Fetching: AAS 490
Fetching: AAS 494
Fetching: AAS 495
Cached: AAS 496
Fetching: AAS 501
Fetching: AAS 539
Cached: AAS 561
Fetching: AAS 589
Fetching: AAS 590
Fetching: AAS 594
Fetching: AAS 596
1: AAS
Fetching: ABE 127
Cached: ABE 128
Cached: ABE 152
Cached: ABE 199
Fetc

In [49]:
# Assemble the per-course records collected in `rows` into a single table.
df = pd.DataFrame(rows)

In [51]:
# Write the assembled table to catalog.csv in the current working directory.
# NOTE(review): to_csv defaults to index=True, which also writes the row index
# as an unnamed first column — confirm that is wanted, else pass index=False.
df.to_csv("catalog.csv")

{'label': 'Intro Computing: Non-Tech',
 'description': 'Computing as an essential tool of academic and professional activities. Functions and interrelationships of computer system components: hardware, systems and applications software, and networks. Widely used application packages such as spreadsheets and databases. Concepts and practice of programming for the solution of simple problems in different application areas. Intended for non-science and non-engineering majors. Prerequisite: MATH 112.',
 'creditHours': '3 hours.',
 'courseSectionInformation': 'Prerequisite: MATH 112.',
 'sectionDegreeAttributes': 'Quantitative Reasoning I course.',
 'classScheduleInformation': 'Students must register for one lab-discussion and one lecture section.'}

In [45]:
# Scratch check: start from an empty Series and add a single integer entry.
# Note that this rebinds the name `row`, which the crawl loop also assigns.
row = pd.Series()
row["Year"] = 2026


In [46]:
# Display the Series; the rendered output reports dtype int64 for the one entry.
row

Year    2026
dtype: int64