## Getting links by sending separate GET requests for each link - faster

In [24]:
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import re

# Scrape every subject page linked from the guide.wisc.edu course index and
# collect, per subject, a list of course dicts in `major_class_dict`.

# Setting up Web Driver (headless Chrome)
options = Options()
options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)

# Maps subject name -> list of course dicts with the keys
# 'class' (course code text), 'credits' (int or None) and
# 'class info' (dict of detail label -> detail text).
major_class_dict = {}

# try/finally guarantees the browser is shut down even if a page fails to
# load or parse part-way through the crawl.
try:
    driver.get('https://guide.wisc.edu/courses/')

    course_list = driver.find_element(By.ID, "atozindex")
    subjects = course_list.find_elements(By.TAG_NAME, "a")

    # Gather all the URLs and names first, so no element of the index page
    # is touched again after the driver navigates away from it.
    links = [link.get_attribute('href') for link in subjects]
    names = [subject.text for subject in subjects]

    num_subjects = len(subjects)

    # Now visit each URL directly
    for link, raw_name in zip(links, names):
        class_list = []
        # Keep only the part of the link text before the first ' ('
        name = raw_name.split(' (')[0]
        print(name)
        driver.get(link)
        classes = WebDriverWait(
            driver,
            20,
            ignored_exceptions=(NoSuchElementException, StaleElementReferenceException),
        ).until(EC.presence_of_element_located((By.CLASS_NAME, "sc_sccoursedescs")))
        titles = classes.find_elements(By.CLASS_NAME, "courseblockcode")
        credit_elements = classes.find_elements(By.CLASS_NAME, "courseblockcredits")
        courses = classes.find_elements(By.CLASS_NAME, "courseblock")

        # The three lists are matched up by position, one entry per course.
        for j, course in enumerate(courses):
            class_dict = {}
            class_dict['class'] = titles[j].text

            # Take the whole leading number rather than only the first
            # character, so multi-digit values are not cut short; for a range
            # this is still the first number. None when no number is present.
            credit_match = re.match(r'\s*(\d+)', credit_elements[j].text)
            class_dict['credits'] = int(credit_match.group(1)) if credit_match else None

            class_details = {}
            for detail in course.find_elements(By.CLASS_NAME, "courseblockextra"):
                inner_html = detail.get_attribute('innerHTML')
                text = BeautifulSoup(inner_html, 'html.parser').get_text()
                # Split on the FIRST ': ' only: a value that itself contains
                # ': ' is kept whole, and a detail without any separator
                # yields an empty value instead of raising IndexError.
                label, _, info = text.partition(': ')
                # Drop zero-width spaces and turn non-breaking spaces into
                # ordinary spaces.
                info = info.replace('\u200b', '').replace('\xa0', ' ')
                class_details[label] = info
            class_dict['class info'] = class_details
            class_list.append(class_dict)
        major_class_dict[name] = class_list
finally:
    driver.quit()

Accounting and Information Systems
Actuarial Science
African Cultural Studies
Afro-American Studies
Agricultural and Applied Economics
Agroecology
Agronomy
Air Force Aerospace Studies
American Indian Studies
Anatomy
Anatomy & Physiology
Anesthesiology
Animal Sciences
Anthropology
Applied Biotechnology
Art Department
Art Education
Art History
Asian American Studies
Asian Languages and Cultures
Asian Languages and Cultures: Languages
Astronomy
Atmospheric and Oceanic Sciences
Biochemistry
Biological Systems Engineering
Biology
Biology Core Curriculum
Biomedical Engineering
Biomolecular Chemistry
Biostatistics and Medical Informatics
Botany
Cell and Regenerative Biology
Chemical and Biological Engineering
Chemistry
Chicana/o and Latina/o Studies
Civil and Environmental Engineering
Civil Society and Community Studies
Classics
Collaborative Nursing Program
Communication Arts
Communication Sciences and Disorders
Community and Environmental Sociology
Comparative Biosciences
Comparative Litera

In [25]:
import csv

# Flatten `major_class_dict` (subject -> list of course dicts) into
# courses.csv, one row per course.
data = major_class_dict

# Detail columns copied out of each course's 'class info' dict. The same list
# drives both the header and the row values so the two cannot drift apart.
detail_columns = ['Requisites', 'Course Designation', 'Repeatable for Credit', 'Last Taught']

# Open a new CSV file for writing. newline='' is what the csv module expects;
# the explicit UTF-8 encoding keeps non-ASCII characters in the scraped text
# from failing or being mis-encoded under a different platform default.
with open('courses.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Write the header
    writer.writerow(['Department', 'Class', 'Credits'] + detail_columns)

    # Iterate over the departments and their courses
    for department, courses in data.items():
        for course in courses:
            class_info = course['class info']
            # Missing detail keys become empty cells
            detail_values = [class_info.get(column, '') for column in detail_columns]

            # Write the data to the CSV file
            writer.writerow([department, course['class'], course['credits']] + detail_values)
