# BU Hub Course Search

# Run the Code Box Below to Initialize the Course Search

## Recommended: Click the box below and then press "Shift" + "Enter/Return"

### This shortcut will run the box and take you to the next code box

In [None]:
# Click here and then press "Shift" + "Enter" to run this code box

# Imports

import numpy as np
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import re

# Pandas display limits used when result tables are rendered

pd.options.display.max_rows = 40      # longest table shown before pandas truncates it
pd.options.display.max_columns = 30   # widest table (in columns) shown in full
pd.options.display.width = 100        # character width of the text representation

# Functions

def scrape_course_website(course_data, hub_data, url):
    """Scrape one course-listing page and append its rows to the given lists.

    Both lists are extended in place; nothing is returned.

    Args:
        course_data: List that receives one 9-item list of strings per
            ``div.cf-course-card`` found on the page, in the order: course id,
            college, department, number, title, credits, availability,
            prerequisites, description.
        hub_data: List that receives one ``[course id, hub text]`` pair for
            every ``<li>`` inside the card's ``ul.cf-hub-offerings``.
        url: Address of the page to download.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status, instead of silently parsing an error page.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    soup = BeautifulSoup(r.content, "html.parser")

    for course in soup.find_all('div', class_='cf-course-card'):

        # course_data
        course_id = course.find('span', class_='cf-course-id')
        # The id span is expected to wrap exactly three inner spans.
        course_college, course_dept, course_number = course_id.find_all('span')
        course_name = course.find('h3', class_='bu_collapsible')
        course_details = course.find('p', class_='meta cf-course-info')
        course_credits, course_availability, course_prereqs = course_details.find_all('span')
        course_desc = course.find('p', class_='cf-course-description')
        course_info = [course_id, course_college, course_dept, course_number, course_name, course_credits,
                       course_availability, course_prereqs, course_desc]
        course_data.append([ele.get_text() for ele in course_info])

        # hub_data
        course_hubs = course.find('ul', class_='cf-hub-offerings')
        if course_hubs is None:
            # find() returns None when the card has no Hub list; keep the
            # course row and simply record no Hubs for it.
            continue
        for course_hub in course_hubs.find_all('li'):
            hub_data.append([course_id.get_text(), course_hub.get_text()])
            
def courses_with_hubs(how_many, needed_hubs, display_all=False):
    """Return the courses that fulfil at least ``how_many`` of ``needed_hubs``.

    Reads the module-level ``course_df``, ``hub_df`` and ``hub_nickname_dict``
    (which maps full Hub name -> abbreviation at call time).

    Args:
        how_many: Minimum number of needed Hubs a course must carry. An int,
            or a float, which is truncated to an int.
        needed_hubs: Hub abbreviations and/or full Hub names, matched
            case-insensitively. The list is not modified.
        display_all: When true, pandas' ``display.max_rows`` is raised to the
            number of matching courses; otherwise it is reset to 40.

    Returns:
        A copy of the matching rows of ``course_df`` with an extra ``"Hubs"``
        column listing every Hub abbreviation of the course (each followed by
        a space), sorted by ``Course_id`` with a fresh index.

    Raises:
        TypeError: If ``how_many`` is neither an int nor a float.
        Exception: If ``how_many`` is below 1 or larger than the highest
            number of Hubs any single course has.
        ValueError: If no course matches.
    """
    # Exact, case-insensitive lookup of full names. Substring replacement
    # would turn "Scientific Inquiry II" into "SI1I", because
    # "Scientific Inquiry I" is a prefix of it.
    abbr_by_name = {name.upper(): abbr.upper() for name, abbr in hub_nickname_dict.items()}
    wanted_hubs = set()
    for hub in needed_hubs:
        hub_key = hub.strip().upper()
        wanted_hubs.add(abbr_by_name.get(hub_key, hub_key))

    # Exceptions
    # bool is a subclass of int but is not a meaningful count, so reject it.
    if isinstance(how_many, bool) or not isinstance(how_many, (int, float)):
        raise TypeError("first parameter should be an integer")
    how_many = int(how_many)
    # The range checks run for converted floats as well as for ints.
    if how_many < 1:
        raise Exception("first parameter should be a positive number")
    most_hubs_on_a_course = np.unique(hub_df["Course_id"], return_counts=True)[1].max()
    if how_many > most_hubs_on_a_course:
        raise Exception("There are no classes with " + str(how_many) + " or more hubs")

    # Group the Hub abbreviations by course once instead of re-filtering
    # hub_df for every course.
    hubs_by_course = {}
    for course_id, hub_abbr in zip(hub_df["Course_id"], hub_df["Hub_abbr"]):
        hubs_by_course.setdefault(course_id, []).append(hub_abbr)

    eligible_ids = []
    eligible_hubs = []
    for course_id in course_df["Course_id"]:
        course_hubs = hubs_by_course.get(course_id, [])
        matches = sum(1 for hub_abbr in course_hubs if hub_abbr in wanted_hubs)
        if matches >= how_many:
            eligible_ids.append(course_id)
            eligible_hubs.append("".join(hub_abbr + " " for hub_abbr in course_hubs))

    if not eligible_ids:  # In case there are no eligible courses
        raise ValueError("No classes matching this criteria")

    if display_all:
        pd.set_option('display.max_rows', len(eligible_ids))
    else:
        pd.set_option('display.max_rows', 40)

    # eligible_hubs was built in course_df row order, one entry per selected
    # row, so it lines up with the filtered frame.
    selected_df = course_df.loc[course_df['Course_id'].isin(eligible_ids)].copy()
    selected_df["Hubs"] = eligible_hubs
    selected_df = selected_df.sort_values(by=['Course_id'])
    selected_df = selected_df.reset_index(drop=True)
    return selected_df

# Creating tables

# Both lists are filled in place by scrape_course_website, one page at a time.
course_data = []
hub_data = []
URLs = ['https://www.bu.edu/hub/hub-courses/philosophical-aesthetic-and-historical-interpretation/', 
        'https://www.bu.edu/hub/hub-courses/scientific-and-social-inquiry/', 
        'https://www.bu.edu/hub/hub-courses/quantitative-reasoning/', 
        'https://www.bu.edu/hub/hub-courses/diversity-civic-engagement-and-global-citizenship/', 
        'https://www.bu.edu/hub/hub-courses/communication/', 
        'https://www.bu.edu/hub/hub-courses/intellectual-toolkit/']

for url in URLs:
    scrape_course_website(course_data, hub_data, url)

# Fail with a clear message instead of an IndexError further down when the
# pages yielded no course cards at all.
if not course_data:
    raise RuntimeError("No courses were scraped from the Hub course pages; "
                       "the pages may be unavailable or their layout may have changed")

# Widen the column display so the longest description (item 8 of each row)
# is shown without truncation.
max_description_len = max(len(row[8]) for row in course_data)
pd.set_option('display.max_colwidth', max_description_len)

# A course listed on several pages is scraped several times, hence the
# drop_duplicates calls.
course_df = pd.DataFrame(course_data, columns=["Course_id", "College", "Department", "Number", "Title", "Credits", 
                                               "Availability", "Prerequisites", "Description"])
course_df = course_df.drop_duplicates(ignore_index=True)
hub_df = pd.DataFrame(hub_data, columns=["Course_id","Hub"])
hub_df = hub_df.drop_duplicates(ignore_index=True)

# Lookup from each Hub's full name to its three-character abbreviation
hub_nickname_dict = {
    "Philosophical Inquiry and Life's Meanings": "PLM",
    "Aesthetic Exploration": "AEX",
    "Historical Consciousness": "HCO",
    "Scientific Inquiry I": "SI1",
    "Scientific Inquiry II": "SI2",
    "Social Inquiry I": "SO1",
    "Social Inquiry II": "SO2",
    "Quantitative Reasoning I": "QR1",
    "Quantitative Reasoning II": "QR2",
    "The Individual in Community": "IIC",
    "Global Citizenship and Intercultural Literacy": "GCI",
    "Ethical Reasoning": "ETR",
    "First-Year Writing Seminar": "FYW",
    "Writing, Research, and Inquiry": "WRI",
    "Writing-Intensive Course": "WIN",
    "Oral and/or Signed Communication": "OSC",
    "Digital/Multimedia Expression": "DME",
    "Critical Thinking": "CRT",
    "Research and Information Literacy": "RIL",
    "Teamwork/Collaboration": "TWC",
    "Creativity/Innovation": "CRI",
}

# Abbreviated copy of the Hub column; a name missing from the lookup is kept as is
hub_df['Hub_abbr'] = hub_df['Hub'].replace(hub_nickname_dict)

# Write the Full Name or Abbreviation of each Hub You Need Fulfilled Below

Replace the ____ with your Hubs in the format seen in the samples

Format of your_needed_hubs: ["Hub 1 Name or Abbreviation", "Hub 2 Name or Abbreviation"] 

# Write the Minimum Number of Needed Hubs Per Class Below

Replace the ____ with a number 1-4

#### Optional: Change display_all to True - Replace False with True if you want to see all classes listed, otherwise you will be given a preview of up to 10 classes

# When ready, click on the code box and press "Shift" + "Enter" or the "▶ Run" button near the top of the screen.

#### Recommendation: Use abbreviations rather than full names

#### NOTE: If the left of your code box looks like this `In [*]`, your program is loading

# Hub Abbreviations to Hub Names

```
Abbr: Name
"PLM": "Philosophical Inquiry and Life's Meanings",
"AEX": "Aesthetic Exploration",
"HCO": "Historical Consciousness",
"SI1": "Scientific Inquiry I",
"SI2": "Scientific Inquiry II",
"SO1": "Social Inquiry I",
"SO2": "Social Inquiry II",
"QR1": "Quantitative Reasoning I",
"QR2": "Quantitative Reasoning II",
"IIC": "The Individual in Community",
"GCI": "Global Citizenship and Intercultural Literacy",
"ETR": "Ethical Reasoning",
"FYW": "First-Year Writing Seminar",
"WRI": "Writing, Research, and Inquiry",
"WIN": "Writing-Intensive Course",
"OSC": "Oral and/or Signed Communication",
"DME": "Digital/Multimedia Expression",
"CRT": "Critical Thinking",
"RIL": "Research and Information Literacy",
"TWC": "Teamwork/Collaboration",
"CRI": "Creativity/Innovation"
```

In [None]:
# Replace the _____ spaces. Then, press "Ctrl" + "Enter"

# The Hubs you still need: a list of quoted abbreviations and/or full names,
# separated by commas (see the samples below for the format)
your_needed_hubs = [______] # <-- !YOUR INPUT HERE!

# The fewest of your needed Hubs a class must fulfil to be listed
# (courses_with_hubs rejects anything that is not a number of at least 1)
min_number_of_hubs = ______ # <-- !YOUR INPUT HERE!

# Passed to courses_with_hubs, which raises pandas' row display limit to the
# number of matching classes when this is True
display_all = False # <-- !OPTIONAL: Change if you want more than 10 classes to appear!

# Samples
zachs_needed_hubs = ["PLM", "AEX", "IIC", "GCI", "WRI", "WIN", "OSC", "RIL", "CRI"] # What Hubs I need to graduate
ellas_needed_hubs = ["Quantitative Reasoning I", "SO2", "Scientific Inquiry II", "WIN", "WRI"]
graces_needed_hubs = ["IIC", "Writing-Intensive Course"]

# Returns the table of matching classes; as the last expression in the cell,
# its result is what the notebook shows as output
courses_with_hubs(min_number_of_hubs, your_needed_hubs, display_all)