# CMSC 320 - Final Tutorial - UMD Subreddit

In [1]:
import requests

In [2]:
def make_get_request(URL, headers=None, given_params=None):
    """Sends a GET request to the given URL.
    
    Parameters
    ----------
    URL : str
        The url to send a GET request
    given_params : dictionary, optional
        A dictionary of any additional parameters (default is None)
        
        
    Returns
    -------
    dictionary
        A dictionary containing the JSON response
    """
    
    SUCCESS = 200
    response = requests.get(URL, headers=headers, params=given_params)
    
    if (response.status_code == SUCCESS):
        return response
    else:
        return {};

## Scraping Professor Names

### Attempted to use umd.io, but it appears to be rather glitchy

In [3]:
"""
umd_professor_url = "https://api.umd.io/v1/professors";
page = 1
professorNames = set() 
params = {'departments': 'CMSC', 'page': page}

json = make_get_request(umd_professor_url, params)
professorNames.update([professor['name'] for professor in json])

while json is not None:
    page = page + 1
    params['page'] = page
    
    json = make_get_request(umd_professor_url, params)
    if json:
        professorNames.update([professor['name'] for professor in json])
        
    print(professorNames)
        
# print(professorNames)
"""
print()




### Doing it myself

In [4]:
from bs4 import BeautifulSoup

# Headers for the request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0",
    "Access-Control-Allow-Origin": "*",
    "Access-Control-Allow-Headers": "Content-Type",
    "Access-Control-Allow-Methods": "GET"
}

faculty_url = "https://academiccatalog.umd.edu/undergraduate/administrators-officials-faculty/"
response = make_get_request(faculty_url, headers=headers)

soup = BeautifulSoup(response.content, 'html.parser')

In [5]:
faculty_blocks = soup.find_all("p", class_="faculty-item")

# Keeping track of only CS and ENGR professors, but this is arbitrary
CMNS_profs = set()
ENGR_profs = set()
all_profs = set()

for block in faculty_blocks:
    
    # Names are stored within the <strong/> tag
    name = block.strong.string 
    
    # content structure: space,  name, space, <br/>, description
    contents = block.contents
    description = contents[4]
    
    all_profs.add(name)
    
    if "CMNS" in description:
        CMNS_profs.add(name)
    
    elif "ENGR" in description:
        ENGR_profs.add(name)
        
print(CMNS_profs)

{'Losert, Wolfgang ', 'Delwiche, Charles Francis ', "O'Brien, Tammatha ", 'Abadi, Daniel J ', 'Nemes, Peter ', 'Diener, Theodor O. ', 'Hasan, Nur A ', 'Hamby, Kelly A ', 'Song, Wenxia ', 'Benesch, William M. ', 'Freed, Eric O. ', 'Salawitch, Ross J. ', 'Liang, Xin-Zhong ', 'Cook, Thomas M. ', 'Falk, David S. ', 'Raupp, Michael J. ', 'Miller, Michael Coleman ', 'Wang, Chunsheng ', 'von Petersdorff, Tobias ', 'Golbeck, Jennifer Ann ', 'Opoku-Edusei, Justicia ', 'Pierce, Brian ', 'Sisler, Hugh D. ', 'Neri, Umberto ', 'Lett, Paul David ', 'JaJa, Joseph F. ', 'Presson, Joelle C. ', 'Lindvall, Mikael ', 'Roesch, Matthew Ryan ', 'Sze, Heven ', 'Mariuzza, Roy A. ', 'Khurana, Archana ', 'Haines, Thomas J. ', 'Carpuat, Marine ', 'Minker, Jack ', 'Carleton, Karen ', 'Yoon, Ilchul ', 'Murtugudde, Raghu Gopalrao ', 'Gezari, Suvi ', 'Rolston, Steven ', 'Ricotti, Massimo ', 'Cremins, Casey ', 'Jawahery, Abolhassan ', 'Rubinstein, Yanir A. ', 'Brush, Stephen G. ', 'Butts, Daniel A. ', 'Paik, Ho Jung '

## Requesting Data from the UMD Subreddit with PushShift API

In [14]:
import praw
from psaw import PushshiftAPI

api = PushshiftAPI()

query="teli|(mohammmad+teli)"
subs_gen = api.search_submissions(q=query, subreddit='umd', filter=['title', 'selftext'], limit=10)

for sub in subs_gen:
    print(sub)

submission(created_utc=1605375823, selftext='Who should i take, Justin or teli', title='Cmsc 351', created=1605393823.0, d_={'created_utc': 1605375823, 'selftext': 'Who should i take, Justin or teli', 'title': 'Cmsc 351', 'created': 1605393823.0})
submission(created_utc=1605373752, selftext="Just noticed Justin was teaching CMSC351. I'm taking it with Teli but Justin is highly rated. Any thoughts?", title='Justin Wyss-Gallifent for CMSC351', created=1605391752.0, d_={'created_utc': 1605373752, 'selftext': "Just noticed Justin was teaching CMSC351. I'm taking it with Teli but Justin is highly rated. Any thoughts?", 'title': 'Justin Wyss-Gallifent for CMSC351', 'created': 1605391752.0})
submission(created_utc=1605332596, selftext="Originally I signed up for cmsc417 with marsh and cmsc426 with teli. I decided I wanted to take cmsc475 graph theory also. So I filled out the google doc to get rid of cmsc417 and replaced it with cmsc475. The thing is I didn't really want to drop cmsc417 thoug