# CMSC 320 - Final Tutorial - UMD Subreddit

In [16]:
import requests

In [2]:
def make_get_request(URL, headers=None, given_params=None, timeout=None):
    """Sends a GET request to the given URL.

    Parameters
    ----------
    URL : str
        The url to send a GET request
    headers : dictionary, optional
        HTTP headers to send with the request (default is None)
    given_params : dictionary, optional
        A dictionary of query-string parameters (default is None)
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight through to
        ``requests.get``. The default of None applies no timeout, which is
        how this function behaved before the parameter existed.

    Returns
    -------
    requests.Response or dictionary
        The response object when the status code is 200; otherwise an
        empty (falsy) dictionary.
    """
    SUCCESS = 200
    response = requests.get(URL, headers=headers, params=given_params, timeout=timeout)

    # Failure is signalled with an empty dict rather than an exception, so
    # callers must check the result before using Response attributes.
    if response.status_code == SUCCESS:
        return response
    return {}

## Scraping Professor Names

### Attempted to use umd.io, but it appears to be rather glitchy

In [3]:
# Disabled first attempt, kept for reference. The code below is wrapped in a
# string literal, so it is evaluated as a plain string and never executed.
# It pages through the umd.io professors endpoint for the CMSC department and
# collects each professor's name into a set.
# NOTE(review): make_get_request's second positional parameter is `headers`,
# so `params` below would be passed as HTTP headers, not as the query string;
# use `given_params=params` if this is ever re-enabled.
# NOTE(review): make_get_request returns {} (never None) on failure, so the
# `while json is not None` condition would never become false as written.
"""
umd_professor_url = "https://api.umd.io/v1/professors";
page = 1
professorNames = set() 
params = {'departments': 'CMSC', 'page': page}

json = make_get_request(umd_professor_url, params)
professorNames.update([professor['name'] for professor in json])

while json is not None:
    page = page + 1
    params['page'] = page
    
    json = make_get_request(umd_professor_url, params)
    if json:
        professorNames.update([professor['name'] for professor in json])
        
    print(professorNames)
        
# print(professorNames)
"""
# Presumably here so the cell's last expression is not the string above
# (which the notebook would otherwise echo as output) - TODO confirm.
print()




### Doing it myself

In [4]:
from bs4 import BeautifulSoup

# Headers for the request.
# NOTE(review): the Access-Control-Allow-* entries are normally CORS
# *response* headers; they can probably be dropped from an outgoing
# request - confirm the site still answers without them.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0",
    "Access-Control-Allow-Origin": "*",
    "Access-Control-Allow-Headers": "Content-Type",
    "Access-Control-Allow-Methods": "GET"
}

faculty_url = "https://academiccatalog.umd.edu/undergraduate/administrators-officials-faculty/"
response = make_get_request(faculty_url, headers=headers)

# make_get_request returns an empty dict for any non-200 status; stop here
# with a readable message instead of an AttributeError on `response.content`.
if not response:
    raise RuntimeError("Could not download the faculty listing from " + faculty_url)

soup = BeautifulSoup(response.content, 'html.parser')

In [5]:
faculty_blocks = soup.find_all("p", class_="faculty-item")

# Only professors whose description mentions CMNS or ENGR are tracked
# separately, but this choice is arbitrary. all_profs holds every name.
CMNS_profs = set()
ENGR_profs = set()
all_profs = set()

# Expected children of each block: space, name, space, <br/>, description
DESCRIPTION_INDEX = 4

# Blocks that do not match the expected layout, kept for inspection
malformed_blocks = []

for block in faculty_blocks:

    # Names are stored within the <strong/> tag. `.string` is None when the
    # tag holds more than one child, so treat that like a missing tag.
    name = block.strong.string if block.strong is not None else None

    contents = block.contents

    if name is None or len(contents) <= DESCRIPTION_INDEX:
        # Skip instead of crashing the scrape or adding None to the sets
        malformed_blocks.append(block)
        continue

    description = contents[DESCRIPTION_INDEX]

    all_profs.add(name)

    # A description mentioning both is counted under CMNS only
    if "CMNS" in description:
        CMNS_profs.add(name)

    elif "ENGR" in description:
        ENGR_profs.add(name)
        

In [13]:
def split_name(name):
    """Split a catalog-style name into its first and last name.

    Parameters
    ----------
    name : str
        Name structured as ``<last name>, <first name> (<middle name/initial>)``

    Returns
    -------
    tuple
        ``(first_name, last_name)``. ``last_name`` is the text before the
        first comma with surrounding whitespace removed. ``first_name`` is
        the first word after that comma, or an empty string when there is
        no comma or nothing follows it.
    """
    parts = name.split(',')
    last_name = parts[0].strip()

    # Only the first word is kept; middle names/initials are dropped.
    given_names = parts[1].split() if len(parts) > 1 else []
    first_name = given_names[0] if given_names else ""

    return (first_name, last_name)

### Create a table per professor to hold the comments and submissions mentioning them

In [None]:
import sqlite3
from sqlite3 import Error

In [None]:
def create_connection(db_file):
    """Open a connection to the SQLite database stored in ``db_file``.

    Parameters
    ----------
    db_file : str
        Path of the database file, as accepted by ``sqlite3.connect``

    Returns
    -------
    sqlite3.Connection or None
        The open connection, which the caller is responsible for closing,
        or None if connecting raised a sqlite3 error (the error is printed).
    """
    try:
        # Hand the connection back still open so it can be passed on to
        # create_table and the other helpers that take `conn`.
        return sqlite3.connect(db_file)
    except Error as e:
        print(e)
        return None

In [None]:
def create_table(conn, create_table_sql):
    """Run a single SQL statement (intended to be a CREATE TABLE) on ``conn``.

    Parameters
    ----------
    conn : sqlite3.Connection
        An open database connection
    create_table_sql : str
        The SQL statement to execute

    Returns
    -------
    None
        sqlite3 errors are printed rather than raised.
    """
    try:
        conn.cursor().execute(create_table_sql)
    except Error as err:
        print(err)

In [None]:
def create_professor_table(conn, professor_name):
    """Create the table for one professor if it does not already exist.

    Parameters
    ----------
    conn : sqlite3.Connection
        An open database connection
    professor_name : str
        Used as the table name. It may contain spaces, commas or quotes.

    Returns
    -------
    None
        The statement is run through ``create_table``, which prints sqlite3
        errors instead of raising them.
    """
    # Table names cannot be bound as SQL parameters, so quote the identifier
    # instead: wrap it in double quotes and double any embedded ones.
    quoted_name = '"' + professor_name.replace('"', '""') + '"'

    professor_table = """ CREATE TABLE IF NOT EXISTS {professor} (
                            id integer PRIMARY KEY,
                            first_name text NOT NULL,
                            last_name text NOT NULL,
                            sub_id text NOT NULL,
                            body text NOT NULL,
                            is_comment integer NOT NULL,
                            title text,
                            score integer NOT NULL
                        )""".format(professor=quoted_name)

    create_table(conn, professor_table)

## Requesting Data from the UMD Subreddit with the Pushshift API

In [6]:
import praw
from psaw import PushshiftAPI

api = PushshiftAPI()

# Search terms for one professor: the surname alone, or first name together
# with the surname (presumably Pushshift's `|` / `+` operators - TODO confirm).
query="teli|(mohammad+teli)"

# Only the title and selftext fields are requested, for at most 10 submissions
subs_gen = api.search_submissions(q=query, subreddit='umd', filter=['title', 'selftext'], limit=10)

#for sub in subs_gen:
#    print(sub)