### SoC database interface, updating all course information when called 

In [1]:
import requests
import os,json
from os.path import join, isdir
from pprint import pprint
import itertools
import dask
import re

class gateway2SoC:
    def __init__(self, outputDir):
        if not isdir(outputDir): # check directory
            os.mkdir(outputDir)
        self.outputDir = outputDir
        self.server = 'http://web-app.usc.edu/web/soc/api'
        self.deptDict  ={}
    def update_course_info(self, term):
        deptList = self.get_dept_list(term)
        multithreader = []
        for dept in deptList:
            multithreader.append(dask.delayed(self.get_dept_course)(dept['code'],dept['name'],term))
        dask.compute(*multithreader)
        self.__save_file(self.deptDict, name='deptDictionary',path = self.outputDir)
    # get a list of departments offering courses
    def get_dept_list(self, term):
        url = '/'.join([self.server,'depts', term])
        data = self.__make_request(url)['department']
        deptList = list(map(self.__dept_extractor, data))
        return list(itertools.chain.from_iterable(deptList))
    # get infor about one course
    def get_one_course(self, courseName, term):
        deptName  = " ".join(re.findall("[a-zA-Z]+", courseName))
        self.get_dept_course(deptName,term, interested = courseName)
        
    # get all courses under one department 
    def get_dept_course(self,deptCode, deptName, term, interested= ''):
        print ('processing courses for: ',deptCode)
        url = '/'.join([self.server,'classes' ,deptCode, term])
        courseList = self.__make_request(url)['OfferedCourses']['course']
        deptDir = join(self.outputDir,deptCode)
        self.deptDict [deptCode] =deptName # update dept dictionary
        if not isdir(deptDir) and len(courseList)>0:
            os.mkdir (deptDir)
        if type(courseList) == dict:
            courseList = [courseList]
        for course in courseList:
            courseName = course['PublishedCourseID']
            if interested != '' and courseName !=interested:
                continue 
            try:
                sessionData = list(map(self.__course_extractor, course['CourseData']['SectionData']))
            except:
                sessionData = [self.__course_extractor(course['CourseData']['SectionData'])]
            # create a file 

            self.__save_file(sessionData, courseName, deptDir) 
                
    #-----private helper
    # make request and return json data
    def __make_request(self, url):
        try:
            response = requests.post(url)
        except: # in case of time-out, reconnect
            response = requests.post(url)
        return response.json()
    def __course_extractor(self, jsonData):
        wantedFeature = ['type', 'day','start_time','end_time','spaces_available','number_registered','units','dclass_code']
        if jsonData['canceled'] == 'Y' or jsonData['canceled'] == 'y':
            return {}# return an empty dict if the session is cancled
        def helper(feature,jsonData): # handle unavailable information 
            try:
                return (feature,jsonData[feature])
            except:
                return (feature, '')
        extracted = dict((helper(feature,jsonData)) for feature in wantedFeature)
        if 'instructor' in jsonData:
            try:
                extracted['instructorName'] =[','.join([jsonData['instructor']['last_name'] ,jsonData['instructor']['first_name']])]
            except: # multiple prof for the same section
                extracted['instructorName'] =[','.join([prof['last_name'] ,prof['first_name']])
                                             for prof in jsonData['instructor']]
        else:
            extracted['instructorName'] = ''
        return extracted
    # extract department names
    def __dept_extractor(self, jsonData):
        try:
            if type(jsonData['department']) == list:
                return jsonData['department']
            else:
                return [jsonData['department']]
        except:
            return [jsonData] # some program might not have sub-department
    # save session data to a text file
    def __save_file(self, data, name,path): 
        #f =  open(join(path,courseName+'.json'), 'w') # rewrite files
        with open(join(path,name+'.json'), 'w') as fout:
            json.dump(data, fout)

In [2]:
gateway = gateway2SoC('./tempStorage/')
gateway.update_course_info('20193')

processing courses for: processing courses for: processing courses for: processing courses for:  NSCI
 MPEM
  ASTRPJMT

processing courses for:  MOR
processing courses for:  MUCM
processing courses for:  LIM
processing courses for:  RUSS
processing courses for:  HEBR
processing courses for:  DPHR
processing courses for:  DSR
processing courses for:  SCOR
processing courses for:  ITP
processing courses for:  DES
processing courses for:  CHE
processing courses for:  PHBI
processing courses for:  CNB
processing courses for:  ADNT
processing courses for:  PAIN
processing courses for:  MDES
processing courses for:  CHEM
processing courses for:  PHRD
processing courses for:  WCT
processing courses for:  ENE
processing courses for:  FSEM
processing courses for:  THTR
processing courses for:  AME
processing courses for:  SANA
processing courses for:  PSCI
processing courses for:  OT
processing courses for:  CLAS
processing courses for:  NAUT
processing courses for:  COLT
processing courses for

### RMP interface, collecting information about a given professor when called 

In [1]:
from bs4 import BeautifulSoup
class gateway2RMP:
    def __init__(self, outputDir):
        if not isdir(outputDir):
            os.mkdir(outputDir)
        self.server = 'https://www.ratemyprofessors.com'
    def get_prof_rating(self, name, department):
        url = ''.join([self.server,
               '/search.jsp?queryoption=HEADER&queryBy=teacherName&schoolName=University+of+Southern+California&schoolID=1381&query=',
               '+'.join(name)])
        soup = BeautifulSoup(self.__make_request(url), 'html.parser')
        relatedProf = []
        for listings in  soup.find_all('li', attrs={'class':'listing PROFESSOR'}):
            link = listings.find('a', href=True)['href']
            name = listings.find('span', attrs = {'class':'main'}).text
            dept = listings.find('span', attrs = {'class':'sub'}).text
            relatedProf.append((link, name,dept))
        if len(relatedProf) == 0:
            print ('not found',name)
        if len(relatedProf) >1:
            print ('multiple', name)
    # make request and return json data
    def __make_request(self, url):
        try:
            response = requests.post(url)
        except: # in case of time-out, reconnect
            response = requests.post(url)
        return response.text

In [215]:
gateway = gateway2RMP('D:/Data/SCheduler/RMPData')
for name in names:
    text = gateway.get_prof_rating(name[0].split(','), department='')

not found ['Sanchez', 'Carlos']
not found ['Mak', 'Chiho']
not found ['Wang', 'Danny']
not found ['Kuo', 'Chiaoyun']
not found ['Roster', 'Michael']
not found ['Schier', 'Lindsey']
not found ['Morlacco', 'Monica']
not found ['Nolan', 'Ginger']
not found ['Quezada', 'Wanda']
not found ['Maibaum', 'Paul']
not found ['Hoyt', 'Justin']
not found ['Waite', 'Barry']
not found ['Patel', 'Vishal']
not found ['Jonas', 'Anthony']
not found ['Mc Gene', 'Juliana']
not found ['Rasmussen', 'Robert']
not found ['Wilson Sweeney', 'Zivia']
not found ['Childress', 'Amy']
not found ['Saran', 'Ama']
not found ['Moss', 'Kelsey']
not found ['Hasan', 'Angela']
not found ['Moatasim', 'Faiza']
not found ['Nguyen', 'Anna Quyen']
not found ['Thomas', 'Duncan']
not found ['Moss', 'Karen']
not found ['Page', 'Tim']
not found ['Cho', 'Nam']
not found ['Curiel', 'Yajaira']
not found ['Filback', 'Robert']
not found ['Latham', 'Joseph']
not found ['Dalton', 'Philip']
not found ['Lovick', 'Jack']
not found ['Haleblian'

not found ['Sanchez', 'Raquel']
not found ['Thomas', 'Ladd']
not found ['Annovi', 'Gian-Maria']
not found ['Tajima', 'Takako']
not found ['Sahibzada', 'Khatera']
not found ['Glasgow', 'David']
not found ['Webb', 'Christina']
not found ['Lund', 'Dorothy']
not found ['Vives', 'Eva']
not found ['Coleman', 'Catherine']
not found ['Jones', 'John']
multiple Fellenzer, Jeff 
not found ['Kartun', 'Allan']
not found ['Wills', 'Leslie']
not found ['Yamashiro', 'Stan']
not found ['Aguilar', 'Stephen']
not found ['Schneiderman', 'Janet']
not found ['Foley', 'Paul']
not found ['Gear Rich', 'Camille']
not found ['Schneider', 'Ed']
not found ['Rogers', 'Philip']
not found ['Westwick', 'Peter']
not found ['Kurlat', 'Pablo']
not found ['Li', 'Timothy']
not found ['Ramsey', 'Robert']
not found ['Iezman', 'Stanley']
not found ['Wang', 'Joseph']
not found ['Shapiro', 'Lauren']
not found ['Block', 'Richard']
not found ['Buchanan', 'Denise']
not found ['Haworth', 'Ian']
multiple Hernandez, Robert 
not found

not found ['Hentis', 'Marisa']
not found ['Mc Curdy', 'Ron']
not found ['Globe', 'Anne']
not found ['Hodge', 'Candace']
not found ['McNiff', 'P T']
not found ['An', 'Woojin']
not found ['Patnoe-Woodley', 'Paula']
not found ['Saavedra', 'Juan']
not found ['Scott', 'Matthew']
not found ["O'Neill", 'Sharon']
not found ['Kang', 'Hyo']
not found ['Walker', 'Jim']
not found ['Lagatta', 'Anne']
not found ['Kaslow', 'Harvey']
not found ['Pfizenmaier', 'Laureen']
not found ['Alfaro', 'Ange-Marie']
multiple Coombs, Michael 
not found ['Al-Eryani', 'Kamal']
not found ['Patel', 'Tanvi']
not found ['Dass', 'Kaali']
not found ['Hollingshead', 'Andrea']
not found ['Krausas', 'Veronika']
multiple Brown, Bruce 
not found ['Dubois', 'Michel']
not found ['Mayer', 'Doe']
not found ['Livingston', 'Larry']
not found ['Lundeen', 'Rebecca']
not found ['Vasconcellos', 'Marcio']
not found ['Manetti', 'Carolyn']
not found ['Qin', 'Joe']
not found ['Mc Curdy', 'Ron']
not found ['Roberts', 'Lindsay']
not found ['P

not found ['Tabancay Jr', 'Angel P']
not found ['Baum', 'Carol']
not found ['Wang', 'Hue']
not found ['Sellers', 'Jefferey']
not found ['Sloan', 'Richard']
not found ['Fried', 'Rachel']
not found ['Mulligan', 'Roseann']
not found ['Yang', 'Xinhui']
not found ['Thangavelu', 'Madhu']
not found ['Kahane', 'Jeffrey']
not found ['Fisher', 'Beth']
not found ['Boorn', 'Marylynne']
multiple Pate, Leah 
not found ['Reisman', 'Garrett']
not found ['Wang', 'Ying']
not found ['Redfearn', 'Christian']
not found ['Kao', 'Steve']
not found ['Seidman', 'Lisa']
not found ['Floto', 'Jennifer']
not found ['Tardibuono', 'Joan']
not found ['Harris', 'Susan']
not found ['Sandoval', 'Oscar']
not found ['Park', 'Susie']
not found ['Jaque', 'Diana']
not found ['Raff', 'Elisabeth']
multiple Fuhrman, Jed 
not found ['Datta', 'Mini']
not found ['Curtis', 'James']
not found ['Handley', 'William']
not found ['Holt', 'Sean']
not found ['Easterling', 'Bret']
not found ['Altman', 'Scott']
not found ['Schonthal', 'Axel

not found ['Jann', 'Kay']
not found ['Kaldjian', 'Kevin']
not found ['Hill', 'Robert H']
not found ['Adibe', 'Bryant']
not found ['Hewitt', 'Donald']
not found ['Makijan', 'Narineh']
not found ['Potenza', 'Frank']
not found ['Yip', 'Felix']
not found ['Lytal', 'Cristy']
not found ['Rohrbach', 'Luanne']
not found ['De Dominic', 'Nicholas']
not found ['Kaye', 'Barry']
not found ['Russell-Schlesinger', 'Charles']
not found ['Olesen', 'Eric']
not found ['Nikolaidis', 'Stefanos']
not found ['Timm', 'Joel']
not found ['Neubauer', 'Catherine']
not found ['Hernandez', 'Christine']
not found ['Reynolds', 'Ed']
not found ['Vidale', 'John']
not found ['Tsotsis', 'Theo']
multiple Ramcharan, Rodney 
not found ['Sellami', 'Nadia']
not found ['Santos', 'Oscar']
not found ['Jessen', 'Kristian']
not found ['Hales', 'Chaz']
not found ['Agius Vallejo', 'Jody']
not found ['West', 'Jeffrey']
not found ['Okamoto', 'Curtis']
not found ['Mallikarjun Sharada', 'Shaama']
not found ['Leach', 'William']
not found

not found ['Wolfington', 'Lindsay']
not found ['Belson', 'David']
not found ['Richmond', 'Frances']
not found ['Padilla', 'Mariela']
not found ['Cuccia', 'Nicholas']
not found ['Guyer', 'Cindy']
multiple James, Jane 
not found ['Schneider', 'Lee']
not found ['Dauchot', 'Luke']
not found ['Alfaro', 'Ange-Marie']
not found ['Green', 'Richard']
not found ['Popp', 'Kimberly']
not found ['Sommer', 'S Amy']
not found ['Madni', 'Azad']
not found ['Kasimow', 'Erin']
not found ['Richey', 'Joyce']
not found ['Lee', 'Lucy V']
not found ['Landa', 'Edgar']
not found ['Sloan', 'Nathaniel']
not found ['Sloan', 'Nathaniel']
not found ['Ramos', 'Jennifer']
not found ['Quintana', 'Taqueena']
not found ['Booth', 'Thomas']
not found ['Turner', 'Myra']
not found ['Stanton', 'Timothy']
not found ['Bowles', 'Susan']
not found ['Felt', 'Laurel']
not found ['Botz', 'Hanns']
not found ['Mousli', 'Beatrice']
not found ['Alamo', 'Rosamaria']
not found ['Dodds', 'Kimberly']
not found ['Findeisen', 'Christopher']
n

not found ['Grant', 'Derisa']
not found ['Trovato', 'Steve']
not found ['Karafantis', 'Layne']
not found ['Newmyer', 'Richard']
not found ['Wu', 'Wei']
not found ['Hinton', 'David']
not found ['Kaslow', 'Harvey']
not found ['Pesaran', 'Hashem']
not found ['Chang', 'Jincai']
not found ['Neff', 'Jessica']
not found ['Fife', 'Elizabeth']
not found ['Lasmana', 'Viola']
not found ['Riggio', 'Marsha']
not found ['McGrath', 'Michael']
not found ['Domasin', 'Gary']
not found ['Louie', 'Stan']
not found ['Haraldsson', 'Helga']
not found ['Tan', 'Patricia']
not found ['Patnoe-Woodley', 'Paula']
not found ['Schonthal', 'Axel']
not found ['Pereira', 'Joseph']
not found ['Mc Ilvery', 'Richard']
not found ['Braun', 'Theodore']
not found ['Callaway', 'Trey']
not found ['Levine', 'David']
not found ['Bay', 'Morten']
not found ['Sweeney', 'Mary']
not found ['Lee', 'Daniel']
not found ['Bouchard', 'Sean']
not found ['Ahmadi', 'Shafiqa']
not found ['Morgan', 'Alaina']
not found ['Joe', 'Nam']
not found [

not found ['Schmitt', 'Howard']
not found ['Rawlings', 'Kelly']
not found ['Manos', 'Matthew']
not found ['Cherry', 'Nathan']
not found ['Tescher', 'Woodie']
not found ['Gates', 'Jodie']
not found ['Molina', 'Natalia']
not found ['Bellamy', 'Gordon']
multiple Rollo-Carlson, Cynthia 
not found ['De Leon', 'Adrian']
not found ['Beachem', 'Elizabeth']
not found ['Jolles', 'Monica']
not found ['Hoskin', 'Maia']
not found ['Cooperdock', 'Emily']
not found ['Galloway', 'Scotty']
not found ['Schierle', 'G G']
not found ['Arevalo', 'Lupe']
not found ['Bruckheimer', 'Bonnie']
not found ['Rock', 'Loretta']
not found ['Kar', 'Nikhil']
not found ['Chang', 'Patty']
not found ['Hlavka', 'Jakub']
not found ['Jochai', 'Diana']
not found ['Ramirez', 'Sally']
not found ['MacOn-Oliver', 'Cheryl']
not found ['Raber', 'Jack']
not found ['Simmons', 'William']
not found ['Mc Curdy', 'Ron']
not found ['Maquiling', 'David']
not found ['Holder', 'Bill']
not found ['Boley Cruz', 'Nancy']
not found ['Foster', 'Mi

In [209]:
baseDir = 'D:/Data/SCheduler/SoCData'
deptList = os.listdir(baseDir)
names = []

for dept in deptList:
    try:
        courses = os.listdir(os.path.join(baseDir, dept))
        for course in courses:
            names += list(getProfNames(course,dept, baseDir))
    except:
        print (dept)
        pass
names = [(pair[0][0], pair[1]) for pair in names if pair[0] !='']
names = set(names)

deptDictionary.json


In [210]:
def getProfNames(courseName, deptName,baseDir):
    path = os.path.join(baseDir, deptName, courseName)
    with open(join(baseDir,deptName,courseName), 'r') as fin:
        data = json.load(fin)
    for session in data:
        if session != {}:
            yield (session['instructorName'], deptName)


In [212]:
len(names)

3991