In [1]:
import re
from uO_scrape import get_subjects, get_courses, get_form, get_offerings
from time import sleep
import pandas as pd
import requests
from bs4 import BeautifulSoup
import urllib
import json
import numpy as np

def raze_list(l):
    out = []
    for x in l:
        if type(x) == list:
            out += raze_list(x)
        else:
            out.append(x)
    return out

form = get_form()

In [2]:
subject_data = pd.read_csv('uOttawa_subjects.csv', index_col=False)
subject_data.head()

Unnamed: 0,Subject,Code,Link
0,Aboriginal Studies,eas,https://catalogue.uottawa.ca/en/courses/eas/
1,Accounting,cpt,https://catalogue.uottawa.ca/en/courses/cpt/
2,Administration,adm,https://catalogue.uottawa.ca/en/courses/adm/
3,Anatomy and Neurobiology,ana,https://catalogue.uottawa.ca/en/courses/ana/
4,Anatomy and Physiology,anp,https://catalogue.uottawa.ca/en/courses/anp/


In [3]:
course_data = pd.read_csv('uOttawa_courses.csv', index_col=False)
course_data.head()

Unnamed: 0,code,components,credits,dependencies,desc,prerequisites,title
0,EAS 1101,['Lecture'],3,[],"Introduction to the global, cultural and relig...",,Introduction to Aboriginal Societies and Cultures
1,EAS 1501,['Cours magistral'],3,[],Introduction à la diversité culturelle et reli...,,Introduction aux sociétés et cultures autochtones
2,EAS 2101,['Lecture'],3,[],Study of the concept and history of colonialis...,,Colonialism and Indigenous Peoples
3,EAS 2501,['Cours magistral'],3,[],Étude conceptuelle et historique du colonialis...,,Colonialisme et peuples autochtones
4,EAS 3101,['Lecture'],3,['EAS 2101'],Study and analysis of methodologies associated...,EAS 2101 or 54 university credits,Research and Methodologies in Aboriginal Studies


In [4]:
offering_data = pd.read_csv('uOttawa_offerings_quick.csv', index_col=False)
offering_data.head()

Unnamed: 0,code,department,faculty,link,term,title
0,ADM 1100,Administration (General),Telfer School of Management,Course.aspx?id=000001&term=2189&session=FS,2018 Fall Term,Introduction to Business Management
1,ADM 1101,Administration (General),Telfer School of Management,Course.aspx?id=000002&term=2189&session=FS,2018 Fall Term,Social Context of Business
2,ADM 1300,Administration (General),Telfer School of Management,Course.aspx?id=000003&term=2189&session=FS,2018 Fall Term,Introduction to Business Management
3,ADM 1301,Administration (General),Telfer School of Management,Course.aspx?id=000004&term=2189&session=FS,2018 Fall Term,Social Context of Business
4,ADM 1340,Administration (General),Telfer School of Management,Course.aspx?id=000005&term=2189&session=FS,2018 Fall Term,Financial Accounting


# Getting Offerings

In [5]:
summary = []
for course in course_data.code.tolist():
    summary.append([course, offering_data[offering_data.code.str.contains(course)]['term'].tolist()])
offerings_summary = pd.DataFrame(summary, columns = ['code', 'offered'])
offerings_summary.head(20)

Unnamed: 0,code,offered
0,EAS 1101,[]
1,EAS 1501,[2019 Winter Term]
2,EAS 2101,[]
3,EAS 2501,[]
4,EAS 3101,[]
5,EAS 3102,[]
6,EAS 3103,[]
7,EAS 3501,[]
8,EAS 3502,[]
9,EAS 3503,[]


In [6]:
offerings_summary[offerings_summary.code.str.contains('MAT 3|CSI 2|PHY 3', regex=True)].head(20)

Unnamed: 0,code,offered
2417,CSI 2101,[]
2418,CSI 2110,[2018 Fall Term]
2419,CSI 2120,[]
2420,CSI 2132,[]
2421,CSI 2372,[2018 Fall Term]
2422,CSI 2501,[]
2423,CSI 2510,[2018 Fall Term]
2424,CSI 2520,[]
2425,CSI 2532,[]
2426,CSI 2772,[2018 Fall Term]


# Querying Courses

In [7]:
course_data.dependencies = course_data.dependencies.str.replace("'", '"').apply(json.loads)

In [8]:
course_data['lang'] = np.where(course_data.code.str[5].apply(int) < 5, 'en', 'fr')
course_data.lang = np.where(course_data.code.str[5] == '9', 'enfr', course_data.lang)

In [9]:
course_data.prerequisites = np.where(course_data.prerequisites.isna(), '', course_data.prerequisites)

In [10]:
eng1 = course_data[(course_data.dependencies.str.len() == 0) & (course_data.code.str.startswith('ENG')) \
           & (course_data.lang.str.contains('en')) & (course_data.code.str[4].apply(int) == 1)]
eng1

Unnamed: 0,code,components,credits,dependencies,desc,prerequisites,title,lang
3757,ENG 1100,"['Discussion Group', 'Lecture']",3,[],Intensive practice in academic essay writing. ...,,Workshop in Essay Writing,en
3758,ENG 1112,"['Discussion Group', 'Lecture']",3,[],Practice in the writing of technical reports. ...,,Technical Report Writing,en
3759,ENG 1120,['Lecture'],3,[],Development of critical reading skills and coh...,,Literature and Composition I: Prose Fiction,en
3760,ENG 1121,['Lecture'],3,[],Development of critical reading skills and coh...,,Literature and Composition II: Drama and Poetry,en
3761,ENG 1124,['Lecture'],3,[],Engagement with a specific literary topic in o...,,Engaging with Literature,en
3762,ENG 1131,"['Discussion Group', 'Lecture']",3,[],Development of skills in written communication...,,Effective Business English,en


In [11]:
eng1_offers = [get_offerings(x) for x in eng1.code.tolist()]
eng1_offers = [x for x in raze_list(eng1_offers) if 'Winter' in x['term']]
eng1_offers

[{'term': '2019 Winter Term',
  'link': 'Course.aspx?id=012011&term=2191&session=FS',
  'code': 'ENG1100',
  'title': 'Workshop in Essay Writing',
  'faculty': 'Faculty of Arts',
  'department': 'Department of English'},
 {'term': '2019 Winter Term',
  'link': 'Course.aspx?id=012019&term=2191&session=FS',
  'code': 'ENG1112',
  'title': 'Technical Report Writing',
  'faculty': 'Faculty of Arts',
  'department': 'Department of English'},
 {'term': '2019 Winter Term',
  'link': 'Course.aspx?id=012024&term=2191&session=FS',
  'code': 'ENG1120',
  'title': 'Literature and Composition I: Prose Fiction',
  'faculty': 'Faculty of Arts',
  'department': 'Department of English'},
 {'term': '2019 Winter Term',
  'link': 'Course.aspx?id=012025&term=2191&session=FS',
  'code': 'ENG1121',
  'title': 'Literature and Composition II: Drama and Poetry',
  'faculty': 'Faculty of Arts',
  'department': 'Department of English'},
 {'term': '2019 Winter Term',
  'link': 'Course.aspx?id=012028&term=2191&sess

In [12]:
def is_activity(tag):
    if not(tag.name == 'tr'):
        return False
    if tag.find('td') is None:
        return False
    if not tag.has_attr('class'):
        return True
    for x in tag['class']:
        if 'first' in x or 'hidden' in x:
            return False
    return True

def get_sections(shortlink):
    r = requests.get('https://web30.uottawa.ca/v3/SITS/timetable/'+shortlink)

    sch = BeautifulSoup(r.text, 'html.parser')
    sections_table = sch.find('div', attrs={'id':'schedule'})

    sections_table = sections_table.find_all('div', attrs={'id':re.compile('[0-9]{1,}'), 'class':'schedule'})

    sections = []
    for section in sections_table:
        activities = []
        section = raze_list([list(x.find_all(is_activity)) for x in section.find_all('table')])
        for activity in section:
            tmp = dict()
            try:
                tmp['section'] = activity.find('td', attrs={'class':'Section'}).text
                tmp['activity'] = activity.find('td', attrs={'class':'Activity'}).text
                tmp['time'] = activity.find('td', attrs={'class':'Day'}).text
                tmp['location'] = activity.find('td', attrs={'class':'Place'}).text
                tmp['professor'] = activity.find('td', attrs={'class':'Professor'}).text
            except AttributeError as e:
                print(activity)
                print(activity['class'])
                raise e
            activities.append(tmp)
        sections.append(activities)
        
    return sections

In [13]:
eng1_times = [get_sections(x['link']) for x in eng1_offers]
eng1_times

[[[{'section': 'ENG1100 Y00(January 07 - April 05)',
    'activity': 'LEC',
    'time': 'Tuesday 19:00 - 20:20',
    'location': 'MRT 250',
    'professor': 'Geraldine  Arbach'},
   {'section': 'ENG1100 Y01(January 07 - April 05)',
    'activity': 'DGD',
    'time': 'Tuesday 20:30 - 21:50',
    'location': 'MRT 251',
    'professor': 'Geraldine  Arbach'},
   {'section': 'ENG1100 Y02(January 07 - April 05)',
    'activity': 'DGD',
    'time': 'Tuesday 20:30 - 21:50',
    'location': 'MRT 252',
    'professor': 'Geraldine  Arbach'}],
  [{'section': 'ENG1100 X00(January 07 - April 05)',
    'activity': 'LEC',
    'time': 'Thursday 8:30 - 9:50',
    'location': 'VNR 1095',
    'professor': 'Sandra  MacPherson'},
   {'section': 'ENG1100 X01(January 07 - April 05)',
    'activity': 'DGD',
    'time': 'Thursday 10:00 - 11:20',
    'location': 'LMX 405',
    'professor': 'Sandra  MacPherson'},
   {'section': 'ENG1100 X02(January 07 - April 05)',
    'activity': 'DGD',
    'time': 'Thursday 10:

# Query Future Courses

In [7]:
mpc = course_data[course_data.code.str.contains('MAT [34]|CSI [34]|PHY [34]', regex=True)]

In [8]:
mpc_offers = [get_offerings(x) for x in mpc.code.tolist()]
mpc_offers

AttributeError: 'NoneType' object has no attribute 'text'

In [None]:
json.loads(course_data[course_data.code.str.contains('CSI 3105')].to_json(orient='records'))

In [None]:
def recordify(df):
    return json.loads(df.to_json(orient='records'))

In [None]:
recordify(course_data[course_data.code.str.contains("MAT 33(48|61)", regex=True) & course_data.lang.str.contains('en')])