# 1 - Setting up

Import relevant libraries.

In [1]:
import json
import pandas as pd
import requests

Define all configuration variables.

In [2]:
# Endpoint requested to retrieve the examination data.
URL = 'https://exams.is.ed.ac.uk/search/'
# Local JSON file the retrieved records are written to and later read back from.
FILENAME = 'data.json'

# 2 - Retrieving and cleaning data

Retrieve examination diet data and save the output to `data.json`.

In [3]:
# Fetch first, parse second, write last: the cache file is only opened once
# the payload has been parsed, so a failed request or malformed response can
# no longer leave a truncated/empty file behind.
with requests.Session() as s:
    # A timeout stops the cell from hanging forever on an unresponsive server;
    # 30 seconds is a generous upper bound for a single search request.
    raw_data = s.get(URL, timeout=30)
    # Surface HTTP errors (4xx/5xx) directly instead of as a JSON decode error.
    raw_data.raise_for_status()

# Only the list stored under the 'data' key of the response is kept.
d_list = json.loads(raw_data.text)['data']

with open(FILENAME, 'w') as f:
    json.dump(d_list, f)

Load the saved records into a data frame, then derive the course code and name, build the start and end timestamps, and keep only the relevant columns.

In [70]:
# Read the cached records back from disk and turn them into a data frame.
with open(FILENAME, 'r') as f:
    d_list = json.load(f)

df = pd.DataFrame(d_list)

In [71]:
# Rebuild the working frame from the raw records every time this cell runs,
# so the cell is idempotent: re-running it no longer fails because `title`
# and the time columns were already removed by a previous run.
exams = pd.DataFrame(d_list)

# Split "<code> - <name>" on the first ' - ' only (course names may contain
# ' - ' themselves). `n` is passed by keyword and `expand=True` returns the two
# parts as columns; unpacking the `.str` accessor is not supported by newer
# pandas releases.
title_parts = exams['title'].str.split(' - ', n=1, expand=True)
exams['course_code'] = title_parts[0]
exams['course_name'] = title_parts[1]

# Build both timestamps from the *raw* date string. Previously `end_date` was
# joined from `start_date` after it had already been overwritten with
# "<date> <start_time>", producing "<date> <start_time> <end_time>" and relying
# on lenient parsing to pick the last time.
# NOTE(review): assumes the raw `start_date` is a day-first date string
# without a time component -- confirm against the API payload.
exam_day = exams['start_date']
exams['end_date'] = pd.to_datetime(exam_day + ' ' + exams['end_time'], dayfirst=True)
exams['start_date'] = pd.to_datetime(exam_day + ' ' + exams['start_time'], dayfirst=True)

# Each venue entry is a dict; keep only its 'location' value.
exams['venues'] = exams['venues'].apply(
    lambda venue_dicts: [venue['location'] for venue in venue_dicts]
)

# Append ':00' to every duration value and parse the result as a timedelta.
# `unit` is omitted: pandas rejects it when the input consists of strings.
exams['duration'] = pd.to_timedelta(exams['duration'].map('{}:00'.format))

# Keep only the relevant columns, in display order. `reindex` discards every
# column not listed here, so no separate in-place `drop` is needed.
col_names = ['course_code', 'course_name', 'subject_area',
             'start_date', 'end_date', 'duration', 'venues']

df = exams.reindex(columns=col_names)

# Preview the first rows rather than dumping the whole frame.
display(df.head(10))

Unnamed: 0,course_code,course_name,subject_area,start_date,end_date,duration,venues
0,ACCN08008,Accountancy 1B,Accounting,2019-05-13 14:30:00,2019-05-13 16:30:00,02:00:00,[The Pleasance Sports Hall]
1,ACCN08010,Accountancy 2B,Accounting,2019-04-30 14:30:00,2019-04-30 16:30:00,02:00:00,[Patersons Land Room G1 (Enter via Holyrood R...
2,ACCN10002,Advanced Management Accounting,Accounting,2019-05-15 09:30:00,2019-05-15 11:30:00,02:00:00,[Playfair Library]
3,ACCN10005,Audit Practice,Accounting,2019-05-07 14:30:00,2019-05-07 16:30:00,02:00:00,[Patersons Land Room 1.26 (Enter via Royal M...
4,ACCN10010,Management Accounting Applications,Accounting,2019-05-03 09:30:00,2019-05-03 12:30:00,03:00:00,[McEwan Hall - Foyer Room 3 & 4 (Enter via the...
5,ACCN10014,Accounting and Everyday Life,Accounting,2019-05-09 09:30:00,2019-05-09 11:30:00,02:00:00,[Chrystal MacMillan Seminar Room 1/2]
6,AFRI08003,Introductory Swahili Language and Culture 1B -...,African Studies,2019-05-17 14:30:00,2019-05-17 16:30:00,02:00:00,[Patersons Land Room 1.26 (Enter via Royal M...
7,AFRI08008,Africa in the Contemporary World,African Studies,2019-05-20 14:30:00,2019-05-20 16:30:00,02:00:00,[The Pleasance Sports Hall]
8,ANHI08013,Ancient History 2b: Themes and Theories in Anc...,Ancient History,2019-05-06 14:30:00,2019-05-06 16:30:00,02:00:00,[Appleton Tower Concourse]
9,ANHI10011,Roman Slavery,Ancient History,2019-05-09 14:30:00,2019-05-09 16:30:00,02:00:00,[Patersons Land Room G1 (Enter via Holyrood R...


In [74]:
# Show pandas' default summary statistics for the cleaned frame.
display(df.describe())

Unnamed: 0,duration
count,991
mean,0 days 02:09:58.486377
std,0 days 00:28:51.131389
min,0 days 01:00:00
25%,0 days 02:00:00
50%,0 days 02:00:00
75%,0 days 02:00:00
max,0 days 07:00:00
