In [None]:
import requests
from IPython.display import GeoJSON
import pandas as pd
import numpy as np
import io
import urllib.request
import gc

In [None]:
## Configuration for the targeted Nov-Feb baseline.

## Sensors handled by the loop that starts its downloads at November 2019.
sensorListNov = [
    'PER_AIRMON_MONITOR1048100',
    'PER_AIRMON_MONITOR1056100',
    'PER_AIRMON_MONITOR914',
    'PER_AIRMON_MONITOR1135100',
]

## Sensors without November data; a separate loop starts these at December.
sensorListDec = [
    'PER_AIRMON_MONITOR1156100',
    'PER_AIRMON_MONITOR1155100',
]

## Variables requested for every sensor.
variables = ['NO2', 'PM2.5', 'PM10']

## Accumulator for the raw rows of every sensor/variable pair; starts empty.
NE_Comb_Baseline = pd.DataFrame()

In [None]:
## Baseline for the sensors in sensorListNov (Nov 2019 - Feb 2020).
## For every sensor/variable pair: download the monthly CSVs, keep the raw rows
## for NE_Comb_Baseline, and pickle a Weekday x Hour summary (median, q15, q85).
## NOTE: this cell extends NE_Comb_Baseline, so running it twice adds the same
## rows twice - re-create NE_Comb_Baseline before re-running.
rawFrames = [NE_Comb_Baseline]
for s in sensorListNov:
    for v in variables:
        monthlyFrames = []
        ## Pull Nov, Dec, Jan from cache
        for yearMonth in ("2019/11", "2019/12", "2020/1"):
            r = requests.get("http://uoweb3.ncl.ac.uk/api/v1.1/sensors/"+s+"/data/cached/"+v+"/"+yearMonth+"/csv/")
            ## Stop on an HTTP error instead of parsing the error body as CSV
            r.raise_for_status()
            monthlyFrames.append(pd.read_csv(io.StringIO(r.text)))
        ## Pull Feb from 'live' databases
        dataCall = 'http://uoweb3.ncl.ac.uk/api/v1.1/sensors/'+s+'/data/csv/'
        params = dict(
            starttime='20200201',
            endtime='20200301',
            data_variable=v
        )
        r = requests.get(dataCall, params=params)
        r.raise_for_status()
        monthlyFrames.append(pd.read_csv(io.StringIO(r.text)))
        ## Join monthly dataframes (fresh index so row labels are unique)
        sensorData = pd.concat(monthlyFrames, ignore_index=True)
        ## Keep the raw data for the NE Combined dataset
        rawFrames.append(sensorData)
        ## Remove flagged data and keep only the value plus weekday and hour.
        ## A new frame is built rather than editing the filtered slice in place,
        ## which avoids SettingWithCopyWarning and any dependence on which other
        ## columns the CSV happens to contain.
        unflagged = sensorData[sensorData["Flagged as Suspect Reading"]==False]
        timestamps = pd.to_datetime(unflagged['Timestamp'])
        sensorData = pd.DataFrame({"Weekday": timestamps.dt.weekday,
                                   "Hour": timestamps.dt.hour,
                                   "Value": unflagged["Value"]})
        ## Aggregate baseline data by day/hour - median, q15, q85
        aggregateColumns = ['Weekday', 'Hour']
        grouped = sensorData.groupby(aggregateColumns)["Value"]
        ## Currently 15/85th Percentiles to match Luke Smith Plots
        ## "Mean" actually holds the median; the column name is kept so that
        ## existing readers of the pickles are not broken.
        dfComb = pd.DataFrame({"Mean": grouped.median(),
                               "LQ": grouped.quantile(.15),
                               "HQ": grouped.quantile(.85)}).reset_index()
        ##Pickle out - not used pickle before, need to test -  change to cache when ready
        pd.to_pickle(dfComb, 'C:/Covid_Baseline/baseline_aq_'+s+'_'+v+'.pkl')
        dfComb = None
        gc.collect()
## Single concat after the loop (DataFrame.append was removed in pandas 2.0);
## NE_Comb_Baseline is only updated once every download has succeeded.
NE_Comb_Baseline = pd.concat(rawFrames, ignore_index=True)

In [None]:
## Baseline for the sensors in sensorListDec (Dec 2019 - Feb 2020).
## For every sensor/variable pair: download the monthly CSVs, keep the raw rows
## for NE_Comb_Baseline, and pickle a Weekday x Hour summary (median, q15, q85).
## NOTE: this cell extends NE_Comb_Baseline, so running it twice adds the same
## rows twice - re-create NE_Comb_Baseline before re-running.
rawFrames = [NE_Comb_Baseline]
for s in sensorListDec:
    for v in variables:
        monthlyFrames = []
        ## Pull Dec, Jan from cache
        for yearMonth in ("2019/12", "2020/1"):
            r = requests.get("http://uoweb3.ncl.ac.uk/api/v1.1/sensors/"+s+"/data/cached/"+v+"/"+yearMonth+"/csv/")
            ## Stop on an HTTP error instead of parsing the error body as CSV
            r.raise_for_status()
            monthlyFrames.append(pd.read_csv(io.StringIO(r.text)))
        ## Pull Feb from 'live' databases
        dataCall = 'http://uoweb3.ncl.ac.uk/api/v1.1/sensors/'+s+'/data/csv/'
        params = dict(
            starttime='20200201',
            endtime='20200301',
            data_variable=v
        )
        r = requests.get(dataCall, params=params)
        r.raise_for_status()
        monthlyFrames.append(pd.read_csv(io.StringIO(r.text)))
        ## Join monthly dataframes (fresh index so row labels are unique)
        sensorData = pd.concat(monthlyFrames, ignore_index=True)
        ## Keep the raw data for the NE Combined dataset
        rawFrames.append(sensorData)
        ## Remove flagged data and keep only the value plus weekday and hour.
        ## A new frame is built rather than editing the filtered slice in place,
        ## which avoids SettingWithCopyWarning and any dependence on which other
        ## columns the CSV happens to contain.
        unflagged = sensorData[sensorData["Flagged as Suspect Reading"]==False]
        timestamps = pd.to_datetime(unflagged['Timestamp'])
        sensorData = pd.DataFrame({"Weekday": timestamps.dt.weekday,
                                   "Hour": timestamps.dt.hour,
                                   "Value": unflagged["Value"]})
        ## Aggregate baseline data by day/hour - median, q15, q85
        aggregateColumns = ['Weekday', 'Hour']
        grouped = sensorData.groupby(aggregateColumns)["Value"]
        ## "Mean" actually holds the median; the column name is kept so that
        ## existing readers of the pickles are not broken.
        dfComb = pd.DataFrame({"Mean": grouped.median(),
                               "LQ": grouped.quantile(.15),
                               "HQ": grouped.quantile(.85)}).reset_index()
        ##Pickle out - not used pickle before, need to test - change to cache when ready
        pd.to_pickle(dfComb, 'C:/Covid_Baseline/baseline_aq_'+s+'_'+v+'.pkl')
        dfComb = None
        gc.collect()
## Single concat after the loop (DataFrame.append was removed in pandas 2.0);
## NE_Comb_Baseline is only updated once every download has succeeded.
NE_Comb_Baseline = pd.concat(rawFrames, ignore_index=True)

In [None]:
## Combined baseline: for each variable, pool the unflagged raw rows collected
## in NE_Comb_Baseline and pickle a Weekday x Hour summary (median, q15, q85).
for v in variables:
    dfTemp = NE_Comb_Baseline[NE_Comb_Baseline["Variable"]==v]
    dfTemp = dfTemp[dfTemp["Flagged as Suspect Reading"]==False]
    ## Build a new frame holding only the value plus weekday and hour instead of
    ## editing the filtered slice in place; this avoids SettingWithCopyWarning
    ## and any dependence on which other columns the raw data contains.
    timestamps = pd.to_datetime(dfTemp['Timestamp'])
    dfTemp = pd.DataFrame({"Weekday": timestamps.dt.weekday,
                           "Hour": timestamps.dt.hour,
                           "Value": dfTemp["Value"]})
    ## Aggregate baseline data by day/hour - median, q15, q85
    aggregateColumns = ['Weekday', 'Hour']
    grouped = dfTemp.groupby(aggregateColumns)["Value"]
    ## "Mean" actually holds the median; the column name is kept so that
    ## existing readers of the pickles are not broken.
    dfComb = pd.DataFrame({"Mean": grouped.median(),
                           "LQ": grouped.quantile(.15),
                           "HQ": grouped.quantile(.85)}).reset_index()
    ##Pickle out - not used pickle before, need to test - change to cache when ready
    pd.to_pickle(dfComb, 'C:/Covid_Baseline/NE_Aggregated_'+v+'_Baseline.pkl')
    dfComb = None
    gc.collect()