In [None]:
# based on UO-Historical-Noise-Download.ipynb
import requests, os, io
import pandas as pd

from datetime import datetime,timedelta

# strftime pattern (YYYYMMDDHHMMSS) used for the starttime/endtime request values.
api_date_string_format = "%Y%m%d%H%M%S"

# Numeric sensor ids, grouped by the location they monitor.
nsets = [
    [1409, 1410],  # taxi rank at the station
    [1408, 1414],  # lights near the station
    [2604, 2606, 2603, 2602, 2605],  # road between uni and USB
    [2753, 2752],  # nr gosforth high street
    [2902, 2904, 2903],  # regent centre metro + bus stop (3/6)
    [1207, 1206, 1202],  # bensham, Symphony Court
    [1006, 1007, 1002],  # bensham, St George's Church
    [1701, 1702],  # Centre, Westgate-St James Intersection
]

# Flatten the groups, in order, into the sensor names used to build the URLs.
sensorList = [
    'PER_EMOTE_{id}'.format(id=number)
    for location_group in nsets
    for number in location_group
]

# Root of the sensor endpoints; the sensor name and data path are appended to it.
base_url = r'https://newcastle.urbanobservatory.ac.uk/api/v1.1/sensors/'


# Raw downloads and the daily resampled summaries are cached in separate folders.
raw_data_cache_path = os.path.join('../cache/', 'noise', 'raw')
resampled_path = os.path.join('../cache/', 'noise', 'processed')

# exist_ok=True replaces the check-then-create pair: it cannot race with another
# process creating the folder, and it fails loudly if the path exists but is not
# a directory instead of letting the later pickle writes fail.
os.makedirs(raw_data_cache_path, exist_ok=True)
os.makedirs(resampled_path, exist_ok=True)
# Download the noise readings of every sensor in ``sensorList``, keep a raw
# per-sensor cache, and store a daily median summary next to it.
# The history is requested in monthly chunks and then topped up with one call
# covering the time since the newest downloaded reading.
# This takes significant time.
print(datetime.now().strftime('%H:%M:%S'))

# A download without a usable cache starts here; readings at or before this
# instant are dropped from the result.
_FIRST_DATE = datetime(2020, 1, 1)
# (connect, read) timeouts in seconds, so a stalled connection cannot hang the run.
_REQUEST_TIMEOUT = (10, 300)
# Width of one resampling bin. Passing a timedelta avoids the '24H' alias,
# which recent pandas versions deprecate.
_DAY = timedelta(hours=24)


def _parse_csv_response(response):
    """Return the CSV body of ``response`` as a DataFrame, or None without data.

    A 404 counts as "no data". Any other error status raises
    ``requests.HTTPError`` so that an error page is never parsed as readings.
    """
    if response.status_code == 404:
        return None
    response.raise_for_status()
    try:
        frame = pd.read_csv(io.StringIO(response.text))
    except pd.errors.EmptyDataError:
        return None
    # Without a Timestamp column the body cannot be merged with the readings.
    if frame.empty or 'Timestamp' not in frame.columns:
        return None
    return frame


def _load_cached_readings(pickle_path):
    """Return ``(cached_rows, resume_from)`` for a raw-data pickle.

    ``resume_from`` is the first day of the newest cached month and
    ``cached_rows`` holds only the rows before it, so that month is downloaded
    again. Without a usable cache the result is an empty frame and
    ``_FIRST_DATE``.
    """
    if not os.path.exists(pickle_path):
        return pd.DataFrame(), _FIRST_DATE
    try:
        # NOTE: unpickling executes code; only load cache files this script wrote.
        cached = pd.read_pickle(pickle_path)
        stamps = pd.to_datetime(cached['Timestamp'])
        newest = stamps.max()
        resume_from = datetime(newest.year, newest.month, 1)
        return cached[stamps < resume_from], resume_from
    except Exception as err:
        # Best effort: an unreadable cache only costs a full re-download. Both
        # values are reset together so no half-updated state is left behind.
        print('Ignoring unusable cache {path}: {err!r}'.format(path=pickle_path, err=err))
        return pd.DataFrame(), _FIRST_DATE


for sensor_name in sensorList:
    print(sensor_name)
    pandas_pickle_path = os.path.join(raw_data_cache_path, 'noise-data-' + sensor_name + '.pkl')
    resample_pandas_pickle_path = os.path.join(resampled_path, 'noise-data-' + sensor_name + '.pkl')

    combBaseline, min_date = _load_cached_readings(pandas_pickle_path)
    # Frames are collected and concatenated once: DataFrame.append was removed
    # in pandas 2.0 and copied the whole frame on every call.
    frames = [] if combBaseline.empty else [combBaseline]

    months = pd.date_range(min_date, datetime.now(),
                           freq='MS').strftime("%Y/%m").tolist()

    try:
        for m in months:
            print(m)
            dataCall = base_url + sensor_name + '/data/cached/Sound/{m}/csv/'.format(m=m)
            sensorData = _parse_csv_response(
                requests.get(dataCall, timeout=_REQUEST_TIMEOUT))
            if sensorData is not None:
                frames.append(sensorData)

        if frames:
            combBaseline = pd.concat(frames, ignore_index=True)
            newest = pd.to_datetime(combBaseline['Timestamp']).max()
            if pd.notna(newest):
                min_date = newest

        # Top up with everything recorded since the newest reading so far.
        data_params = dict(
            data_variable='Sound',
            starttime=min_date.strftime(api_date_string_format),
            endtime=datetime.now().strftime(api_date_string_format)
        )
        print(data_params)

        r = requests.get(base_url + sensor_name + '/data/csv/',
                         params=data_params, timeout=_REQUEST_TIMEOUT)
        print(r.url)
        sensorData = _parse_csv_response(r)
    except requests.RequestException as err:
        # Skip the sensor rather than caching a partial download: only the
        # newest cached month is fetched again on the next run, so a hole
        # written now would not be refilled.
        print('Download failed for {name}: {err!r}'.format(name=sensor_name, err=err))
        continue

    if sensorData is not None:
        print(sensorData['Timestamp'])
        if combBaseline.empty:
            combBaseline = sensorData
        else:
            combBaseline = pd.concat([combBaseline, sensorData], ignore_index=True)

    if combBaseline.empty:
        print('No readings available for ' + sensor_name)
        continue

    combBaseline = combBaseline[pd.to_datetime(combBaseline['Timestamp']) > _FIRST_DATE]
    # The top-up window starts at the newest downloaded timestamp, so the
    # boundary reading may be returned twice; identical rows are dropped.
    combBaseline = combBaseline.drop_duplicates()
    if combBaseline.empty:
        print('No readings available for ' + sensor_name)
        continue

    # Save the raw readings as a pickle file.
    pd.to_pickle(combBaseline, pandas_pickle_path)

    combBaseline.index = pd.to_datetime(combBaseline['Timestamp'])
    # sort_index() returns a new frame; the result has to be kept, otherwise
    # the gaps below are measured on unsorted timestamps.
    combBaseline = combBaseline.sort_index()

    if len(combBaseline) < 2:
        print('Too few readings to resample ' + sensor_name)
        continue
    print(combBaseline.index[1], combBaseline.index[0])

    # The most common positive gap between consecutive readings is taken as
    # the sampling interval; zero gaps (repeated timestamps) are ignored so
    # the division below cannot be by zero.
    gap_seconds = pd.Series(combBaseline.index).diff().dt.total_seconds()
    gap_seconds = gap_seconds[gap_seconds > 0]
    if gap_seconds.empty:
        print('Cannot determine the sampling interval of ' + sensor_name)
        continue
    td = timedelta(seconds=float(gap_seconds.mode().iloc[0]))

    expected_readings = _DAY.total_seconds() / td.total_seconds()
    print()

    # Only numeric/boolean columns have a median; selecting them up front works
    # on pandas versions that raise on text columns as well as on older ones.
    daily_sample = combBaseline.select_dtypes(include=['number', 'bool']).resample(_DAY)
    median = daily_sample.median()
    # Share of the expected number of readings that is present on each day.
    median['data_prop'] = combBaseline['Variable'].resample(_DAY).count() / expected_readings
    print(median)

    # Save the daily summary as a pickle file.
    pd.to_pickle(median, resample_pandas_pickle_path)