In [None]:
import ast
import json
import os
import time
from copy import deepcopy
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import requests

### Fetch the weather data

In [None]:
# Dark Sky API key. Read from the DARKSKY_API_KEY environment variable so the
# real secret never has to be stored in the notebook; when the variable is not
# set, the original placeholder value is used.
API_KEY = os.environ.get('DARKSKY_API_KEY', 'API-KEY')
# Last day to request (inclusive: the fetch loop compares with `<=`).
end_date = datetime(2020, 4, 1)
# Step between two consecutive requests for the same station.
period = timedelta(days=1)
# Initial wait, in seconds, after a failed request.
sleep_time = 5

In [None]:
# Stations to download: name -> (latitude, longitude, first day to fetch).
stations = {
    'Karpos': (42.00666664, 21.38694446, datetime(2011, 7, 24)),
    'Centar': (41.99249998, 21.4236110, datetime(2011, 7, 24)),
    'Lisice': (41.9783, 21.47, datetime(2007, 10, 12)),
    'Rektorat': (41.9992, 21.4408, datetime(2005, 2, 18)),
    'Miladinovci': (41.9875, 21.6525, datetime(2008, 11, 29)),
}

In [None]:
# Download the hourly weather history of every station, one request per day,
# and append each hourly record as one line (the text form of the record's
# dict) to ./data/raw-weather/<station>.
for station, (latitude, longitude, start_date) in stations.items():
    # datetime objects are immutable, so no copy is needed before advancing.
    curr_date = start_date
    # Seconds to wait after a failed request; grows while failures repeat.
    retry_delay = sleep_time

    # `with` guarantees the file is closed even if the cell is interrupted.
    with open(f'./data/raw-weather/{station}', 'a') as f:
        while curr_date <= end_date:
            try:
                time.sleep(0.2)  # short pause between consecutive API calls
                request_time = curr_date.strftime("%Y-%m-%dT%H:%M:%S")
                print(request_time, station)

                url = f'https://api.darksky.net/forecast/{API_KEY}/{latitude},{longitude},{request_time}' + \
                       '?exclude=[currently,flags,minutely,daily,alerts]'

                # The timeout keeps a stalled connection from hanging the loop.
                r = requests.get(url, timeout=30)
                # Turn HTTP error responses into exceptions so they are
                # retried below rather than mistaken for a day without data.
                r.raise_for_status()
                data = r.json()

                # A successful response without an hourly block means there is
                # nothing to store for this day: skip it instead of retrying
                # the same request forever.
                hourly = data.get('hourly', {}).get('data', [])
                if not hourly:
                    print(f'{station} {request_time}: no hourly data, skipping')

                # Format the whole day before writing anything, so a failure
                # part-way through cannot leave rows that a retry would
                # then duplicate.
                lines = []
                for item in hourly:
                    # NOTE(review): fromtimestamp() without a tz argument
                    # converts to the local time zone of the machine running
                    # the notebook - confirm that this is intended.
                    hour_time = datetime.fromtimestamp(int(item['time']))
                    item['time'] = hour_time.strftime("%Y-%m-%d %H:%M:%S")
                    lines.append(f'{item}\n')

                f.writelines(lines)
                f.flush()

                curr_date += period
                retry_delay = sleep_time  # success: reset the back-off

            except Exception as e:
                # Best-effort download: report the problem, wait, and retry
                # the same day with a longer delay.
                print(f'{station} {curr_date}: {e!r}; retrying in {retry_delay}s')
                time.sleep(retry_delay)
                retry_delay += 5

### Format the weather data

In [None]:
# Hourly fields copied into the formatted CSV, in column order. A field that
# is missing from a record is stored as NaN. Each name is listed exactly once
# so the resulting table has no duplicated columns.
attributes = ['apparentTemperature', 'cloudCover', 'dewPoint', 'humidity', 'icon', 'ozone',
              'precipAccumulation', 'precipIntensity', 'precipProbability',
              'precipType', 'pressure', 'summary', 'temperature', 'uvIndex', 'visibility',
              'windBearing', 'windGust', 'windSpeed']

In [None]:
# Convert each station's raw hourly records into a CSV table with one row per
# timestamp and one column per entry in `attributes`.
for station in stations:
    # Keyed by timestamp: when a timestamp occurs more than once in the raw
    # file, the last occurrence wins.
    data = {}

    with open(f'./data/raw-weather/{station}', 'r') as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue  # tolerate blank lines

            # Every raw line is the text form of a Python dict, not JSON, so
            # it is parsed as a Python literal. Swapping quote characters and
            # using a JSON parser breaks on strings that contain an apostrophe
            # and on None/True/False values.
            try:
                item = ast.literal_eval(line)
            except (ValueError, SyntaxError) as err:
                raise ValueError(
                    f'./data/raw-weather/{station}: cannot parse line {line_no}'
                ) from err

            # Fields absent from a record become NaN.
            data[item['time']] = [item.get(att, np.nan) for att in attributes]

    df = pd.DataFrame.from_dict(data, orient='index', columns=attributes)
    df.to_csv(f'./data/formatted-weather/{station}', index=True)