In [1]:
import numpy as np
import numpy.matlib
import pandas as pd
import requests
from math import ceil
from bs4 import BeautifulSoup
import json
import matplotlib.pyplot as plt

In [2]:
# Endpoint queried with a pair of years through the `yearA` / `yearB`
# query parameters, e.g.
#   requests.get(url, params={'yearA': 2020, 'yearB': 2021})
url = 'https://www.snowyhydro.com.au/wp-content/themes/snowyhydro/inc/getData.php'

# Odd years 2021 -> 1999 and even years 2020 -> 1998, newest first, so that
# zipping the two lists yields (2021, 2020), (2019, 2018), ...
current_year = list(range(2021, 1997, -2))
past_years = list(range(2020, 1996, -2))


In [3]:
# requests waits indefinitely unless a timeout is given explicitly.
REQUEST_TIMEOUT_S = 30


def _lake_frames(payload, year):
    """Build one DataFrame per non-null 'lake' entry recorded for `year`.

    Parameters
    ----------
    payload : dict
        Decoded response; read as payload[str(year)]['snowyhydro']['level'].
    year : int
        Year whose records are extracted.

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry, in the order the entries appear.

    Raises
    ------
    KeyError
        If `year`, 'snowyhydro', 'level' or the 'lake' column is missing.
    """
    level_records = payload[str(year)]['snowyhydro']['level']
    lake_entries = pd.DataFrame(level_records)['lake'].dropna()

    frames = []
    for entry in lake_entries:
        if isinstance(entry, dict):
            # A dict becomes a single row whose columns are the dict's keys.
            frames.append(pd.DataFrame.from_dict(entry, orient='index').T)
        else:
            # Presumably a list of such dicts - TODO confirm against the API.
            frames.append(pd.DataFrame.from_dict(entry))
    return frames


frames_current = []
frames_past = []

# Each request returns both years of the pair; split them into two lists.
for x, y in zip(current_year, past_years):
    params = {'yearA': x, 'yearB': y}
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_S)
    # Fail on HTTP errors here instead of as a confusing JSON decode error.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    df_json = json.loads(soup.text.strip())

    frames_current.extend(_lake_frames(df_json, x))
    frames_past.extend(_lake_frames(df_json, y))

# sort=True orders the columns alphabetically; the positional column rename
# in the next cell relies on that order.
df_current = pd.concat(frames_current, sort=True)
df_past = pd.concat(frames_past, sort=True)

In [4]:
# Stack both halves, then rename the columns by position. This assumes the
# concatenated frames expose exactly three columns in this order.
readings_all = pd.concat([df_current, df_past])
readings_all.columns = ['Level', 'Datetime', 'Location']

# Index by timestamp and give Level a numeric dtype.
df = readings_all.set_index('Datetime')
df.index = pd.DatetimeIndex(df.index)
df['Level'] = df['Level'].astype('float')

In [5]:
# Per-location group handle (not used by the cells visible here).
Location_Gp = df.groupby(by='Location')

# Wide table: one row per timestamp, one column per location. Readings that
# share a timestamp and location are averaged (pivot_table's default).
df_pivot = df.pivot_table(
    values='Level',
    index=df.index,
    columns='Location',
    aggfunc='mean',
)

In [6]:
# Display the pivoted table: one Level column per location, indexed by timestamp.
df_pivot

Location,Lake Eucumbene,Lake Jindabyne,Tantangara Reservoir
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1998-01-05 00:00:00,83.70,69.20,6.70
1998-01-12 00:00:00,83.10,69.10,6.70
1998-01-19 00:00:00,82.20,69.10,6.60
1998-02-02 00:00:00,80.00,68.60,6.60
1998-02-09 00:00:00,78.70,68.30,6.50
...,...,...,...
2021-12-27 07:00:02,46.77,98.56,25.82
2021-12-28 07:00:02,46.79,98.58,25.51
2021-12-29 07:00:03,46.82,98.56,25.17
2021-12-30 07:00:02,46.86,98.58,24.78


In [7]:
# Extract one Level series per location, dropping timestamps with no reading.
Lake_Eucumbene, Lake_Jindabyne, Tantangara_Reservoir = (
    df_pivot[location_name].dropna()
    for location_name in ('Lake Eucumbene', 'Lake Jindabyne', 'Tantangara Reservoir')
)

In [11]:
# Overview statistics of Level for every (location, calendar year) pair.
location_key = df['Location']
year_key = df.index.year
df.groupby([location_key, year_key]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Level,Level,Level,Level,Level,Level,Level,Level
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
Location,Datetime,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Lake Eucumbene,1998,51.0,73.907843,5.157202,66.80,69.10,73.50,78.950,83.70
Lake Eucumbene,1999,51.0,71.239216,3.450917,66.20,67.80,71.30,73.850,78.90
Lake Eucumbene,2000,46.0,64.630435,5.824350,55.80,59.40,64.50,69.675,73.80
Lake Eucumbene,2001,47.0,69.700000,4.194873,62.50,66.15,70.80,72.750,75.70
Lake Eucumbene,2002,57.0,71.774561,2.687423,66.70,69.40,72.60,74.100,75.60
...,...,...,...,...,...,...,...,...,...
Tantangara Reservoir,2017,365.0,25.557260,5.703917,20.28,21.07,23.76,27.160,42.87
Tantangara Reservoir,2018,365.0,35.767699,8.958454,26.15,26.58,34.46,43.440,54.75
Tantangara Reservoir,2019,365.0,26.750630,11.346418,8.61,19.18,23.24,33.960,50.48
Tantangara Reservoir,2020,366.0,14.715710,5.123201,8.31,10.85,14.36,17.065,28.71
