# Temperature Data
This data is included with the git repository. It is available for NSW, Victoria, SA and Queensland. I've done the discovery on the NSW data; however, we'll use all the states in the model to get a representative result for Australia.

In [12]:
import pandas as pd
import seaborn as sns
# Load the NSW temperature readings, lower-case every column header, and parse
# the timestamp strings (day/month/year hour:minute) into proper datetimes.
df = pd.read_csv('~/PycharmProjects/ZZSC9020-Group-M/data/NSW/temperature_nsw.csv')
df = df.rename(columns=str.lower)
df['datetime'] = pd.to_datetime(df['datetime'], format='%d/%m/%Y %H:%M')
df

Unnamed: 0,location,datetime,temperature
0,Bankstown,2010-01-01 00:00:00,23.1
1,Bankstown,2010-01-01 00:01:00,23.1
2,Bankstown,2010-01-01 00:30:00,22.9
3,Bankstown,2010-01-01 00:50:00,22.7
4,Bankstown,2010-01-01 01:00:00,22.6
...,...,...,...
220321,Bankstown,2021-03-17 23:00:00,19.1
220322,Bankstown,2021-03-17 23:20:00,19.0
220323,Bankstown,2021-03-17 23:30:00,18.8
220324,Bankstown,2021-03-17 23:34:00,18.8


In [7]:
# Count the non-null values in every other column for each distinct
# 'location'; a quick check of which locations are present and whether any
# readings are missing.
df.groupby('location').count()

Unnamed: 0_level_0,datetime,temperature
location,Unnamed: 1_level_1,Unnamed: 2_level_1
Bankstown,220326,220326


In [32]:
from datetime import datetime
def trunc_month(value: datetime):
    """Return midnight on the first day of the month containing ``value``."""
    year, month = value.year, value.month
    return datetime(year, month, 1)

def trunc_quarter(value: datetime):
    """Return midnight on the first day of the calendar quarter containing ``value``.

    Quarters begin in January, April, July and October.
    """
    quarter_index = (value.month - 1) // 3  # 0 for Jan-Mar ... 3 for Oct-Dec
    first_month = quarter_index * 3 + 1
    return datetime(value.year, first_month, 1)

# Derive the calendar columns that later cells group by.
# `.dt.date` is the vectorised way to take the calendar date of every
# timestamp. It replaces a row-by-row `DataFrame.apply(..., axis=1)`, which
# builds a Series for each row and is very slow on a frame of this size.
df['date'] = df['datetime'].dt.date
# First day of the month / quarter that each reading falls in.
df['month'] = df['datetime'].apply(trunc_month)
df['quarter'] = df['datetime'].apply(trunc_quarter)
df

Unnamed: 0,location,datetime,temperature,date,month,quarter
0,Bankstown,2010-01-01 00:00:00,23.1,2010-01-01,2010-01-01,2010-01-01
1,Bankstown,2010-01-01 00:01:00,23.1,2010-01-01,2010-01-01,2010-01-01
2,Bankstown,2010-01-01 00:30:00,22.9,2010-01-01,2010-01-01,2010-01-01
3,Bankstown,2010-01-01 00:50:00,22.7,2010-01-01,2010-01-01,2010-01-01
4,Bankstown,2010-01-01 01:00:00,22.6,2010-01-01,2010-01-01,2010-01-01
...,...,...,...,...,...,...
220321,Bankstown,2021-03-17 23:00:00,19.1,2021-03-17,2021-03-01,2021-01-01
220322,Bankstown,2021-03-17 23:20:00,19.0,2021-03-17,2021-03-01,2021-01-01
220323,Bankstown,2021-03-17 23:30:00,18.8,2021-03-17,2021-03-01,2021-01-01
220324,Bankstown,2021-03-17 23:34:00,18.8,2021-03-17,2021-03-01,2021-01-01


## Maximum and Minimum
These are the maximum and minimum temperatures recorded in each quarter. They are not representative of the quarter as a whole, because each value reflects a single day of extreme temperature.

In [None]:
# Highest and lowest individual temperature reading in each quarter, each
# tagged with the metric it represents so both can share one long-form frame.
df_max = df[['quarter', 'temperature']].groupby('quarter').max()
df_max['metric'] = 'max'
df_min = df[['quarter', 'temperature']].groupby('quarter').min()
df_min['metric'] = 'min'

# After the groupby, 'quarter' is the index rather than a column. Promote it
# back to an ordinary column so that x='quarter' below refers to real column
# data instead of depending on seaborn resolving index level names.
df_quarter = pd.concat([df_max, df_min], axis=0).reset_index()

# One line per metric; the trailing semicolon hides the Axes repr in the output.
sns.lineplot(data=df_quarter, x='quarter', y='temperature', hue='metric');

## Maximum and Minimum Monthly Average
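These numbers represent, for each quarter, the smallest monthly average of the daily minimums, the largest monthly average of the daily maximums, and the mean of the monthly mean temperatures.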
The process for deriving these is:
1. Get the min, max, and mean temperature for each day.
2. Get the monthly averages of the daily min, max and mean.
3. For each quarter:
    - Get the smallest minimum monthly temperature.
    - Get the largest maximum monthly temperature.
    - Get the mean of the mean temperatures of the months.

In [None]:
# Step 1: minimum, maximum and mean temperature for every day.
df_day = (
    df
    .groupby(['date', 'month', 'quarter'])
    .agg({'temperature': ['min', 'max', 'mean']})
)

# Step 2: average the daily figures within each month, then drop the outer
# 'temperature' column level so the columns are simply min / max / mean.
df_month = df_day.groupby(['month', 'quarter']).mean()
df_month.columns = df_month.columns.droplevel()

# Step 3: per quarter, take the smallest monthly minimum, the largest monthly
# maximum and the mean of the monthly means, then reshape to long form
# (one row per quarter and metric) for plotting.
df_quarter = (
    df_month
    .groupby('quarter')
    .agg({'min': 'min', 'max': 'max', 'mean': 'mean'})
    .stack()
    .to_frame()
    .reset_index()
)
df_quarter.columns = ['quarter', 'metric', 'temperature']

sns.lineplot(data=df_quarter, x='quarter', y='temperature', hue='metric')