In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import datetime
import matplotlib.pyplot as plt

In [None]:
# Load the Graz weather export. `header=9` makes row 9 (0-based) the column-name
# row; the lines above it are not parsed as data.
graz_weather_df = pd.read_csv(
    "../../data/graz_weather.csv",
    sep=',',
    header=9,
    encoding='UTF-8',
)


In [None]:
def parse_weather_timestamp(timestamp):
    """Re-format a compact weather timestamp string as ``DD.MM.YY HH:MM``.

    The input is sliced by position: characters 2-3 are used as the two-digit
    year, 4-5 as the month, 6-7 as the day, 9-10 as the hour and everything
    from index 11 onwards as the minutes. Character 8 is skipped (presumably a
    date/time separator such as ``T`` -- confirm against the CSV).

    No validation is performed; the slices are simply re-joined.
    """
    short_year = timestamp[2:4]
    month = timestamp[4:6]
    day = timestamp[6:8]
    hour = timestamp[9:11]
    minute = timestamp[11:]
    return f"{day}.{month}.{short_year} {hour}:{minute}"

def timestamp_to_datetime(timestamp):
    """Convert a ``DD.MM.YY HH:MM`` string into a ``datetime.datetime``.

    The string is split on the first space into a date part and a time part.
    The date part is split on ``.`` into day, month and two-digit year; the
    year is prefixed with ``'20'``, so only years 2000-2099 can be expressed.
    The hour is read from the first two characters of the time part and the
    minute from its last two characters.

    Raises ``IndexError`` when a part is missing and ``ValueError`` when a
    field is not numeric or not a valid calendar value.
    """
    parts = timestamp.split(' ')
    date_part = parts[0]
    time_part = parts[1]

    date_fields = date_part.split('.')
    year = int('20' + date_fields[2])
    month = int(date_fields[1])
    day = int(date_fields[0])

    hour = int(time_part[:2])
    minute = int(time_part[-2:])

    return datetime.datetime(year, month, day, hour, minute)

    

In [None]:
# Re-format each raw timestamp string and turn it into a datetime object.
# NOTE: this overwrites the column, so the cell expects the raw string values.
graz_weather_df['timestamp'] = graz_weather_df['timestamp'].apply(
    lambda raw: timestamp_to_datetime(parse_weather_timestamp(raw))
)

Keep only the period for which energy data is also available (after 2021-06-13 and before 2022-01-02).

In [None]:
# Restrict the weather data to the analysed columns and to the time window
# strictly between 2021-06-13 00:00 and 2022-01-02 00:00.
weather_columns = [
    'timestamp',
    'Graz Temperature [2 m elevation corrected]',
    'Graz Shortwave Radiation',
    'Graz Direct Shortwave Radiation',
    'Graz Diffuse Shortwave Radiation',
    'Graz Relative Humidity [2 m]',
]
timestamps = graz_weather_df['timestamp']
in_energy_window = (timestamps > datetime.datetime(2021, 6, 13)) & (
    timestamps < datetime.datetime(2022, 1, 2)
)
df = graz_weather_df.loc[in_energy_window, weather_columns]

In [None]:
# Summary statistics of the filtered weather frame, shown as the cell output.
df.describe()

## Checking Temperature outliers

In [None]:
# Plot the whole temperature series (markers joined by a dashed line), save it
# as a PNG and display it.
plt.rcParams['figure.figsize'] = (16, 6)
plt.rcParams['font.size'] = 15

fig, ax = plt.subplots()
ax.plot_date(
    df['timestamp'],
    df['Graz Temperature [2 m elevation corrected]'],
    color='black',
    linestyle='--',
)
ax.set_title('Temperature against Time')
ax.set_ylabel('Temperature (℃)')
fig.autofmt_xdate()
fig.savefig('plots/weather/temperature/temperature_full.png')
plt.show()

In [None]:
# Plot the temperature in consecutive windows of 730 rows and save one PNG per
# window (temperature_0.png, temperature_1.png, ...).
rows_per_window = 730  # one month (per the original note); presumably hourly rows -- confirm
n = rows_per_window
plot = 0

# Figure defaults do not change between iterations, so set them once.
plt.rc('figure', figsize=(16, 6))
plt.rc('font', size=15)

# Strict '<': with '<=' a frame whose length is an exact multiple of the window
# size (or an empty frame) produced one extra, empty figure and PNG.
while n - rows_per_window < len(df):
    fig, ax = plt.subplots()
    # .iloc makes the positional slicing explicit.
    x = df['timestamp'].iloc[n - rows_per_window:n]
    y = df['Graz Temperature [2 m elevation corrected]'].iloc[n - rows_per_window:n]
    ax.plot_date(x, y, color='black', linestyle='--')
    fig.autofmt_xdate()
    plt.title('Temperature against Time')
    plt.ylabel(ylabel='Temperature (℃)')
    plt.savefig('plots/weather/temperature/temperature_' + str(plot) + '.png')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)
    n += rows_per_window
    plot += 1
    


In [None]:
# Tukey fences for the temperature column: anything outside
# [Q1 - 1.5*IQR, Q3 + 1.5*IQR] is treated as an outlier further down.
temperature = df['Graz Temperature [2 m elevation corrected]']
Q1 = np.percentile(temperature, 25)
Q3 = np.percentile(temperature, 75)
IQR = Q3 - Q1
print(IQR)
ll = Q1 - 1.5 * IQR  # lower limit
ul = Q3 + 1.5 * IQR  # upper limit

In [None]:
# Temperature readings that fall above the upper limit or below the lower limit.
temperature = df['Graz Temperature [2 m elevation corrected]']
above_upper = temperature.gt(ul)
below_lower = temperature.lt(ll)
outliers = temperature[above_upper | below_lower]


In [None]:
# Show the temperature readings flagged by the IQR limits (empty if none).
outliers

In [None]:
#sns.boxplot(df['Graz Temperature [2 m elevation corrected]'])

In [None]:
# Keep every row of `df` that was not flagged as a temperature outlier.
# `outliers` is a selection from `df`, so its index labels identify exactly the
# flagged rows. Dropping by label (instead of the former outer merge on the
# float temperature value) keeps the rows in chronological order and keeps the
# original index.
df_no_outliers = df.drop(index=outliers.index)

In [None]:
# Box plot of the temperature values that remain after removing the IQR outliers.
sns.boxplot(df_no_outliers['Graz Temperature [2 m elevation corrected]'])

From above we can observe that no significant outliers were detected for Graz Temperature [2 m elevation corrected] using this method. 

## Checking Graz Shortwave Radiation outliers

Since the 'normal' (acceptable) radiation level varies so much between July and January, outliers are identified month by month.

In [None]:
# Plot the whole shortwave-radiation series, save it as a PNG and display it.
plt.rc('figure', figsize=(16, 6))
plt.rc('font', size=15)
fig, ax = plt.subplots()
x = df['timestamp']
y = df['Graz Shortwave Radiation']
ax.plot_date(x, y, color='black', linestyle='--')

plt.title("Shortwave Radiation")
plt.ylabel('Radiation (W/m²)')
# Format the date labels BEFORE saving; previously the PNG was written first
# and therefore did not get the rotated/aligned date labels.
fig.autofmt_xdate()
plt.savefig('plots/weather/shortwave_radiation/shortwave_radiation_full.png')
plt.show()

In [None]:
# Plot the shortwave radiation in consecutive windows of 730 rows and save one
# PNG per window.
rows_per_window = 730  # one month (per the original note); presumably hourly rows -- confirm
n = rows_per_window
plot = 0

# Figure defaults do not change between iterations, so set them once.
plt.rc('figure', figsize=(16, 6))
plt.rc('font', size=15)

# Strict '<': with '<=' a frame whose length is an exact multiple of the window
# size (or an empty frame) produced one extra, empty figure and PNG.
while n - rows_per_window < len(df):
    fig, ax = plt.subplots()
    # .iloc makes the positional slicing explicit.
    x = df['timestamp'].iloc[n - rows_per_window:n]
    y = df['Graz Shortwave Radiation'].iloc[n - rows_per_window:n]
    ax.plot_date(x, y, color='black', linestyle='--')

    # (plot+5)%12 + 1 maps window 0 to month 6, window 1 to month 7, ...
    plt.title("Shortwave Radiation Month: "+str((plot+5)%12 +1))
    plt.ylabel('Radiation (W/m²)')
    fig.autofmt_xdate()
    plt.savefig('plots/weather/shortwave_radiation/shortwave_radiation_'+str(plot)+'.png')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)
    n += rows_per_window
    plot += 1



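The IQR method is not suitable for radiation: because the normal radiation level changes so much over the months (see above), a single IQR threshold over the whole period would not be meaningful.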

From the above plots we can see that the following dates have outliers:

2021-07-02 \
2021-07-17 \
2021-07-27 \
2021-07-31 \
2021-08-05 \
2021-08-23 \
2021-09-20 \
2021-09-30 \
2021-10-06 \
2021-10-07 \
2021-10-22 \
2021-11-02 \
2021-11-14 \
2021-11-26 \
2021-11-31 (invalid date: November has 30 days; probably 2021-11-30 or 2021-12-01, verify against the plots) \
2021-12-02 \
2021-12-05 \
2021-12-09 \
2021-12-13 \
2021-12-23

## Checking Graz Direct Shortwave Radiation outliers

In [None]:
# Plot the whole direct-shortwave-radiation series, save it as a PNG and display it.
plt.rc('figure', figsize=(16, 6))
plt.rc('font', size=15)
fig, ax = plt.subplots()
x = df['timestamp']
y = df['Graz Direct Shortwave Radiation']

# This figure covers the full period, so it gets no "Month: N" suffix. The old
# title read the `plot` counter left over from an earlier loop cell, which gave
# a misleading month number and failed on a fresh kernel if that cell was skipped.
plt.title("Direct Shortwave Radiation")
plt.ylabel('Radiation (W/m^2)')
ax.plot_date(x, y, color='black', linestyle='--')
fig.autofmt_xdate()
plt.savefig('plots/weather/direct_shortwave_radiation/direct_shortwave_full.png')
plt.show()

In [None]:
# Plot the direct shortwave radiation in consecutive windows of 730 rows and
# save one PNG per window.
rows_per_window = 730  # one month (per the original note); presumably hourly rows -- confirm
n = rows_per_window
plot = 0

# Figure defaults do not change between iterations, so set them once.
plt.rc('figure', figsize=(16, 6))
plt.rc('font', size=15)

# Strict '<': with '<=' a frame whose length is an exact multiple of the window
# size (or an empty frame) produced one extra, empty figure and PNG.
while n - rows_per_window < len(df):
    fig, ax = plt.subplots()
    # .iloc makes the positional slicing explicit.
    x = df['timestamp'].iloc[n - rows_per_window:n]
    y = df['Graz Direct Shortwave Radiation'].iloc[n - rows_per_window:n]
    ax.plot_date(x, y, color='black', linestyle='--')

    # (plot+5)%12 + 1 maps window 0 to month 6, window 1 to month 7, ...
    plt.title("Direct Shortwave Radiation Month: "+str((plot+5)%12 +1))
    plt.ylabel('Radiation (W/m^2)')
    fig.autofmt_xdate()
    # Save BEFORE show(): once a figure has been shown it may already be
    # released, in which case pyplot's savefig writes a new, blank figure.
    plt.savefig('plots/weather/direct_shortwave_radiation/direct_shortwave_radiation_'+str(plot)+'.png')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)
    n += rows_per_window
    plot += 1
    


## Checking Graz Diffuse Shortwave Radiation outliers

In [None]:
# Plot the whole diffuse-shortwave-radiation series, save it as a PNG and display it.
plt.rc('figure', figsize=(16, 6))
plt.rc('font', size=15)
fig, ax = plt.subplots()
x = df['timestamp']
y = df['Graz Diffuse Shortwave Radiation']

# This figure covers the full period, so it gets no "Month: N" suffix. The old
# title read the `plot` counter left over from an earlier loop cell, which gave
# a misleading month number and failed on a fresh kernel if that cell was skipped.
plt.title("Diffuse Shortwave Radiation")
plt.ylabel('Radiation (W/m^2)')
ax.plot_date(x, y, color='black', linestyle='--')
fig.autofmt_xdate()
plt.savefig('plots/weather/diffuse_shortwave_radiation/diffuse_shortwave_radiation_full.png')
plt.show()

In [None]:
# Plot the diffuse shortwave radiation in consecutive windows of 730 rows and
# save one PNG per window.
rows_per_window = 730  # one month (per the original note); presumably hourly rows -- confirm
n = rows_per_window
plot = 0

# Figure defaults do not change between iterations, so set them once.
plt.rc('figure', figsize=(16, 6))
plt.rc('font', size=15)

# Strict '<': with '<=' a frame whose length is an exact multiple of the window
# size (or an empty frame) produced one extra, empty figure and PNG.
while n - rows_per_window < len(df):
    fig, ax = plt.subplots()
    # .iloc makes the positional slicing explicit.
    x = df['timestamp'].iloc[n - rows_per_window:n]
    y = df['Graz Diffuse Shortwave Radiation'].iloc[n - rows_per_window:n]
    ax.plot_date(x, y, color='black', linestyle='--')

    # (plot+5)%12 + 1 maps window 0 to month 6, window 1 to month 7, ...
    plt.title("Diffuse Shortwave Radiation Month: "+str((plot+5)%12 +1))
    plt.ylabel('Radiation (W/m^2)')
    fig.autofmt_xdate()
    plt.savefig('plots/weather/diffuse_shortwave_radiation/diffuse_shortwave_radiation_'+str(plot)+'.png')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)
    n += rows_per_window
    plot += 1


## Checking Graz Relative Humidity outliers

In [None]:
# Plot the whole relative-humidity series, save it as a PNG and display it.
plt.rc('figure', figsize=(16, 6))
plt.rc('font', size=15)
fig, ax = plt.subplots()
x = df['timestamp']
y = df['Graz Relative Humidity [2 m]']
ax.plot_date(x, y, color='black', linestyle='--')
# Title added so the saved PNG identifies its content, like the other
# full-period plots in this notebook.
plt.title("Relative Humidity")
fig.autofmt_xdate()
plt.savefig('plots/weather/relative_humidity/relative_humidity_full.png')
plt.show()

In [None]:
# Plot the relative humidity in consecutive windows of 730 rows and save one
# PNG per window.
rows_per_window = 730  # one month (per the original note); presumably hourly rows -- confirm
n = rows_per_window
plot = 0

# Figure defaults do not change between iterations, so set them once.
plt.rc('figure', figsize=(16, 6))
plt.rc('font', size=15)

# Strict '<': with '<=' a frame whose length is an exact multiple of the window
# size (or an empty frame) produced one extra, empty figure and PNG.
while n - rows_per_window < len(df):
    fig, ax = plt.subplots()
    # .iloc makes the positional slicing explicit.
    x = df['timestamp'].iloc[n - rows_per_window:n]
    y = df['Graz Relative Humidity [2 m]'].iloc[n - rows_per_window:n]
    ax.plot_date(x, y, color='black', linestyle='--')

    # (plot+5)%12 + 1 maps window 0 to month 6, window 1 to month 7, ...
    plt.title("Relative Humidity Month: "+str((plot+5)%12 +1))
    fig.autofmt_xdate()
    plt.savefig('plots/weather/relative_humidity/relative_humidity_'+str(plot)+'.png')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)
    n += rows_per_window
    plot += 1


2021-07-02 \
2021-08-05 \
2021-08-27 \
2021-09-20 \
2021-10-03 \



Humidity Outliers:

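2021-07-02 (originally written as 02/07/2022; the analysed window ends before 2022-01-02, so the year is presumably 2021 — confirm)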


What should we do with outliers?

- We cannot remove them – doing so would disrupt the time-series pattern
- Replace them with what value then?


**TODO:** plot the data before interpolating the gaps to a 15-minute resolution.