In [1]:
import pandas as pd
import matplotlib.pyplot as plt
# Default size, in inches, for every matplotlib figure created in this notebook.
plt.rcParams['figure.figsize'] = (12.0, 6.0)

import numpy as np
import math
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# pandas: print every float with five fixed decimals.
pd.options.display.float_format = '{:.5f}'.format
# numpy: same precision, and never fall back to scientific notation.
np.set_printoptions(suppress=True, precision=5)

# Show up to 100 columns / 100 rows before pandas truncates a frame.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

# seaborn plotting style: 'ticks' axes style with the large 'poster' scaling context,
# applied globally to matplotlib/seaborn figures.
sns.set(style='ticks', context='poster')

Using TensorFlow backend.


In [98]:
# Bokeh is used for the interactive time-series plots further down.
from bokeh.plotting import output_file, show, figure
from bokeh.io import output_notebook
# Render Bokeh figures inline in the notebook output cells.
output_notebook()

In [25]:
# Read the hourly records from the CSV export into a DataFrame.
dataset = pd.read_csv('724940_all.csv')
# Preview the first rows to check the column layout.
dataset.head()

Unnamed: 0,YYYY-MM-DD,HH:MM (LST),Zenith (deg),Azimuth (deg),ETR (W/m^2),ETRN (W/m^2),Glo Mod (W/m^2),Glo Mod Unc (%),Glo Mod Source,Dir Mod (W/m^2),Dir Mod Unc (%),Dir Mod Source,Dif Mod (W/m^2),Dif Mod Unc (%),Dif Mod Source,Meas Glo (W/m^2),Meas Glo Flg,Meas Dir (W/m^2),Meas Dir Flg,Meas Dif (W/m^2),Meas Dif Flg,TotCC (10ths),Precip Wat (cm),Precip Wat Flg,AOD (unitless),AOD Flg
0,1991-01-01,1:00,99.0,-99.0,0,0,0,0,1,0,0,1,0,0,1,-9900,99,-9900,99,-9900,99,6,0.7,1,0.108,1
1,1991-01-01,2:00,99.0,-99.0,0,0,0,0,1,0,0,1,0,0,1,-9900,99,-9900,99,-9900,99,10,0.7,1,0.108,1
2,1991-01-01,3:00,99.0,-99.0,0,0,0,0,1,0,0,1,0,0,1,-9900,99,-9900,99,-9900,99,10,0.8,1,0.108,1
3,1991-01-01,4:00,99.0,-99.0,0,0,0,0,1,0,0,1,0,0,1,-9900,99,-9900,99,-9900,99,10,0.8,1,0.108,1
4,1991-01-01,5:00,99.0,-99.0,0,0,0,0,1,0,0,1,0,0,1,-9900,99,-9900,99,-9900,99,10,0.8,1,0.108,1


In [26]:
# Show the first record vertically so every column name and value is readable.
dataset.loc[0, :]

YYYY-MM-DD          1991-01-01
HH:MM (LST)               1:00
Zenith (deg)          99.00000
Azimuth (deg)        -99.00000
ETR (W/m^2)                  0
ETRN (W/m^2)                 0
Glo Mod (W/m^2)              0
Glo Mod Unc (%)              0
Glo Mod Source               1
Dir Mod (W/m^2)              0
Dir Mod Unc (%)              0
Dir Mod Source               1
Dif Mod  (W/m^2)             0
Dif Mod Unc (%)              0
Dif Mod Source               1
Meas Glo (W/m^2)         -9900
Meas Glo Flg                99
Meas Dir (W/m^2)         -9900
Meas Dir Flg                99
Meas Dif (W/m^2)         -9900
Meas Dif Flg                99
TotCC (10ths)                6
Precip Wat (cm)        0.70000
Precip Wat Flg               1
AOD (unitless)         0.10800
AOD Flg                      1
Name: 0, dtype: object

## Preprocess dataframe and select the features

In [82]:
# Preprocess the data frame and calculate the GHI
def preprocess(df):
    """Select the modelling columns and derive global horizontal irradiance (GHI).

    The zenith angle is converted from degrees to radians (the column is renamed
    to 'Zenith (rad)'), and a new 'GHI' column is computed from the modelled
    irradiance components as

        GHI = Dif Mod + Dir Mod * cos(zenith)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns listed in ``selected`` below.
        Note that 'Dif Mod  (W/m^2)' is spelled with two spaces in the data.

    Returns
    -------
    pandas.DataFrame
        A new frame with the selected columns plus 'GHI'; ``df`` is not modified.

    Raises
    ------
    KeyError
        If any required column is missing from ``df``.
    """
    selected = ['YYYY-MM-DD', 'HH:MM (LST)', 'Zenith (deg)', 'Dir Mod (W/m^2)',
                'Dif Mod  (W/m^2)', 'Meas Dir (W/m^2)', 'Meas Dif (W/m^2)',
                'Precip Wat (cm)', 'AOD (unitless)']

    # Fail early with a readable message instead of relying on version-dependent
    # .loc behaviour for unknown labels.
    missing = [name for name in selected if name not in df.columns]
    if missing:
        raise KeyError('preprocess: missing required columns: %s' % missing)

    # Explicit copy: the assignments below must never target a slice of the
    # caller's frame (avoids SettingWithCopy ambiguity).
    out = df.loc[:, selected].copy()
    out = out.rename(columns={'Zenith (deg)': 'Zenith (rad)'})

    # Vectorised degree -> radian conversion (replaces per-element math.radians).
    out['Zenith (rad)'] = np.radians(out['Zenith (rad)'].astype('float'))

    # NOTE(review): cos(zenith) is negative for zenith > 90 deg; this only stays
    # harmless while the modelled direct component is 0 in those rows - confirm.
    out['GHI'] = (out['Dif Mod  (W/m^2)'].astype('float')
                  + out['Dir Mod (W/m^2)'].astype('float') * np.cos(out['Zenith (rad)']))

    return out

In [83]:
# Run the preprocessing on the frame loaded from the CSV above
# (`dataset`; the previous `dataest` was a typo that raises NameError on a fresh kernel).
dataset_post = preprocess(dataset)
dataset_post.head()

Unnamed: 0,YYYY-MM-DD,HH:MM (LST),Zenith (rad),Dir Mod (W/m^2),Dif Mod (W/m^2),Meas Dir (W/m^2),Meas Dif (W/m^2),Precip Wat (cm),AOD (unitless),GHI
0,1991-01-01,1:00,1.72788,0,0,-9900,-9900,0.7,0.108,0.0
1,1991-01-01,2:00,1.72788,0,0,-9900,-9900,0.7,0.108,0.0
2,1991-01-01,3:00,1.72788,0,0,-9900,-9900,0.8,0.108,0.0
3,1991-01-01,4:00,1.72788,0,0,-9900,-9900,0.8,0.108,0.0
4,1991-01-01,5:00,1.72788,0,0,-9900,-9900,0.8,0.108,0.0


In [84]:
# Check the last rows as well, to see where the record ends.
dataset_post.tail()

Unnamed: 0,YYYY-MM-DD,HH:MM (LST),Zenith (rad),Dir Mod (W/m^2),Dif Mod (W/m^2),Meas Dir (W/m^2),Meas Dif (W/m^2),Precip Wat (cm),AOD (unitless),GHI
131491,2005-12-31,20:00,1.72788,0,0,-9900,-9900,2.0,0.049,0.0
131492,2005-12-31,21:00,1.72788,0,0,-9900,-9900,2.0,0.049,0.0
131493,2005-12-31,22:00,1.72788,0,0,-9900,-9900,2.0,0.049,0.0
131494,2005-12-31,23:00,1.72788,0,0,-9900,-9900,2.0,0.049,0.0
131495,2005-12-31,24:00,1.72788,0,0,-9900,-9900,2.0,0.049,0.0


In [85]:
# Rename first, then parse. rename() ignores a key that is no longer present,
# so this cell stays safe to re-run; parsing 'YYYY-MM-DD' before renaming would
# raise KeyError on a second execution because that column is gone by then.
dataset_post = dataset_post.rename(columns={'YYYY-MM-DD': 'Datetime'})
# Only the calendar date is parsed here; the hour stays in 'HH:MM (LST)'.
dataset_post['Datetime'] = pd.to_datetime(dataset_post['Datetime'])
dataset_post.head()

Unnamed: 0,Datetime,HH:MM (LST),Zenith (rad),Dir Mod (W/m^2),Dif Mod (W/m^2),Meas Dir (W/m^2),Meas Dif (W/m^2),Precip Wat (cm),AOD (unitless),GHI
0,1991-01-01,1:00,1.72788,0,0,-9900,-9900,0.7,0.108,0.0
1,1991-01-01,2:00,1.72788,0,0,-9900,-9900,0.7,0.108,0.0
2,1991-01-01,3:00,1.72788,0,0,-9900,-9900,0.8,0.108,0.0
3,1991-01-01,4:00,1.72788,0,0,-9900,-9900,0.8,0.108,0.0
4,1991-01-01,5:00,1.72788,0,0,-9900,-9900,0.8,0.108,0.0


In [86]:
# Collapse the hourly rows into one row per calendar date.
daily = dataset_post.groupby('Datetime')
# Select the value columns with a *list*: indexing a GroupBy with a bare tuple of
# names is rejected by current pandas, and the grouping key ('Datetime') must not
# be listed because it already becomes the index of the result.
daily_data = daily[['GHI', 'Precip Wat (cm)', 'AOD (unitless)']].mean()

In [87]:
# Preview the daily means produced by the groupby.
daily_data.head()

Unnamed: 0_level_0,GHI,Precip Wat (cm),AOD (unitless)
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1991-01-01,95.99922,0.825,0.10233
1991-01-02,79.88263,1.0,0.10283
1991-01-03,66.10387,1.15417,0.09408
1991-01-04,104.28998,1.1125,0.08858
1991-01-05,92.72703,1.0,0.13192


In [88]:
# Give the feature columns short, identifier-friendly names.
daily_data = daily_data.rename(columns={
    'Precip Wat (cm)': 'Precip_water',
    'AOD (unitless)': 'AOD',
})
daily_data.head()

Unnamed: 0_level_0,GHI,Precip_water,AOD
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1991-01-01,95.99922,0.825,0.10233
1991-01-02,79.88263,1.0,0.10283
1991-01-03,66.10387,1.15417,0.09408
1991-01-04,104.28998,1.1125,0.08858
1991-01-05,92.72703,1.0,0.13192


In [92]:
# Inspect the index produced by the groupby (expected: one datetime entry per date).
daily_data.index

DatetimeIndex(['1991-01-01', '1991-01-02', '1991-01-03', '1991-01-04',
               '1991-01-05', '1991-01-06', '1991-01-07', '1991-01-08',
               '1991-01-09', '1991-01-10',
               ...
               '2005-12-22', '2005-12-23', '2005-12-24', '2005-12-25',
               '2005-12-26', '2005-12-27', '2005-12-28', '2005-12-29',
               '2005-12-30', '2005-12-31'],
              dtype='datetime64[ns]', name=u'Datetime', length=5479, freq=None)

### Use month as a feature

In [95]:
# Spot-check that an index entry exposes its month before deriving a feature from it.
daily_data.index[100].month

4

In [96]:
# Attach the calendar month (1-12) of each index date as a feature column.
daily_data = daily_data.assign(Month=daily_data.index.month)

In [97]:
# Confirm the new 'Month' column is present.
daily_data.head()

Unnamed: 0_level_0,GHI,Precip_water,AOD,Month
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1991-01-01,95.99922,0.825,0.10233,1
1991-01-02,79.88263,1.0,0.10283,1
1991-01-03,66.10387,1.15417,0.09408,1
1991-01-04,104.28998,1.1125,0.08858,1
1991-01-05,92.72703,1.0,0.13192,1


In [100]:
# Keep only 2001 through 2005 (both years inclusive), with all columns.
daily_data = daily_data.loc['2001':'2005', :]

In [101]:
# Check that the filtered frame now starts in 2001.
daily_data.head()

Unnamed: 0_level_0,GHI,Precip_water,AOD,Month
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2001-01-01,108.83462,0.81667,0.108,1
2001-01-02,103.79,0.9375,0.108,1
2001-01-03,108.79913,0.94583,0.108,1
2001-01-04,109.80042,0.80417,0.108,1
2001-01-05,101.97893,0.7375,0.108,1


In [104]:
# Daily mean GHI over time.
p = figure(x_axis_type="datetime", width=800, height=350)

# add renderers
p.line(x=daily_data.index, y=daily_data['GHI'], legend='GHI', color='navy')
show(p)

In [105]:
# Daily mean precipitable water over time.
p = figure(x_axis_type="datetime", width=800, height=350)

# add renderers
p.line(x=daily_data.index, y=daily_data['Precip_water'], legend='Precip Water', color='navy')
show(p)

In [106]:
# Daily mean aerosol optical depth (AOD) over time.
p = figure(width=800, height=350, x_axis_type="datetime")

# add renderers
# The legend must name the series actually plotted (AOD), not the label copied
# from the precipitable-water plot.
p.line(daily_data.index, daily_data.AOD, color='navy', legend='AOD')
show(p)

In [107]:
# Check that the filtered frame ends at the close of 2005.
daily_data.tail()

Unnamed: 0_level_0,GHI,Precip_water,AOD,Month
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2005-12-27,29.45757,3.39583,0.108,12
2005-12-28,98.15594,2.27917,0.108,12
2005-12-29,40.87997,1.825,0.108,12
2005-12-30,25.20833,3.0125,0.10596,12
2005-12-31,57.2109,2.3625,0.08587,12


## Create Difference Transformation Data