In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [23]:
# Load the 2015-2017 precipitation ("neerslag") measurements, indexed by the
# '# DTG' column parsed as datetimes, and discard the station-position columns.
neerslag = (
    pd.read_csv(
        '2015-2017_neerslaggegevens_debilt_locatie_A.csv',
        index_col='# DTG',
        parse_dates=True,
    )
    .drop(columns=['LATITUDE', 'LONGITUDE', 'ALTITUDE'])
)
neerslag.columns

Index(['DR_PWS_10', 'DR_REGENM_10', 'WW_COR_10', 'RI_PWS_10', 'RI_REGENM_10'], dtype='object')

In [24]:
# Load the 2015-2017 humidity and temperature ("vochtigheid en temperatuur")
# measurements, indexed by the '# DTG' column parsed as datetimes, and discard
# the station-position columns.
vocht_temp = (
    pd.read_csv(
        '2015-2017_vochtigheid_en_temperatuur_de_bilt.csv',
        index_col='# DTG',
        parse_dates=True,
    )
    .drop(columns=['LATITUDE', 'LONGITUDE', 'ALTITUDE'])
)
vocht_temp.columns

Index(['U_BOOL_10', 'T_DRYB_10', 'TN_10CM_PAST_6H_10', 'T_DEWP_10',
       'T_DEWP_SEA_10', 'T_DRYB_SEA_10', 'TN_DRYB_10', 'T_WETB_10',
       'TX_DRYB_10', 'U_10', 'U_SEA_10'],
      dtype='object')

In [25]:
# Load the 2015-2017 weather and air-pressure ("weer en luchtdruk")
# measurements, indexed by the '# DTG' column parsed as datetimes, and discard
# the station-position columns.
weer_druk = (
    pd.read_csv(
        '2015-2017_weer_en_luchtdruk_de_bilt_testlocatie_A.csv',
        index_col='# DTG',
        parse_dates=True,
    )
    .drop(columns=['LATITUDE', 'LONGITUDE', 'ALTITUDE'])
)
weer_druk.columns

Index(['P_NAP_MSL_10', 'P_STN_LEVEL_10', 'P_SENSOR_10', 'VV_10',
       'WW_IND_CURR_10', 'WW_IND_PAST_10_10', 'WW_CURR_10', 'WW_PAST_10',
       'AH_10', 'MOR_10'],
      dtype='object')

In [26]:
# Load the 2015-2017 wind ("windgegevens") measurements, indexed by the
# '# DTG' column parsed as datetimes, and discard the station-position columns.
wind = (
    pd.read_csv(
        '2015-2017_windgegevens_debilt_locatie_A.csv',
        index_col='# DTG',
        parse_dates=True,
    )
    .drop(columns=['LATITUDE', 'LONGITUDE', 'ALTITUDE'])
)
wind.columns

Index(['FF_10M_10', 'DD_10', 'DDN_10', 'DD_STD_10', 'DDX_10', 'FF_SENSOR_10',
       'FF_10M_STD_10', 'FX_10M_10', 'FX_10M_MD_10', 'FX_SENSOR_10',
       'FX_SENSOR_MD_10', 'SQUALL_10'],
      dtype='object')

In [27]:
# Load the 2015-2017 sunshine-duration and radiation ("zonneschijnduur en
# straling") measurements, indexed by the '# DTG' column parsed as datetimes,
# and discard the station-position columns.
zonneschijn = (
    pd.read_csv(
        '2015-2017_zonneschijnduur_en_straling_debilt.csv',
        index_col='# DTG',
        parse_dates=True,
    )
    .drop(columns=['LATITUDE', 'LONGITUDE', 'ALTITUDE'])
)
zonneschijn.columns

Index(['Q_GLOB_10', 'QN_GLOB_10', 'QX_GLOB_10', 'SQ_10'], dtype='object')

In [28]:
# Combine all five weather tables column-wise on their datetime index.
# The join is a left join, so the result keeps the rows of `neerslag`.
features = neerslag.join(
    [
        vocht_temp,
        weer_druk,
        wind,
        zonneschijn,
    ],
    how='left',
)

In [29]:
# Put the combined weather features on regular 15-minute and 1-minute grids,
# averaging all samples that fall into the same bin. Bins that contain no
# sample come out as NaN.
#
# The frequency strings use 'min' rather than the 'T' alias: 'T' is deprecated
# since pandas 2.2 (FutureWarning) while 'min' means the same thing and is
# also accepted by older pandas versions.
features_15T = features.resample('15min').mean()
features_1T = features.resample('1min').mean()

In [30]:
# Linearly fill NaN gaps, but at most `limit` consecutive missing values per
# gap. Both limits correspond to one hour on their grid:
# 4 x 15 min and 60 x 1 min.
# NOTE: re-running this cell fills further into long gaps, because each
# frame is overwritten with its own interpolated version.
features_15T = features_15T.interpolate(method='linear', limit=4)
features_1T = features_1T.interpolate(method='linear', limit=60)

In [31]:
def drop_vars(df):
    """Return a copy of ``df`` without the columns excluded from the feature set.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains every column named in the list below.

    Returns
    -------
    pandas.DataFrame
        A new frame with those columns removed; ``df`` itself is left untouched.

    Raises
    ------
    KeyError
        If any of the listed columns is missing from ``df``.
    """
    unwanted = [
        # precipitation data
        'DR_PWS_10',
        'WW_COR_10',
        'RI_PWS_10',
        # humidity & temperature
        'T_DEWP_SEA_10',
        'T_DRYB_SEA_10',
        'U_SEA_10',
        # weather & air pressure
        'P_STN_LEVEL_10',
        'P_SENSOR_10',
        'WW_IND_CURR_10',
        'WW_IND_PAST_10_10',
        'WW_CURR_10',
        'WW_PAST_10',
        # wind
        'SQUALL_10',
        'FX_10M_10',
        'FX_10M_MD_10',
        'FX_SENSOR_MD_10',
    ]
    # A single drop call removes all columns at once; the order of the
    # remaining columns is unchanged.
    return df.drop(columns=unwanted)

# Strip the unused columns from the raw frame and from both resampled frames.
# NOTE: running this a second time raises KeyError, because the columns are
# already gone from the reassigned frames.
features, features_1T, features_15T = (
    drop_vars(frame) for frame in (features, features_1T, features_15T)
)

In [32]:
# Persist the raw and the two resampled/interpolated feature tables as
# comma-separated files (',' is the to_csv default separator).
features.to_csv('features.csv')
features_15T.to_csv('features_resample-15T_interpolate-4.csv')
features_1T.to_csv('features_resample-1T_interpolate-60.csv')

In [33]:
# Weather feature tables: read back from CSV, using the '# DTG' column as a
# datetime index.
features_15T = pd.read_csv(
    'features_resample-15T_interpolate-4.csv',
    index_col='# DTG',
    parse_dates=True,
)
features_1T = pd.read_csv(
    'features_resample-1T_interpolate-60.csv',
    index_col='# DTG',
    parse_dates=True,
)

# pv_* tables: the first column of each file is used as a datetime index.
pv_1T = pd.read_csv(
    'all_data_resample-1T_interpolate-60.csv',
    index_col=0,
    parse_dates=True,
)
pv_15T = pd.read_csv(
    'all_data_resample-15T_interpolate-4.csv',
    index_col=0,
    parse_dates=True,
)

In [34]:
# For each resolution: remove the timezone from the pv_* index, left-join the
# weather features on the index, and write the merged table to CSV.
# NOTE(review): tz_localize(None) drops the zone and keeps the local
# wall-clock times; confirm the weather index uses the same clock, otherwise
# the rows are joined with an offset.

# 1-minute resolution
pv_1T = pv_1T.tz_localize(None)
all_data_1T = pv_1T.join([features_1T], how='left')
all_data_1T.to_csv('all_data_1T.csv')

# 15-minute resolution
pv_15T = pv_15T.tz_localize(None)
all_data_15T = pv_15T.join([features_15T], how='left')
all_data_15T.to_csv('all_data_15T.csv')