# Make Weather Data Sets for Generator and Scale

In [1]:
working_dir = "/Volumes/Samsung_T5/WeatherNet_V2"

In [2]:
import pandas as pd 
import numpy as np 
from sklearn.preprocessing import MinMaxScaler

from PIL import Image

import os 
import glob 
import shutil

from tqdm import trange
from tqdm import tqdm

In [3]:
weather_npy_dir = working_dir + "/npy_dataset/weather"

if os.path.exists(weather_npy_dir):
    print("/npy_dataset/weather exists")
else: 
    os.mkdir(weather_npy_dir)

/npy_dataset/weather exists


## Select which weather data to scale

In [4]:
min_data = '90'

df_path = working_dir + "/weather_data_15min/" + min_data + "_min_weather_prep.csv"

data_dir_path = weather_npy_dir + "/" + min_data + "_min"
if os.path.exists(data_dir_path) != True:
    os.mkdir(data_dir_path)

In [5]:
weather_df = pd.read_csv(df_path)
weather_df.index = weather_df["time_stamp"]

In [6]:
labels = weather_df["p_target_6"]
labels.to_csv(data_dir_path + "/" + min_data + "_labels.csv")

In [7]:
weather_df.drop(["time_stamp","time_stamp.1","p_target_6"],axis=1,inplace=True)
weather_df.head()

Unnamed: 0_level_0,Month,Average 60 m temperature,Average 60 m wind speed,Vector-averaged 60 m wind speed,Vector-averaged 60 m wind direction,Standard deviation of 60 m wind direction,Total precipitaion for the period,Estimated heat flux,Estimated friction velocity,Average 10 m temperature,...,Vector-averaged 10 m wind direction,Standard deviation of 10 m wind direction,Average global irrradiation,Average net radiation,Estimated surface roughness length,day_month,Average 10 m vapor pressure,Average 10 m dew point temperature,hour_min,target
time_stamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020_01_01_00_15,1,-3.4994,584.35,578.0,263.98,8.4474,0.0,0.0,34.89,-3.3795,...,264.28,11.341,0.0,-52.595,7.61,1,0.418,-5.1122,00_15,0
2020_01_01_00_30,1,-3.5981,622.76,616.97,263.45,7.8146,0.0,0.0,38.53,-3.4822,...,261.12,10.872,0.0,-52.977,9.21,1,0.414,-5.2493,00_30,0
2020_01_01_00_45,1,-3.7045,642.28,636.82,263.51,7.4678,0.0,0.0,44.15,-3.5998,...,264.08,10.863,0.0,-53.749,17.02,1,0.414,-5.2413,00_45,0
2020_01_01_01_00,1,-3.8625,596.84,591.08,262.92,7.961,0.0,0.0,39.93,-3.7582,...,259.55,11.25,0.0,-54.059,14.98,1,0.411,-5.348,01_00,0
2020_01_01_01_15,1,-3.997,559.11,554.5,262.78,7.3519,0.0,0.0,30.41,-3.9303,...,264.2,10.98,0.0,-53.716,4.33,1,0.407,-5.4834,01_15,0


In [8]:
weather_df.loc[weather_df['Estimated heat flux'] > 99990, 'Estimated heat flux'] = 0
weather_df.loc[weather_df['Estimated friction velocity'] > 99990, 'Estimated friction velocity'] = 0
weather_df.loc[weather_df['Estimated surface roughness length'] > 99990, 'Estimated surface roughness length'] = 0

In [9]:
weather_df.dtypes

Month                                          int64
Average 60 m temperature                     float64
Average 60 m wind speed                      float64
Vector-averaged 60 m wind speed              float64
Vector-averaged 60 m wind direction          float64
Standard deviation of 60 m wind direction    float64
Total precipitaion for the period            float64
Estimated heat flux                          float64
Estimated friction velocity                  float64
Average 10 m temperature                     float64
Average 10 m wind speed                      float64
Vector-averaged 10 m wind speed              float64
Vector-averaged 10 m wind direction          float64
Standard deviation of 10 m wind direction    float64
Average global irrradiation                  float64
Average net radiation                        float64
Estimated surface roughness length           float64
day_month                                      int64
Average 10 m vapor pressure                  f

In [10]:
 # One-hot encode categorical variables 
weather_df = pd.concat([weather_df,pd.get_dummies(weather_df['Month'], prefix='month')],axis=1)
weather_df.drop(['Month'],axis=1, inplace=True)

weather_df = pd.concat([weather_df,pd.get_dummies(weather_df['hour_min'], prefix='hour_min')],axis=1)
weather_df.drop(['hour_min'],axis=1, inplace=True)

weather_dff = pd.concat([weather_df,pd.get_dummies(weather_df['day_month'], prefix='day_month')],axis=1)
weather_df.drop(['day_month'],axis=1, inplace=True)

In [11]:
Min_Max_Col = [
'Average 60 m temperature',
'Average 60 m wind speed',
'Vector-averaged 60 m wind speed',
'Vector-averaged 60 m wind direction',
'Standard deviation of 60 m wind direction',
'Total precipitaion for the period',
'Estimated heat flux',
'Estimated friction velocity',
'Average 10 m temperature',
'Average 10 m wind speed',
'Vector-averaged 10 m wind speed',
'Vector-averaged 10 m wind direction',
'Standard deviation of 10 m wind direction',
'Average global irrradiation',
'Average net radiation',
'Estimated surface roughness length',
'Average 10 m vapor pressure',
'Average 10 m dew point temperature',
]

## This is where the split for scaling between train and test occurs
2020_05_24_04_30

In [12]:
train = weather_df.iloc[:-750,:]
test =weather_df.iloc[-750:,:]

In [13]:
train.tail()

Unnamed: 0_level_0,Average 60 m temperature,Average 60 m wind speed,Vector-averaged 60 m wind speed,Vector-averaged 60 m wind direction,Standard deviation of 60 m wind direction,Total precipitaion for the period,Estimated heat flux,Estimated friction velocity,Average 10 m temperature,Average 10 m wind speed,...,hour_min_21_30,hour_min_21_45,hour_min_22_00,hour_min_22_15,hour_min_22_30,hour_min_22_45,hour_min_23_00,hour_min_23_15,hour_min_23_30,hour_min_23_45
time_stamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020_05_24_02_00,20.681,578.34,576.3,222.72,4.8162,0.0,0.0,0.0,18.146,317.18,...,0,0,0,0,0,0,0,0,0,0
2020_05_24_02_15,20.658,511.37,506.86,219.24,7.6064,0.0,0.0,0.0,17.633,238.1,...,0,0,0,0,0,0,0,0,0,0
2020_05_24_02_30,20.117,511.43,509.98,216.99,4.2993,0.0,0.0,0.0,17.382,221.41,...,0,0,0,0,0,0,0,0,0,0
2020_05_24_02_45,20.327,491.73,490.39,227.14,4.2243,0.0,0.0,0.0,17.322,203.32,...,0,0,0,0,0,0,0,0,0,0
2020_05_24_03_00,20.003,473.64,472.82,225.32,3.3681,0.0,0.0,0.0,17.364,216.31,...,0,0,0,0,0,0,0,0,0,0


In [14]:
scaler = MinMaxScaler()
scaler.fit(train[Min_Max_Col])

train_scaled = scaler.transform(train[Min_Max_Col])
test_scaled = scaler.transform(test[Min_Max_Col])

In [15]:
train_scaled = pd.DataFrame(train_scaled,columns=Min_Max_Col)
test_scaled = pd.DataFrame(test_scaled,columns=Min_Max_Col)
train_scaled.head()

Unnamed: 0,Average 60 m temperature,Average 60 m wind speed,Vector-averaged 60 m wind speed,Vector-averaged 60 m wind direction,Standard deviation of 60 m wind direction,Total precipitaion for the period,Estimated heat flux,Estimated friction velocity,Average 10 m temperature,Average 10 m wind speed,Vector-averaged 10 m wind speed,Vector-averaged 10 m wind direction,Standard deviation of 10 m wind direction,Average global irrradiation,Average net radiation,Estimated surface roughness length,Average 10 m vapor pressure,Average 10 m dew point temperature
0,0.00017,0.005553,0.005741,0.002639,8.4e-05,0.0,0.191211,0.035361,0.000173,0.004179,0.004096,0.002643,0.000113,0.0,0.161374,0.008981,0.520902,0.464173
1,0.000169,0.005937,0.006131,0.002634,7.8e-05,0.0,0.191211,0.03905,0.000172,0.004428,0.004347,0.002611,0.000109,0.0,0.16095,0.010869,0.520702,0.461127
2,0.000168,0.006132,0.00633,0.002635,7.5e-05,0.0,0.191211,0.044746,0.000171,0.004404,0.004324,0.002641,0.000109,0.0,0.160095,0.020085,0.520702,0.461304
3,0.000166,0.005678,0.005872,0.002629,8e-05,0.0,0.191211,0.040469,0.000169,0.004113,0.004033,0.002595,0.000113,0.0,0.159751,0.017678,0.520552,0.458933
4,0.000165,0.0053,0.005506,0.002627,7.4e-05,0.0,0.191211,0.030821,0.000168,0.004074,0.003998,0.002642,0.00011,0.0,0.160131,0.00511,0.520352,0.455924


In [16]:
pd.options.mode.chained_assignment = None

for i,col in enumerate(Min_Max_Col):
    train.drop([col],axis=1, inplace=True)
train_index = train.index.values
train = train.reset_index(drop=True)

col_1 = list(train.columns)
col_2 = list(train_scaled.columns)
cols = col_2 + col_1

train_new = pd.concat([train_scaled,train],ignore_index=True,axis=1)
train_new.index = train_index
train_new.columns = cols

for i,col in enumerate(Min_Max_Col):
    test.drop([col],axis=1, inplace=True)
test_index = test.index.values
test = test.reset_index(drop=True)

col_1 = list(test.columns)
col_2 = list(test_scaled.columns)
cols = col_2 + col_1

test_new = pd.concat([test_scaled,test],ignore_index=True,axis=1)
test_new.index = test_index
test_new.columns = cols

In [17]:
new_weather_df = pd.concat([train_new,test_new])

In [18]:
new_weather_df.tail()

Unnamed: 0,Average 60 m temperature,Average 60 m wind speed,Vector-averaged 60 m wind speed,Vector-averaged 60 m wind direction,Standard deviation of 60 m wind direction,Total precipitaion for the period,Estimated heat flux,Estimated friction velocity,Average 10 m temperature,Average 10 m wind speed,...,hour_min_21_30,hour_min_21_45,hour_min_22_00,hour_min_22_15,hour_min_22_30,hour_min_22_45,hour_min_23_00,hour_min_23_15,hour_min_23_30,hour_min_23_45
2020_05_31_21_30,0.000346,0.005227,0.005475,0.001152,2.2e-05,0.0,0.191211,0.0,0.000335,0.002597,...,1,0,0,0,0,0,0,0,0,0
2020_05_31_21_45,0.000345,0.005153,0.0054,0.001195,2.6e-05,0.0,0.191211,0.0,0.000334,0.002696,...,0,1,0,0,0,0,0,0,0,0
2020_05_31_22_00,0.000345,0.005088,0.005333,0.001261,3e-05,0.0,0.191211,0.0,0.000333,0.00243,...,0,0,1,0,0,0,0,0,0,0
2020_05_31_22_15,0.000346,0.005372,0.005617,0.001257,3e-05,0.0,0.191211,0.0,0.000334,0.002657,...,0,0,0,1,0,0,0,0,0,0
2020_05_31_22_30,0.000345,0.005334,0.005577,0.001245,3.1e-05,0.0,0.191211,0.0,0.000334,0.00269,...,0,0,0,0,1,0,0,0,0,0


In [19]:
new_weather_df.to_csv(data_dir_path+'/scaled_' + min_data + '_min_weather.csv')