# Solar Forecast with LSTM
# Part 1: Data Prep

In [None]:
import pandas as pd
import os 

In [None]:
# Directory that holds the monthly weather_argonne_*.csv files.
# Set the WEATHER_LSTM_DIR environment variable to run the notebook on another
# machine; when it is unset the original author's path is used unchanged.
working_dir = os.environ.get(
    'WEATHER_LSTM_DIR',
    '/Users/rick/Desktop/Argonne_Summer_Work/WeatherNet/Weather_LSTM',
)

## Combine the Monthly Files into One Weather Dataset

In [None]:
# Read one CSV per month (files tagged 01-04 of 2020) from working_dir.
jan_df, feb_df, mar_df, apr_df = [
    pd.read_csv(working_dir + "/weather_argonne_" + month_tag + "_2020.csv")
    for month_tag in ("01", "02", "03", "04")
]

In [None]:
# Collect each month's column names so the frames can be compared before they
# are stacked on top of each other.
jan_col = list(jan_df.columns)
feb_col = list(feb_df.columns)
mar_col = list(mar_df.columns)
apr_col = list(apr_df.columns)

if jan_col == feb_col == mar_col == apr_col:
    print('All column names are the same.')
else:
    # A mismatch used to produce no output at all. Name the months whose
    # headers deviate from January so the problem is visible before the
    # frames are concatenated.
    for month_name, month_cols in (('feb', feb_col), ('mar', mar_col), ('apr', apr_col)):
        if month_cols != jan_col:
            print('Column names in ' + month_name + ' differ from jan.')

In [None]:
# Stack the four monthly frames row-wise, discard every row that contains a
# missing value, and renumber the remaining rows from 0.
df = (
    pd.concat([jan_df, feb_df, mar_df, apr_df], axis=0)
    .dropna()
    .reset_index(drop=True)
)
df.head()

In [None]:
# Display the last rows of the combined frame.
df.tail()

In [None]:
# Replace any newline inside a header name with a single space, then show the
# resulting column index.
new_col_names = [label.replace("\n", " ") for label in df.columns]
df.columns = new_col_names
df.columns

In [None]:
# Inspect the dtype of every column (object columns are candidates for the
# numeric conversion performed further down).
df.dtypes

In [None]:
# Measurement columns that are expected to be numeric. The names are used as
# DataFrame keys, so their irregular spacing (double spaces, trailing space)
# must be kept exactly as written.
col_to_float = [
    'Average 60 m wind direction (scalar) (units: deg)',
    'Average 60 m wind speed (scalar)  (units: m/s)',
    'Standard deviation of 60 m wind direction (units: deg)',
    'Average 60 m temperature  (units:deg C)',
    'Average 10 m wind direction (scalar) (units: deg)',
    'Average 10 m wind speed (scalar) (units: m/s)',
    'Standard deviation of 10 m wind direction (units: deg)',
    'Average 10 m temperature (units: deg C)',
    'Average dew point temperature (units: deg C) ',
    'Average relative humidity (units: %)',
    'Average temperature difference/100m (units: deg C/100m)',
    'Average solar radiation (units: Watts/m**2)',
    'Average net radiation (units: Watts/m**2)',
    'Average barometric pressure (units: kPa)',
    'Average water vapor pressure (units: kPa)',
    'Average 10 cm soil temperature (units: deg C)',
    'Average 100 cm soil temperature (units: deg C)',
]

In [None]:
def remove_whitespace(x):
    """
    Helper function to remove any blank space from a string.

    x: the value to clean. A string has every whitespace character removed
       (leading, trailing and interior). Any other value, such as a float
       or None, is returned unchanged.

    Returns the cleaned string, or x itself when x is not a string.
    """
    # An explicit type check replaces the former bare ``except: pass``, which
    # also swallowed KeyboardInterrupt/SystemExit and could hide real errors.
    if not isinstance(x, str):
        return x
    # str.split() with no argument splits on any run of whitespace, so joining
    # the pieces back together drops spaces, tabs and newlines alike.
    return "".join(x.split())

In [None]:
# Only columns that pandas read as generic objects need work: strip all
# whitespace from their entries, then parse them as numbers. With
# errors='coerce', entries that still cannot be parsed become NaN.
object_columns = [name for name in col_to_float if df[name].dtype == "O"]
for name in object_columns:
    stripped = df[name].apply(remove_whitespace)
    df[name] = pd.to_numeric(stripped, errors='coerce')

In [None]:
# Re-inspect the column dtypes after the numeric conversion above.
df.dtypes

In [None]:
# Remove the 'Pasquill stability class ' column from df in place; the trailing
# space is part of the column name.
df.drop(columns=['Pasquill stability class '], inplace=True)

In [None]:
# Bucket solar radiation into the three-class target:
#   0 -> below 0.3
#   1 -> from 0.3 up to (but not including) 111.6
#   2 -> everything else
# NOTE(review): a NaN reading fails both "<" comparisons and therefore lands
# in class 2 -- confirm that is intended.
solar_values = df["Average solar radiation (units: Watts/m**2)"].values
new_solar_values = [
    0 if reading < .3 else (1 if reading < 111.6 else 2)
    for reading in solar_values
]
df["target"] = new_solar_values
df.head()

In [None]:

# Lookup tables that turn the str() form of the raw time fields into
# zero-padded two-character strings.

# "30.0" -> "00", "130.0" -> "01", ..., "2330.0" -> "23":
# each key is hour * 100 + 30 written as a float string.
switch_hour = {
    str(hour * 100 + 30) + ".0": str(hour).zfill(2) for hour in range(24)
}
# One extra entry that does not follow the hh30 pattern.
switch_hour["2400.0"] = "24"

# "1" -> "01", ..., "31" -> "31"
switch_day = {str(day): str(day).zfill(2) for day in range(1, 32)}

# "1.0" -> "01", ..., "4.0" -> "04"
switch_month = {
    str(month) + ".0": str(month).zfill(2) for month in range(1, 5)
}

In [None]:
# Display the last rows of df before the month/day/time values are recoded.
df.tail()

In [None]:
def _recode(values, mapping):
    """Return mapping.get(str(v)) for every v in values (None when unmapped)."""
    return [mapping.get(str(value)) for value in values]


# Replace the raw month / day / time values with the zero-padded strings from
# the switch_* tables. The lists are now built directly instead of through
# comprehensions that were executed only for their append() side effect.
Months = _recode(df["Month"].values, switch_month)
df["Month"] = Months

days = _recode(df["Day of the month"].values, switch_day)
df["Day of the month"] = days

hours = _recode(df["Mid averaging-period time"].values, switch_hour)
df["Mid averaging-period time"] = hours

In [None]:
# Pull the recoded month / day / time columns out as arrays; they are combined
# row by row into a date label next.
months = df["Month"].values
days = df["Day of the month"].values
hours = df["Mid averaging-period time"].values

In [None]:
# Build one "2020_<month>_<day>_<hour>" label per row. zip() walks the three
# arrays in lockstep, replacing the manual range(len(...)) indexing.
dates = [
    "2020_" + month + "_" + day + "_" + hour
    for month, day, hour in zip(months, days, hours)
]
df["date"] = dates

In [None]:
# Display the first rows, now including the "date" column.
df.head()

In [None]:
# One-hot encode the mid-period time as hour_* indicator columns.
# dtype is pinned to uint8 so the indicators are 0/1 on every pandas version;
# pandas >= 2.0 would otherwise default to bool (True/False).
hour_dummies = pd.get_dummies(
    df['Mid averaging-period time'], prefix='hour', dtype='uint8'
)
df = pd.concat([df, hour_dummies], axis=1)

# Drop the raw calendar/time columns in a single call.
df.drop(
    columns=['Mid averaging-period time', 'Day of the month', 'Month', 'Year'],
    inplace=True,
)

In [None]:
# Display the first rows after one-hot encoding and dropping the raw time
# columns.
df.head()

In [None]:
# p_target is the class of the *next* row: take the target values from the
# second row onward and drop the final row, which has no successor.
p_targets = df["target"].values[1:]
# .copy() makes the trimmed frame independent of the frame it was sliced
# from, so the column assignment below does not raise SettingWithCopyWarning.
df = df.iloc[:-1, :].copy()
df["p_target"] = p_targets

In [None]:
# List the current column names.
df.columns

In [None]:
# Add lag features
col_to_lag = [
    'Average 60 m wind direction (scalar) (units: deg)',
    'Average 60 m wind speed (scalar)  (units: m/s)',
    'Standard deviation of 60 m wind direction (units: deg)',
    'Average 60 m temperature  (units:deg C)',
    'Average 10 m wind direction (scalar) (units: deg)',
    'Average 10 m wind speed (scalar) (units: m/s)',
    'Standard deviation of 10 m wind direction (units: deg)',
    'Average 10 m temperature (units: deg C)',
    'Average dew point temperature (units: deg C) ',
    'Average relative humidity (units: %)',
    'Average temperature difference/100m (units: deg C/100m)',
    'Total precipitation (units: mm)',
    'Average solar radiation (units: Watts/m**2)',
    'Average net radiation (units: Watts/m**2)',
    'Average barometric pressure (units: kPa)',
    'Average water vapor pressure (units: kPa)',
    'Average 10 cm soil temperature (units: deg C)',
    'Average 100 cm soil temperature (units: deg C)',
    'Average 10 ft soil temperature (units: deg C)',
    'target',
]

# Number of previous rows exposed as lag features (lags 1..N_LAGS).
N_LAGS = 12

# Build every "<column>_<lag>" series first and attach them with one concat.
# Inserting the columns one at a time fragments the frame, and newer pandas
# versions report that with a PerformanceWarning.
lag_columns = {}
for col_name in col_to_lag:
    for lag in range(1, N_LAGS + 1):
        lag_columns[col_name + "_" + str(lag)] = df[col_name].shift(lag)
lag_frame = pd.DataFrame(lag_columns)

# Dropping same-named columns first keeps the cell safe to re-run (the
# original per-column assignment simply overwrote them).
df = pd.concat(
    [df.drop(columns=list(lag_frame.columns), errors="ignore"), lag_frame],
    axis=1,
)

In [None]:
# Print every column name on its own line.
for column_label in df.columns:
    print(column_label)

In [None]:
# Drop every row that still has a missing value, then use the date label as
# the index while keeping "date" as a regular column too.
df = df.dropna().set_index("date", drop=False)
df.head(15)

In [None]:
# Write the finished feature table. The path is relative, so the file is
# created in the current working directory rather than in working_dir.
df.to_csv("Argonne_Weather_All.csv")