### This notebook deals with neural networks to forecast solar irradiation in a specific city -- Prof. André Marques -- Feb24 -- FIAP

### Input_parameters

In [223]:
file_path = 'bar.csv'  # path of the input CSV file loaded with pd.read_csv
forecast_number = 30  # forecast horizon in days; also the number of trailing rows held out as the test set
# The horizon appears in the output file name and in the station label, so both
# are derived from forecast_number to stay in sync when the horizon changes.
# NOTE(review): the 'sgc' prefix does not match the 'bar'/'Barcelos' naming used for the input — confirm it is intended.
file_output = f'sgc_{forecast_number}dA.csv'  # name of the output file
station = f'Barcelos_{forecast_number}d'  # station label (city + horizon) shown in plot titles

### Import_libraries

In [224]:
import pandas as pd # dataframe tool handler
import numpy as np # numeric tool handler
import matplotlib.pyplot as plt # figure plot function
import seaborn as sns # figure plot function
import time # to compute processing time
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split # for dataset division
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
import gc # to clean up the local memory
from pylab import rcParams # for figure plot

### Functions_definition

In [225]:
def calc_metrics(a,b):
    """Compute error metrics between a reference and a forecast.

    Parameters
    ----------
    a : reference list or array
    b : forecast list or array

    Returns
    -------
    dict with the keys
        'mae'     : mean absolute error
        'rmse'    : root mean squared error
        'r2'      : coefficient of determination
        'mae_pct' : mean absolute error multiplied by 100

    Note: the previous version used the key 'mae' twice in the dict literal, so
    the plain MAE was silently overwritten by MAE * 100. The scaled value now
    lives under its own key and 'mae' holds the unscaled error.
    """
    mae = mean_absolute_error(a, b)
    return {'mae' : mae,
            'rmse' : mean_squared_error(a, b) ** 0.5,
            'r2' : r2_score(a, b),
            'mae_pct' : mae * 100}
sc1 = MinMaxScaler(feature_range=(0,1)) # scaler that maps each feature to the range [0, 1]

In [226]:
def mape(y_true, y_pred):
    """Mean Absolute Percentage Error (MAPE), in percent.

    Parameters
    ----------
    y_true : reference values (scalar, list or array)
    y_pred : forecast values, broadcastable against ``y_true``

    Returns
    -------
    float
        mean(|y_true - y_pred| / |y_true|) * 100 over the entries whose
        reference value is non-zero; NaN when there is no such entry.

    Entries with a zero reference are skipped because their percentage error
    is undefined (the division would otherwise yield inf/NaN and poison the
    mean).
    """
    y_true, y_pred = np.broadcast_arrays(np.asarray(y_true, dtype=float),
                                         np.asarray(y_pred, dtype=float))
    valid = y_true != 0
    if not valid.any():
        return float('nan')
    return np.mean(np.abs((y_true[valid] - y_pred[valid]) / y_true[valid])) * 100

In [227]:
# Print a short, rounded quality report for a forecast
def result_metrics_forecast(test_series,forecast_series,model_name):
    """Print R2, MSE and MAE (rounded to 3 decimals) for a forecast.

    test_series     : reference values
    forecast_series : forecast values
    model_name      : label of the model, shown in the report header
    """
    print('Result Metrics for ' + str(model_name))
    # (label, metric function) pairs, evaluated and printed in this order
    report = (
        ('R2 Score : ', r2_score),
        ('Mean Squared Error : ', mean_squared_error),
        ('Mean Absolute Error : ', mean_absolute_error),
    )
    for label, metric in report:
        print(label, round(metric(test_series, forecast_series), 3))

In [228]:
def plot_result(test_series,forecast_series,model_name):
    """Plot the actual series and the forecast on the same axes.

    test_series     : reference values; its ``.index`` supplies the x axis
    forecast_series : forecast values, one per entry of ``test_series``
    model_name      : label of the model, used in the figure title

    Reads the module-level ``station`` for the title, shows the figure and
    returns None.
    """
    plt.figure(figsize=(20,10))
    sns.set(font_scale = 1.5) # seaborn theme with enlarged fonts
    plt.plot (test_series.index,test_series,label='Actual')
    plt.plot (test_series.index,forecast_series,label='Predicted')
    plt.title(str(model_name) + ' - Forecasting'+' '+station)
    # Label spelled like the dataset column it refers to (was 'ALLSKy_...').
    plt.ylabel('ALLSKY_SFC_SW_DWN - kW.h/m² (day)')
    plt.xlabel('Time - Day')
    plt.grid(True)
    plt.legend()
    plt.show()

In [229]:
# Function to map standardised predictions back to the scale of the original target
def reconv (a,b):
    """Rescale ``b`` with the mean and standard deviation of ``a``.

    a : original target values (list, array or Series)
    b : predictions in standardised units (scalar, array or Series)

    Returns ``b * std(a) + mean(a)`` rounded to 3 decimals, i.e. the inverse
    of a z-score transform built from ``a`` (np.std default, population std).

    NOTE(review): the notebook's scaler ``sc1`` is a MinMaxScaler; this inverse
    only matches z-score scaling — confirm which scaling the predictions use.
    """
    mean = np.mean(a)
    stdv = np.std(a)
    c = b * stdv + mean
    # np.round handles scalars, ndarrays and pandas objects uniformly, whereas
    # the builtin round() depends on the operand implementing __round__.
    return np.round(c, 3)

In [230]:
# Function to evaluate overfitting by comparing train and test errors
def ovft(a,b,c,d,tolerance=1.25):
    """Print whether the test error exceeds the train error by a margin.

    a, b      : reference and predicted values on the training set
    c, d      : reference and predicted values on the test set
    tolerance : factor by which the test MSE may exceed the train MSE before
                overfitting is reported (default 1.25)

    Overfitting shows up as a test error clearly above the train error. The
    previous condition was inverted: it reported overfitting unless the
    *train* MSE was more than 1.25x the test MSE, so a model with equal train
    and test errors was flagged.
    """
    o_train = mean_squared_error(a,b)
    o_test = mean_squared_error (c,d)
    if o_test > tolerance * o_train:
        print ('Overfitting demands attention')
    else:
        print ('No overfitting concern')

In [231]:
# Function to count the occurrences of the missing-data sentinel in every column
def data_count(df, missing_value=-999.0):
    """Print, for each column of ``df``, how many entries equal ``missing_value``.

    df            : pandas DataFrame to inspect (not modified)
    missing_value : sentinel to count (default -999.0)

    Columns without the sentinel print 0. The previous lookup
    ``value_counts()[b]`` raised KeyError for such columns.
    """
    for a in df.columns:
        print(a, int((df[a] == missing_value).sum()))

In [232]:
# Function to replace the missing-data sentinel with the column median
def data_replace(df, missing_value=-999.0):
    """Replace ``missing_value`` in every column of ``df`` by that column's median.

    df            : pandas DataFrame, modified in place
    missing_value : sentinel marking missing data (default -999.0)

    The median is taken over the valid entries only. The previous version
    included the sentinel values in the median, which pulled the fill value
    towards -999.0. Prints each column name with the fill value used. A column
    made only of sentinels gets NaN, since it has no valid median.
    """
    for a in df.columns:
        valid = df[a][df[a] != missing_value]
        z = valid.median()
        print(a,z)
        df[a] = df[a].replace(missing_value,z)

### Plot_parameter

In [233]:
# plt.style.context() only builds a context manager; outside a `with` block it
# changes nothing. plt.style.use() actually applies the style sheet.
# The style is applied first because a style sheet can set the same rcParams
# keys, which would otherwise overwrite the explicit font sizes below.
plt.style.use('fivethirtyeight')
rcParams['axes.labelsize'] = 12
rcParams['xtick.labelsize'] = 12
rcParams['ytick.labelsize'] = 12

<contextlib._GeneratorContextManager at 0x21dea07a208>

In [234]:
gc.collect() # force a garbage-collection pass; the cell displays the value returned by the call

80

### Data_preparation

In [235]:
# Load the input CSV, parse the date column and derive calendar columns from it
df = pd.read_csv(file_path)
df['date_m'] = pd.to_datetime(df['date_m'])
df = df.assign(Year=df['date_m'].dt.year,
               Month=df['date_m'].dt.month)

In [236]:
# Preview the first two rows of the loaded frame
df.head(2)

Unnamed: 0,level_0,index,date_m,ALLSKY_SFC_SW_DWN,ALLSKY_KT,T2M,PRECTOTCORR,RH2M,PS,WS10M,WD10M,Year,Month
0,0,0,2013-01-01,4.09,0.41,25.98,5.67,92.12,100.02,0.59,100.88,2013,1
1,1,1,2013-01-02,4.29,0.43,25.44,11.37,95.5,100.0,0.59,219.19,2013,1


In [237]:
# Column dtypes and non-null counts of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3741 entries, 0 to 3740
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   level_0            3741 non-null   int64         
 1   index              3741 non-null   int64         
 2   date_m             3741 non-null   datetime64[ns]
 3   ALLSKY_SFC_SW_DWN  3741 non-null   float64       
 4   ALLSKY_KT          3741 non-null   float64       
 5   T2M                3741 non-null   float64       
 6   PRECTOTCORR        3741 non-null   float64       
 7   RH2M               3741 non-null   float64       
 8   PS                 3741 non-null   float64       
 9   WS10M              3741 non-null   float64       
 10  WD10M              3741 non-null   float64       
 11  Year               3741 non-null   int64         
 12  Month              3741 non-null   int64         
dtypes: datetime64[ns](1), float64(8), int64(4)
memory usage: 380.1 

In [256]:
# Summary statistics of the numeric columns
df.describe()

Unnamed: 0,level_0,index,ALLSKY_SFC_SW_DWN,ALLSKY_KT,T2M,PRECTOTCORR,RH2M,PS,WS10M,WD10M,Year,Month
count,3741.0,3741.0,3741.0,3741.0,3741.0,3741.0,3741.0,3741.0,3741.0,3741.0,3741.0,3741.0
mean,1870.0,1870.0,4.826538,0.484542,26.205825,5.883708,89.148161,100.102163,0.719244,130.052876,2017.631115,6.415664
std,1080.078006,1080.078006,1.147928,0.10692,1.365059,7.264232,7.838259,0.166393,0.123436,51.717751,2.959122,3.479452
min,0.0,0.0,0.8,0.08,23.21,0.0,43.56,99.6,0.31,23.25,2013.0,1.0
25%,935.0,935.0,4.16,0.43,25.43,0.99,88.25,99.99,0.63,95.5,2015.0,3.0
50%,1870.0,1870.0,4.96,0.48,25.93,3.69,91.31,100.1,0.72,116.56,2018.0,6.0
75%,2805.0,2805.0,5.68,0.56,26.51,8.08,93.56,100.21,0.8,154.88,2020.0,9.0
max,3740.0,3740.0,7.14,0.71,34.06,94.75,99.0,100.64,1.16,332.44,2023.0,12.0


In [257]:
# Index the frame by date and keep the ten modelling columns
# (the leftover 'level_0' and 'index' columns are dropped)
df1 = df.set_index('date_m').loc[:, [
    'ALLSKY_SFC_SW_DWN', 'ALLSKY_KT', 'T2M', 'PRECTOTCORR', 'RH2M',
    'PS', 'WS10M', 'WD10M', 'Year', 'Month',
]]

In [258]:
# Preview the first two rows of the date-indexed frame
df1.head(2)

Unnamed: 0_level_0,ALLSKY_SFC_SW_DWN,ALLSKY_KT,T2M,PRECTOTCORR,RH2M,PS,WS10M,WD10M,Year,Month
date_m,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2013-01-01,4.09,0.41,25.98,5.67,92.12,100.02,0.59,100.88,2013,1
2013-01-02,4.29,0.43,25.44,11.37,95.5,100.0,0.59,219.19,2013,1


In [259]:
# Replace the -999.0 sentinel in every column of df1 in place; prints the fill value used per column
data_replace(df1)

ALLSKY_SFC_SW_DWN 4.96
ALLSKY_KT 0.48
T2M 25.93
PRECTOTCORR 3.69
RH2M 91.31
PS 100.1
WS10M 0.72
WD10M 116.56
Year 2018.0
Month 6.0


### Train_test_split

In [260]:
# Size of the split: the last `forecast_number` rows form the test set,
# everything before them the training set
a = forecast_number
test = a
train = int(len(df) - test)
print(len(df), train, test)

3741 3711 30


In [261]:
# Split chronologically: the last `forecast_number` rows are held out for testing.
# The split position is computed from the data instead of the hard-coded 3711,
# so it stays correct when the input file or the forecast horizon changes.
split_at = len(df1) - forecast_number
train = df1.iloc[:split_at, :]
test = df1.iloc[split_at:, :]

In [262]:
# Inspect the last two rows of the test set
test.tail(2)

Unnamed: 0_level_0,ALLSKY_SFC_SW_DWN,ALLSKY_KT,T2M,PRECTOTCORR,RH2M,PS,WS10M,WD10M,Year,Month
date_m,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-03-29,4.96,0.48,25.93,3.69,91.31,100.1,0.72,116.56,2023,3
2023-03-30,4.96,0.48,25.93,3.69,91.31,100.1,0.72,116.56,2023,3
