In [1]:
import numpy as np
import pandas as pd 
#import matplotlib.pyplot as plt
#import seaborn as sns
# Use the fully-qualified option key. The bare 'max_columns' pattern is ambiguous
# on pandas >= 1.4 (it also matches 'styler.render.max_columns') and raises
# OptionError there; 'display.max_columns' works on old and new versions alike.
pd.set_option('display.max_columns', 50)
####model libraries ####################
from sklearn.linear_model import Ridge, Lasso,SGDRegressor,LinearRegression
from xgboost import XGBRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor,AdaBoostRegressor,RandomForestRegressor, ExtraTreesRegressor
from sklearn.tree import ExtraTreeRegressor,DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
#########################################
from sklearn.cross_decomposition import PLSRegression

from sklearn.metrics import mean_squared_error, mean_absolute_error,make_scorer,r2_score,mean_absolute_percentage_error,explained_variance_score
from sklearn.preprocessing import MinMaxScaler

#### visualization #####
import plotly.graph_objects as go



In [2]:
# Both sheets live in the same workbook: name the path once and parse the file a
# single time instead of re-reading it for every sheet.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
SOLAR_WORKBOOK = 'C:/Users/halil/Desktop/soalr_data.xlsx'

# With a list of sheet names, read_excel returns a dict keyed by sheet name.
_sheets = pd.read_excel(SOLAR_WORKBOOK, sheet_name=["weather", "1000255-GUNEKO"])
weather_raw = _sheets["weather"]
guneko_raw = _sheets["1000255-GUNEKO"]

In [3]:
# Two (timestamp, value) tables taken from the plant sheet.
guneko_production = guneko_raw[["Date", "Production"]]
guneko_gti = guneko_raw[["Date.1", "GTI"]]

# Keep the weather rows whose `name` equals 1000255 (the id that prefixes the plant
# sheet name), then join them to the production readings on the timestamp.
# merge() defaults to an inner join, so timestamps missing on either side are dropped.
weather_guneko = weather_raw.loc[weather_raw["name"] == 1000255]
ali = guneko_production.merge(weather_guneko, left_on="Date", right_on="date")

# Drop the join key duplicate and the station identity/location columns.
dataset = ali.drop(columns=["name", "date", "lat", "lon"])

In [4]:
# String form of the timestamp, retained under its original name; the calendar
# features below no longer depend on it.
conversion = dataset["Date"].astype(str)

# Derive the calendar fields with the datetime accessor instead of int-casting
# fixed character slices of the string form, which only works while every value
# is rendered exactly as 'YYYY-MM-DD HH:...'.
_timestamps = pd.to_datetime(dataset["Date"])
dataset = dataset.assign(
    # Cast to int64 so the columns keep the dtype the int()-based version produced.
    year=_timestamps.dt.year.astype("int64"),
    month=_timestamps.dt.month.astype("int64"),
    day=_timestamps.dt.day.astype("int64"),
    hour=_timestamps.dt.hour.astype("int64"),
)

# NOTE: despite the name, no one-hot encoding happens here; the calendar fields
# stay as plain integers and only the raw timestamp column is removed.
onehot_encoded = dataset.drop(columns=["Date"])

In [5]:

def metrics(y_train1,y_train_pred1,y_test1,y_test_pred1):
    """Print MSE, MAE and MAPE for the train split, then for the test split.

    Parameters
    ----------
    y_train1, y_train_pred1 : true and predicted targets of the training split.
    y_test1, y_test_pred1 : true and predicted targets of the test split.

    Returns
    -------
    None. The three scores of each split are written to stdout, and each split's
    section ends with an extra blank line.
    """
    splits = (
        ("train", y_train1, y_train_pred1),
        ("test", y_test1, y_test_pred1),
    )
    for split_label, actual, predicted in splits:
        print(f"for {split_label} data, mean squared error is", mean_squared_error(actual, predicted))
        print(f"for {split_label} data, mean absolute error is:", mean_absolute_error(actual, predicted))
        print(f"for {split_label} data, mean absolute percentage error is", mean_absolute_percentage_error(actual, predicted), "\n")


In [6]:
# Min-max normalization into the range (0, 1).
def normalize(df,column_list):
    """Return a copy of ``df`` with the given columns min-max scaled to [0, 1].

    Every listed column is scaled independently (its minimum maps to 0 and its
    maximum to 1); all other columns are copied unchanged. The input frame is not
    modified.

    Note: the scaler is fitted on exactly the rows passed in, so calling this on a
    full dataset before a train/test split lets test rows influence the scaling.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to scale.
    column_list : iterable
        Labels of the columns to scale. May be empty, in which case an unscaled
        copy is returned.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and index as ``df``.
    """
    columns = list(column_list)
    scaled = df.copy()  # never mutate the caller's frame
    if not columns:
        return scaled

    # MinMaxScaler works feature-by-feature, so one fit over all selected columns
    # is equivalent to fitting a separate scaler per column.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled[columns] = scaler.fit_transform(scaled[columns].to_numpy())
    return scaled

In [7]:
def cal_model_error(x_train,x_test,y_train,y_test,model_name):
    """Fit a freshly built estimator and report its train and test MSE.

    Parameters
    ----------
    x_train, y_train : features and target used for fitting.
    x_test, y_test : features and target of the hold-out split.
    model_name : callable
        Called with no arguments to construct the estimator, so despite the
        parameter name this is a class or factory, not a string.

    Returns
    -------
    tuple
        ``(train_mse, test_mse)``, each rounded to 3 decimals.
    """
    estimator = model_name()
    estimator.fit(x_train, y_train)

    def _rounded_mse(features, target):
        # Score one split with the estimator fitted above.
        return round(mean_squared_error(target, estimator.predict(features)), 3)

    return _rounded_mse(x_train, y_train), _rounded_mse(x_test, y_test)


In [8]:
def make_data(dataframe, test_size=24*5):
    """Split a frame chronologically into train/test features and targets.

    The last ``test_size`` rows form the test split and everything before them the
    train split. The 'Production' column is separated out as the target.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a 'Production' column; rows are assumed to be in time order.
    test_size : int, default 24*5
        Number of trailing rows to hold out (the default is 120 rows).

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` — two feature frames without
        'Production' and the two matching 'Production' series. All four are new
        objects; ``dataframe`` is left unmodified.

    Raises
    ------
    KeyError
        If ``dataframe`` has no 'Production' column.

    Notes
    -----
    If the frame has fewer than ``test_size`` rows the split position becomes
    negative and is interpreted from the end of the frame (normal slice
    semantics), so the test split is then smaller than ``test_size``.
    """
    split_at = len(dataframe) - test_size
    train = dataframe.iloc[:split_at]
    test = dataframe.iloc[split_at:]

    # drop() and copy() return new objects, so the input frame is never touched.
    x_train = train.drop(columns=["Production"])
    x_test = test.drop(columns=["Production"])
    y_train = train["Production"].copy()
    y_test = test["Production"].copy()

    return x_train, x_test, y_train, y_test

In [9]:
def get_year(whole_data,target_year):
    """Return the rows of one year, without the 'year' column.

    Parameters
    ----------
    whole_data : pandas.DataFrame
        Frame with a 'year' column.
    target_year : int
        Value of 'year' to keep.

    Returns
    -------
    pandas.DataFrame
        New frame holding only the matching rows, re-indexed from 0 and with the
        'year' column removed. ``whole_data`` itself is not modified.
    """
    in_target_year = whole_data.year == target_year
    return (
        whole_data.loc[in_target_year]
        .drop(columns=["year"])
        .reset_index(drop=True)
    )

In [10]:
def get_months(target_data):
    """Split a frame into twelve per-month frames.

    Parameters
    ----------
    target_data : pandas.DataFrame
        Frame with a 'month' column holding values 1..12.

    Returns
    -------
    list of pandas.DataFrame
        Always 12 entries; entry ``k`` holds the rows whose 'month' equals
        ``k + 1``, re-indexed from 0 and without the 'month' column. A month with
        no rows yields an empty frame.
    """
    def _single_month(month_number):
        # Rows of one calendar month, as an independent frame.
        rows = target_data.loc[target_data["month"] == month_number]
        return rows.drop(columns=["month"]).reset_index(drop=True)

    return [_single_month(month_number) for month_number in range(1, 13)]

In [11]:
#x_train, x_test, y_train, y_test = make_data(data_months[0])

In [12]:
# Error calculation driven by a single name -> estimator table, so every model is
# evaluated through the same code path.
def calculate_errors(month_data):
    """Evaluate every candidate regressor on every monthly frame.

    Parameters
    ----------
    month_data : iterable of pandas.DataFrame
        One frame per month, each accepted by ``make_data``.

    Returns
    -------
    list of dict
        One dict per input frame, in input order. Each dict maps a model label
        ('ridge', 'lasso', 'decisiontree', 'kneighbors', 'linear', 'randomforest',
        'xgb', 'adaboost', 'neuralnetwork') to the ``(train_mse, test_mse)`` tuple
        returned by ``cal_model_error``.

    Notes
    -----
    Estimators are constructed with their default arguments (no seed is passed),
    so scores of randomised models can differ between runs.
    """
    # Label -> estimator class. Keeping both in one mapping prevents the names and
    # the classes from drifting apart; insertion order fixes the result key order.
    candidates = {
        "ridge": Ridge,
        "lasso": Lasso,
        "decisiontree": DecisionTreeRegressor,
        "kneighbors": KNeighborsRegressor,
        "linear": LinearRegression,
        "randomforest": RandomForestRegressor,
        "xgb": XGBRegressor,
        "adaboost": AdaBoostRegressor,
        "neuralnetwork": MLPRegressor,
    }

    errors = []
    for frame in month_data:
        x_train, x_test, y_train, y_test = make_data(frame)
        errors.append({
            label: cal_model_error(x_train, x_test, y_train, y_test, estimator)
            for label, estimator in candidates.items()
        })
    return errors

In [13]:
# Earlier variant of calculate_errors that called a dedicated helper per model; it is kept only as an inert string literal and is not executed.
"""def calculate_errors(month_data):
    errors = []
    for i in range(0,12):
        x_train, x_test, y_train, y_test = make_data(month_data[i])

        ridge = calculate_ridge(x_train, x_test, y_train, y_test)
        lasso = calculate_lasso(x_train, x_test, y_train, y_test)
        decisiontree = calculate_decisiontree(x_train, x_test, y_train, y_test)
        kneighbors = calculate_kneighbors(x_train, x_test, y_train, y_test)
        linear= calculate_linear_reg(x_train, x_test, y_train, y_test)
        randomforest = calculate_randomforest(x_train, x_test, y_train, y_test)
        xgb =calculate_xgb(x_train, x_test, y_train, y_test)

        errors.append({f"ridge":ridge,"lasso":lasso,"decisiontree":decisiontree,
                "kneighbors":kneighbors,"linear":linear,"randomforest":randomforest,"xgb":xgb})
    return errors"""

'def calculate_errors(month_data):\n    errors = []\n    for i in range(0,12):\n        x_train, x_test, y_train, y_test = make_data(month_data[i])\n\n        ridge = calculate_ridge(x_train, x_test, y_train, y_test)\n        lasso = calculate_lasso(x_train, x_test, y_train, y_test)\n        decisiontree = calculate_decisiontree(x_train, x_test, y_train, y_test)\n        kneighbors = calculate_kneighbors(x_train, x_test, y_train, y_test)\n        linear= calculate_linear_reg(x_train, x_test, y_train, y_test)\n        randomforest = calculate_randomforest(x_train, x_test, y_train, y_test)\n        xgb =calculate_xgb(x_train, x_test, y_train, y_test)\n\n        errors.append({f"ridge":ridge,"lasso":lasso,"decisiontree":decisiontree,\n                "kneighbors":kneighbors,"linear":linear,"randomforest":randomforest,"xgb":xgb})\n    return errors'

In [14]:
def dataframe_maker(errors):
    """Tabulate per-month model errors into a train table and a test table.

    Parameters
    ----------
    errors : list of dict
        One dict per month, starting with January. Each dict maps a model label
        to a ``(train_error, test_error)`` pair. The model columns are taken from
        the keys of the first dict, in that order; every later dict must provide
        the same keys.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_dataframe, test_dataframe)``. Each has a 'months' column with the
        English month names followed by one column per model.

    Raises
    ------
    ValueError
        If more than 12 entries are supplied.
    KeyError
        If a later dict lacks one of the model labels of the first dict.
    """
    month_names = ["January","February","March","April","May","June","July","August","September","October","November","December"]

    if len(errors) > len(month_names):
        raise ValueError(
            f"expected at most {len(month_names)} monthly entries, got {len(errors)}"
        )

    # Derive the model columns from the data so this stays in sync with whatever
    # produced `errors`; an empty input yields frames with only 'months'.
    model_labels = list(errors[0]) if errors else []
    # Only label as many months as there are entries (a full year uses all 12).
    months = pd.Series(month_names[:len(errors)])

    def _frame_for(position):
        # position 0 selects the train error of each pair, position 1 the test error.
        columns = {"months": months}
        for label in model_labels:
            columns[label] = [entry[label][position] for entry in errors]
        return pd.DataFrame(columns)

    train_dataframe = _frame_for(0)
    test_dataframe = _frame_for(1)
    return train_dataframe, test_dataframe

In [15]:
def plot_resulst(train, test, model_name, yaxis_range=None):
    """Show a grouped bar chart of one model's train and test errors per month.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Error tables with a 'months' column and a column named ``model_name``.
    model_name : str
        Column to plot; also used in the chart title.
    yaxis_range : sequence of two numbers, optional
        Explicit ``[low, high]`` range for the y axis. When omitted the axis
        starts at 0 and ends at 0.05, or higher if the data needs it.

    Returns
    -------
    None. The figure is displayed via ``fig.show()``.
    """
    if yaxis_range is None:
        # A fixed 0..0.05 window cuts off any bar above 0.05 together with its
        # outside label. Keep that window as the minimum so charts stay comparable,
        # but grow it with ~15% headroom when the data exceeds it.
        peak = max(train[model_name].max(), test[model_name].max())
        yaxis_range = [0, max(0.05, peak * 1.15)]

    fig = go.Figure(layout_yaxis_range=list(yaxis_range))
    # Each trace uses the months column of its own table.
    fig.add_trace(go.Bar(x=train["months"], y=train[model_name], name='Train Errors',
                         marker_color='indianred', text=train[model_name], textposition="outside"))
    fig.add_trace(go.Bar(x=test["months"], y=test[model_name], name='Test Errors',
                         marker_color='lightsalmon', text=test[model_name], textposition="outside"))

    # Group the two bars per month and rotate the month labels for readability.
    fig.update_layout(barmode='group', xaxis_tickangle=-45, title=f"{model_name} model error results")
    fig.show()

In [16]:
# Preview the assembled feature table: Production, the weather columns and the
# derived year/month/day/hour fields.
onehot_encoded

Unnamed: 0,Production,temperature,cloud cover,global_rad:W,diffuse_rad:W,direct_rad:W,relative_humidity_2m:p,wind_speed_10m:ms,prob_precip_1h:p,t_apparent:C,sun_elevation:d,year,month,day,hour
0,0.0,6.1,32.7,0.0,0.0,0.0,94.5,0.5,1.0,6.1,-74.4,2020,1,1,0
1,0.0,5.1,24.1,0.0,0.0,0.0,97.6,0.9,1.0,5.1,-74.2,2020,1,1,1
2,0.0,4.3,17.5,0.0,0.0,0.0,98.6,1.0,1.0,4.3,-65.7,2020,1,1,2
3,0.0,3.9,12.0,0.0,0.0,0.0,98.6,0.9,1.0,3.9,-55.2,2020,1,1,3
4,0.0,2.4,81.7,0.0,0.0,0.0,100.0,1.0,1.0,2.4,-43.4,2020,1,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19031,410.0,8.9,82.2,573.4,298.2,275.2,40.3,3.7,1.0,12.5,46.9,2022,3,22,11
19032,0.0,10.1,86.2,645.7,317.3,328.4,37.6,3.5,1.0,16.5,52.2,2022,3,22,12
19033,0.0,10.7,96.5,526.3,333.2,193.1,38.0,3.2,1.0,15.6,52.7,2022,3,22,13
19034,0.0,10.4,99.2,361.3,293.1,68.2,38.9,2.4,1.0,13.8,48.3,2022,3,22,14


In [17]:
# The first 11 columns are Production plus the ten weather features; the calendar
# columns (year, month, day, hour) that follow them are left unscaled.
columns_to_scale = onehot_encoded.columns[:11]
normalized = normalize(onehot_encoded, columns_to_scale)

# Restrict the analysis to calendar year 2021.
data_2021 = get_year(normalized, target_year=2021)


In [18]:
# Split the 2021 data into one frame per calendar month, evaluate every model on
# each month, then tabulate the train and test errors by month and model.
data_months = get_months(data_2021)
errors = calculate_errors(data_months)
train_errors,test_errors = dataframe_maker(errors)

In [19]:
# Training-split MSE per month and model (values are rounded to 3 decimals).
train_errors

Unnamed: 0,months,ridge,lasso,decisiontree,kneighbors,linear,randomforest,xgb,adaboost,neuralnetwork
0,January,0.009,0.042,0.0,0.002,0.009,0.001,0.0,0.004,0.009
1,February,0.011,0.06,0.0,0.004,0.011,0.001,0.0,0.006,0.016
2,March,0.01,0.067,0.0,0.004,0.009,0.001,0.0,0.006,0.009
3,April,0.015,0.096,0.0,0.007,0.013,0.001,0.0,0.011,0.014
4,May,0.009,0.136,0.0,0.003,0.009,0.001,0.0,0.006,0.007
5,June,0.01,0.139,0.0,0.003,0.009,0.001,0.0,0.004,0.011
6,July,0.017,0.119,0.0,0.003,0.017,0.001,0.0,0.013,0.015
7,August,0.012,0.117,0.0,0.003,0.012,0.001,0.0,0.007,0.008
8,September,0.009,0.124,0.0,0.001,0.008,0.001,0.0,0.003,0.006
9,October,0.008,0.112,0.0,0.002,0.008,0.0,0.0,0.002,0.009


In [20]:
# Test-split MSE per month and model (values are rounded to 3 decimals).
test_errors

Unnamed: 0,months,ridge,lasso,decisiontree,kneighbors,linear,randomforest,xgb,adaboost,neuralnetwork
0,January,0.007,0.03,0.01,0.02,0.007,0.006,0.008,0.006,0.007
1,February,0.006,0.106,0.002,0.002,0.006,0.001,0.002,0.006,0.016
2,March,0.016,0.117,0.023,0.106,0.016,0.015,0.017,0.019,0.025
3,April,0.022,0.11,0.024,0.032,0.024,0.016,0.018,0.025,0.028
4,May,0.012,0.126,0.021,0.015,0.012,0.011,0.014,0.012,0.03
5,June,0.02,0.118,0.022,0.016,0.019,0.017,0.016,0.026,0.031
6,July,0.007,0.133,0.001,0.003,0.007,0.001,0.001,0.017,0.015
7,August,0.006,0.122,0.01,0.003,0.006,0.001,0.003,0.006,0.007
8,September,0.008,0.13,0.004,0.003,0.007,0.001,0.001,0.004,0.015
9,October,0.017,0.069,0.017,0.015,0.059,0.012,0.012,0.013,0.015


In [21]:
# One grouped bar chart per model, in the column order of the error tables
# (every column except 'months'), replacing nine copy-pasted single-call cells.
for model_name in train_errors.columns.drop("months"):
    plot_resulst(train_errors, test_errors, model_name)