In [1]:
import pandas as pd
import numpy as np
import datetime
import requests
import json
import plotly.graph_objects as go
import plotly.express as px
import warnings
from functions import *

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [2]:
start_date = "2016-01-01"
end_date = datetime.date.today()

In [3]:
url="https://seffaflik.epias.com.tr/transparency/service/consumption/real-time-consumption?startDate="+f'{start_date}'+"&endDate="+f'{end_date}'


In [4]:
url

'https://seffaflik.epias.com.tr/transparency/service/consumption/real-time-consumption?startDate=2016-01-01&endDate=2024-01-09'

In [5]:
response = requests.get(url, verify=False)
json_data = json.loads(response.text.encode('utf8'))
df = pd.DataFrame(json_data['body']['hourlyConsumptions']).iloc[:-1] # the last value can not comes right.
df['date'] = pd.to_datetime(df.date.str[:16])

In [6]:
df

Unnamed: 0,date,consumption
0,2016-01-01 00:00:00,26277.24
1,2016-01-01 01:00:00,24991.82
2,2016-01-01 02:00:00,23532.61
3,2016-01-01 03:00:00,22464.78
4,2016-01-01 04:00:00,22002.91
...,...,...
70315,2024-01-08 19:00:00,42374.33
70316,2024-01-08 20:00:00,41269.22
70317,2024-01-08 21:00:00,40203.65
70318,2024-01-08 22:00:00,38818.94


In [7]:
fh_new = 24*7+1                              # forecast horizon weekly -we are adding +1 because by indexing we are gonna lost a line, +1 yapinca yine günün ayni saatine denk geliyor 22:00 ise yine 22:00 de oluyor
date = pd.date_range(start=df.date.tail(1).iloc[0], periods=fh_new, freq='H', name='date')
date = pd.DataFrame(date)
date.head(10)                              # that is for new forecast - the forecasting from last element we create new prediction. there are 169(24*7+1) rows

Unnamed: 0,date
0,2024-01-08 23:00:00
1,2024-01-09 00:00:00
2,2024-01-09 01:00:00
3,2024-01-09 02:00:00
4,2024-01-09 03:00:00
5,2024-01-09 04:00:00
6,2024-01-09 05:00:00
7,2024-01-09 06:00:00
8,2024-01-09 07:00:00
9,2024-01-09 08:00:00


In [8]:
df_fea_eng = pd.merge(df, date, how='outer')             # for feature engineering fe
df_fea_eng                                               # we added the forcast date to df and now we have more large index
                                                         # we merged the df and the dataframe for forcasting df+169
                                                         # 'outer' we saved the same values and set for null values NaN

Unnamed: 0,date,consumption
0,2016-01-01 00:00:00,26277.24
1,2016-01-01 01:00:00,24991.82
2,2016-01-01 02:00:00,23532.61
3,2016-01-01 03:00:00,22464.78
4,2016-01-01 04:00:00,22002.91
...,...,...
70483,2024-01-15 19:00:00,
70484,2024-01-15 20:00:00,
70485,2024-01-15 21:00:00,
70486,2024-01-15 22:00:00,


In [9]:
def rolling_feature(df, fh):
    df_copy = df.copy()                                           
    rolling_windows = [fh, fh+3, fh+10, fh+15, fh+20, fh+25]
    lags = [fh, fh+5, fh+10, fh+15, fh+20, fh+30]
    for a in rolling_windows:
        df_copy['rolling_mean_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).mean().shift(1)    
        df_copy['rolling_std_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).std().shift(1)
        df_copy['rolling_min_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).min().shift(1)
        df_copy['rolling_max_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).max().shift(1)
        df_copy['rolling_var_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).var().shift(1)
    for l in lags:
        df_copy['consuption_lag_'+str(l)]=df_copy['consumption'].shift(l)
    return df_copy

# rolling is a python function =rolling(window, min_periods)
# in our first case each windows is a new hour which we want to predict a=169 that means we want to find the mean of 169 hours
# min_periods= that means how much values should be in  each windows. 

In [10]:
# rolling mean = hareketli ortalama
# rolling standart deviation = hareketli standart sapma


In [11]:
# burda rolling_mean_169  bu sütun icin rolling fonksioynunu kullaninca aslinda arkasinda baya iyi bir fonksiyon var
# mesela ben burda window yani a=169 aldim
# bu su anlama geliyor. son 169 taneyi al ve ortalamalarini bul
# buna Rolling mean deniyor. 169 saatlik bir window istiyorsan son 169 saatin degerlerini alarak yeni bir ortalama olusturur
# mesela asagida cevaplarda gözüken 3 cü degerin rolling_mean_169 degerini bulurken datanin ilk 4 degerini aldi ve bunlarin ortalamasini aldi
# aslinda 169 tanesini aldi ama verri seti yeni basladigi icin alamadi
# bir öncekinde 3 tane alabildi.
# bu islemin sonucunda bize en saglikli baslayan veri 169 dan sonra gelen satir
# zaten asagilarda ilk 169+30(bur yukarida lag'tan geliyor) taneyi almayacagiz
# cünkü mesela fh_new+30 sütünu icin herbir satiri katarak ortalama almaya calisinca ilk fh_new+30 sayisi kadar satir almazsa,
# üst satirlarda NaN degerler olusmus oluyor

# bu sekilde dalgalanmayi stabil hale getirir.
# standart sapma bir data daki varyansin kareköküdür. yani degiskenligi gösteren bir degerdir



# lag = gecikme anlamina geliyor. bu verideki desenleri incelemek, trendleri anlamak ve gelecek degerleri hesaplamak icin kullanilir.


In [12]:
df_fea_eng = rolling_feature(df_fea_eng, fh_new)
df_fea_eng

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199
0,2016-01-01 00:00:00,26277.24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2016-01-01 01:00:00,24991.82,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,,,,,,
2,2016-01-01 02:00:00,23532.61,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,,,,,,
3,2016-01-01 03:00:00,22464.78,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,,,,,,
4,2016-01-01 04:00:00,22002.91,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70483,2024-01-15 19:00:00,,40081.153333,3120.734402,34570.16,43250.62,9.738983e+06,41137.280000,2961.812213,34570.16,44146.88,8.772332e+06,41338.483750,2317.373992,34570.16,44146.88,5.370222e+06,38581.729524,5538.469796,28086.04,44146.88,3.067465e+07,37287.606538,5730.752341,28086.04,44146.88,3.284152e+07,37265.595484,5240.253236,28086.04,44146.88,2.746025e+07,43250.62,41850.42,39055.77,28369.77,35042.77,32657.35
70484,2024-01-15 20:00:00,,39447.260000,3026.561303,34570.16,42374.33,9.160073e+06,40968.725000,3119.823162,34570.16,44146.88,9.733297e+06,41490.664667,2314.475654,34570.16,44146.88,5.356798e+06,39092.327500,5150.343016,28086.04,44146.88,2.652603e+07,37377.400000,5830.227258,28086.04,44146.88,3.399155e+07,37284.637667,5328.745718,28086.04,44146.88,2.839553e+07,42374.33,42481.63,41830.32,28086.04,33476.77,33400.40
70485,2024-01-15 21:00:00,,38715.492500,2939.989785,34570.16,41269.22,8.643540e+06,40661.971429,3236.860664,34570.16,44146.88,1.047727e+07,41466.403571,2399.864733,34570.16,44146.88,5.759351e+06,39671.605789,4573.356186,28522.74,44146.88,2.091559e+07,37539.926250,5897.487778,28086.04,44146.88,3.478036e+07,37252.996552,5420.198295,28086.04,44146.88,2.937855e+07,41269.22,42485.72,42501.26,28522.74,31771.81,33676.59
70486,2024-01-15 22:00:00,,37864.250000,2935.579524,34570.16,40203.65,8.617627e+06,40081.153333,3120.734402,34570.16,43250.62,9.738983e+06,41386.799231,2478.545485,34570.16,44146.88,6.143188e+06,40290.987222,3798.435852,30077.88,44146.88,1.442811e+07,37790.713913,5897.729111,28086.04,44146.88,3.478321e+07,37236.188571,5518.890089,28086.04,44146.88,3.045815e+07,40203.65,43116.00,42545.74,30077.88,30046.17,34140.74


In [13]:
# here is important that in our new created columns shouldn't included NaN values.
# Because same ML Algorithm can not handle mit NaN value.
# But Catboost can handle with NaN.

In [14]:
def data_features(df):
    df_copy = df.copy()
    df_copy['month'] = df_copy['date'].dt.month
    df_copy['year'] = df_copy['date'].dt.year
    df_copy['hour'] = df_copy['date'].dt.hour
    df_copy['quarter'] = df_copy['date'].dt.quarter
    df_copy['dayofweek'] = df_copy['date'].dt.dayofweek
    df_copy['dayofyear'] = df_copy['date'].dt.dayofyear
    df_copy['dayofmonth'] = df_copy['date'].dt.day
    df_copy['weekofyear'] = df_copy['date'].dt.isocalendar().week
    return(df_copy)

# for the values of new features I don't make the values as One-hot-Encoder 
# because in documantation of Catbost is written that don't make as One Hot 
# let's set those as Label Encoder

In [15]:
df_fea_eng = data_features(df_fea_eng)
df_fea_eng

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
0,2016-01-01 00:00:00,26277.24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2016,0,1,4,1,1,53
1,2016-01-01 01:00:00,24991.82,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,,,,,,,1,2016,1,1,4,1,1,53
2,2016-01-01 02:00:00,23532.61,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,,,,,,,1,2016,2,1,4,1,1,53
3,2016-01-01 03:00:00,22464.78,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,,,,,,,1,2016,3,1,4,1,1,53
4,2016-01-01 04:00:00,22002.91,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,,,,,,,1,2016,4,1,4,1,1,53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70483,2024-01-15 19:00:00,,40081.153333,3120.734402,34570.16,43250.62,9.738983e+06,41137.280000,2961.812213,34570.16,44146.88,8.772332e+06,41338.483750,2317.373992,34570.16,44146.88,5.370222e+06,38581.729524,5538.469796,28086.04,44146.88,3.067465e+07,37287.606538,5730.752341,28086.04,44146.88,3.284152e+07,37265.595484,5240.253236,28086.04,44146.88,2.746025e+07,43250.62,41850.42,39055.77,28369.77,35042.77,32657.35,1,2024,19,1,0,15,15,3
70484,2024-01-15 20:00:00,,39447.260000,3026.561303,34570.16,42374.33,9.160073e+06,40968.725000,3119.823162,34570.16,44146.88,9.733297e+06,41490.664667,2314.475654,34570.16,44146.88,5.356798e+06,39092.327500,5150.343016,28086.04,44146.88,2.652603e+07,37377.400000,5830.227258,28086.04,44146.88,3.399155e+07,37284.637667,5328.745718,28086.04,44146.88,2.839553e+07,42374.33,42481.63,41830.32,28086.04,33476.77,33400.40,1,2024,20,1,0,15,15,3
70485,2024-01-15 21:00:00,,38715.492500,2939.989785,34570.16,41269.22,8.643540e+06,40661.971429,3236.860664,34570.16,44146.88,1.047727e+07,41466.403571,2399.864733,34570.16,44146.88,5.759351e+06,39671.605789,4573.356186,28522.74,44146.88,2.091559e+07,37539.926250,5897.487778,28086.04,44146.88,3.478036e+07,37252.996552,5420.198295,28086.04,44146.88,2.937855e+07,41269.22,42485.72,42501.26,28522.74,31771.81,33676.59,1,2024,21,1,0,15,15,3
70486,2024-01-15 22:00:00,,37864.250000,2935.579524,34570.16,40203.65,8.617627e+06,40081.153333,3120.734402,34570.16,43250.62,9.738983e+06,41386.799231,2478.545485,34570.16,44146.88,6.143188e+06,40290.987222,3798.435852,30077.88,44146.88,1.442811e+07,37790.713913,5897.729111,28086.04,44146.88,3.478321e+07,37236.188571,5518.890089,28086.04,44146.88,3.045815e+07,40203.65,43116.00,42545.74,30077.88,30046.17,34140.74,1,2024,22,1,0,15,15,3


In [16]:
# burda bir yerde NaN almamak icin belli bir yerden sonrasini aldik rolling uyguladiktan sonra
# cünkü naN olmamasi gerekiyordu
# pandas for icinde islem yaparken eger o sayisa önceki data bulamazsa onu NaN olarak atiyor

In [17]:
df_fea_eng = df_fea_eng[fh_new+30:].reset_index(drop=True) # in lag we have the biggest value as 30 we get the from fh_new+30. values
df_fea_eng

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
0,2016-01-09 07:00:00,26853.42,31741.874142,4750.272034,23405.11,39760.12,2.256508e+07,31590.084535,4845.146793,22870.89,39760.12,2.347545e+07,31425.355531,4840.827773,22870.89,39760.12,2.343361e+07,31410.212500,4777.354494,22870.89,39760.12,2.282312e+07,31340.303545,4734.432316,22870.89,39760.12,2.241485e+07,31121.748196,4867.195894,21844.16,39760.12,2.368960e+07,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
1,2016-01-09 08:00:00,30627.32,31761.095680,4723.648507,23405.11,39760.12,2.231286e+07,31612.945756,4813.176124,22870.89,39760.12,2.316666e+07,31406.848715,4851.988398,22870.89,39760.12,2.354179e+07,31396.421902,4786.868377,22870.89,39760.12,2.291411e+07,31338.337725,4736.226964,22870.89,39760.12,2.243185e+07,31146.987062,4831.993928,21844.16,39760.12,2.334817e+07,24022.70,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
2,2016-01-09 09:00:00,33468.25,31800.176272,4686.419236,23405.11,39760.12,2.196253e+07,31658.041279,4766.904516,23325.63,39760.12,2.272338e+07,31413.362905,4850.144040,22870.89,39760.12,2.352390e+07,31395.836848,4786.956240,22870.89,39760.12,2.291495e+07,31347.434444,4733.184446,22870.89,39760.12,2.240303e+07,31190.408608,4788.866050,21844.16,39760.12,2.293324e+07,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
3,2016-01-09 10:00:00,34792.84,31838.861302,4672.946364,23405.11,39760.12,2.183643e+07,31717.010000,4725.783101,23405.11,39760.12,2.233303e+07,31436.968603,4849.803255,22870.89,39760.12,2.352059e+07,31403.555489,4789.158093,22870.89,39760.12,2.293604e+07,31372.191058,4731.969302,22870.89,39760.12,2.239153e+07,31250.326598,4743.828632,22870.89,39760.12,2.250391e+07,30043.60,22870.89,28069.09,31438.11,29367.70,22464.78,1,2016,10,1,5,9,9,1
4,2016-01-09 11:00:00,35382.85,31866.963314,4676.364978,23405.11,39760.12,2.186839e+07,31782.055698,4690.338090,23405.11,39760.12,2.199927e+07,31474.531453,4849.610207,22870.89,39760.12,2.351872e+07,31421.787717,4795.671625,22870.89,39760.12,2.299847e+07,31400.895503,4736.197990,22870.89,39760.12,2.243157e+07,31310.626134,4713.878145,22870.89,39760.12,2.222065e+07,32102.38,23325.63,26224.60,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70284,2024-01-15 19:00:00,,40081.153333,3120.734402,34570.16,43250.62,9.738983e+06,41137.280000,2961.812213,34570.16,44146.88,8.772332e+06,41338.483750,2317.373992,34570.16,44146.88,5.370222e+06,38581.729524,5538.469796,28086.04,44146.88,3.067465e+07,37287.606538,5730.752341,28086.04,44146.88,3.284152e+07,37265.595484,5240.253236,28086.04,44146.88,2.746025e+07,43250.62,41850.42,39055.77,28369.77,35042.77,32657.35,1,2024,19,1,0,15,15,3
70285,2024-01-15 20:00:00,,39447.260000,3026.561303,34570.16,42374.33,9.160073e+06,40968.725000,3119.823162,34570.16,44146.88,9.733297e+06,41490.664667,2314.475654,34570.16,44146.88,5.356798e+06,39092.327500,5150.343016,28086.04,44146.88,2.652603e+07,37377.400000,5830.227258,28086.04,44146.88,3.399155e+07,37284.637667,5328.745718,28086.04,44146.88,2.839553e+07,42374.33,42481.63,41830.32,28086.04,33476.77,33400.40,1,2024,20,1,0,15,15,3
70286,2024-01-15 21:00:00,,38715.492500,2939.989785,34570.16,41269.22,8.643540e+06,40661.971429,3236.860664,34570.16,44146.88,1.047727e+07,41466.403571,2399.864733,34570.16,44146.88,5.759351e+06,39671.605789,4573.356186,28522.74,44146.88,2.091559e+07,37539.926250,5897.487778,28086.04,44146.88,3.478036e+07,37252.996552,5420.198295,28086.04,44146.88,2.937855e+07,41269.22,42485.72,42501.26,28522.74,31771.81,33676.59,1,2024,21,1,0,15,15,3
70287,2024-01-15 22:00:00,,37864.250000,2935.579524,34570.16,40203.65,8.617627e+06,40081.153333,3120.734402,34570.16,43250.62,9.738983e+06,41386.799231,2478.545485,34570.16,44146.88,6.143188e+06,40290.987222,3798.435852,30077.88,44146.88,1.442811e+07,37790.713913,5897.729111,28086.04,44146.88,3.478321e+07,37236.188571,5518.890089,28086.04,44146.88,3.045815e+07,40203.65,43116.00,42545.74,30077.88,30046.17,34140.74,1,2024,22,1,0,15,15,3


In [18]:
df_fea_eng.isnull().sum().sum()
# we cleaned all the NaN values 
# we have only NaN which we will be  predicting values

168

Split the data

In [19]:
split_date = df_fea_eng.date.tail(fh_new).iloc[0]
split_date  # the date is end of the real data than will come the date for prediction

Timestamp('2024-01-08 23:00:00')

In [20]:
historical = df_fea_eng.loc[df_fea_eng['date'] <= split_date] # our real data
historical

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
0,2016-01-09 07:00:00,26853.42,31741.874142,4750.272034,23405.11,39760.12,2.256508e+07,31590.084535,4845.146793,22870.89,39760.12,2.347545e+07,31425.355531,4840.827773,22870.89,39760.12,2.343361e+07,31410.212500,4777.354494,22870.89,39760.12,2.282312e+07,31340.303545,4734.432316,22870.89,39760.12,2.241485e+07,31121.748196,4867.195894,21844.16,39760.12,2.368960e+07,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
1,2016-01-09 08:00:00,30627.32,31761.095680,4723.648507,23405.11,39760.12,2.231286e+07,31612.945756,4813.176124,22870.89,39760.12,2.316666e+07,31406.848715,4851.988398,22870.89,39760.12,2.354179e+07,31396.421902,4786.868377,22870.89,39760.12,2.291411e+07,31338.337725,4736.226964,22870.89,39760.12,2.243185e+07,31146.987062,4831.993928,21844.16,39760.12,2.334817e+07,24022.70,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
2,2016-01-09 09:00:00,33468.25,31800.176272,4686.419236,23405.11,39760.12,2.196253e+07,31658.041279,4766.904516,23325.63,39760.12,2.272338e+07,31413.362905,4850.144040,22870.89,39760.12,2.352390e+07,31395.836848,4786.956240,22870.89,39760.12,2.291495e+07,31347.434444,4733.184446,22870.89,39760.12,2.240303e+07,31190.408608,4788.866050,21844.16,39760.12,2.293324e+07,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
3,2016-01-09 10:00:00,34792.84,31838.861302,4672.946364,23405.11,39760.12,2.183643e+07,31717.010000,4725.783101,23405.11,39760.12,2.233303e+07,31436.968603,4849.803255,22870.89,39760.12,2.352059e+07,31403.555489,4789.158093,22870.89,39760.12,2.293604e+07,31372.191058,4731.969302,22870.89,39760.12,2.239153e+07,31250.326598,4743.828632,22870.89,39760.12,2.250391e+07,30043.60,22870.89,28069.09,31438.11,29367.70,22464.78,1,2016,10,1,5,9,9,1
4,2016-01-09 11:00:00,35382.85,31866.963314,4676.364978,23405.11,39760.12,2.186839e+07,31782.055698,4690.338090,23405.11,39760.12,2.199927e+07,31474.531453,4849.610207,22870.89,39760.12,2.351872e+07,31421.787717,4795.671625,22870.89,39760.12,2.299847e+07,31400.895503,4736.197990,22870.89,39760.12,2.243157e+07,31310.626134,4713.878145,22870.89,39760.12,2.222065e+07,32102.38,23325.63,26224.60,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70116,2024-01-08 19:00:00,42374.33,36381.824024,5001.381662,26447.08,44146.88,2.501382e+07,36326.047965,4979.104000,26447.08,44146.88,2.479148e+07,36004.172570,5143.987876,26126.26,44146.88,2.646061e+07,35701.921467,5387.754764,24536.76,44146.88,2.902790e+07,35515.128836,5441.302243,24536.76,44146.88,2.960777e+07,35480.618093,5379.431753,24536.76,44146.88,2.893829e+07,36523.41,29361.85,26126.26,25301.83,30412.43,29271.85,1,2024,19,1,0,8,8,2
70117,2024-01-08 20:00:00,41269.22,36416.444852,5022.573467,26447.08,44146.88,2.522624e+07,36389.782558,4985.990594,26447.08,44146.88,2.486010e+07,36094.943911,5111.959812,26134.31,44146.88,2.613213e+07,35794.706793,5354.575701,24536.76,44146.88,2.867148e+07,35578.419312,5451.192925,24536.76,44146.88,2.971550e+07,35521.594021,5401.579312,24536.76,44146.88,2.917706e+07,35865.40,30327.94,26134.31,24692.58,29972.27,29186.41,1,2024,20,1,0,8,8,2
70118,2024-01-08 21:00:00,40203.65,36448.420118,5036.227104,26447.08,44146.88,2.536358e+07,36437.691279,4993.092426,26447.08,44146.88,2.493097e+07,36179.496480,5071.288926,26447.08,44146.88,2.571797e+07,35884.797228,5305.992431,24536.76,44146.88,2.815356e+07,35638.191534,5451.330048,24536.76,44146.88,2.971700e+07,35548.066546,5417.156185,24536.76,44146.88,2.934558e+07,35134.98,31411.98,27615.67,24536.76,28876.95,29586.37,1,2024,21,1,0,8,8,2
70119,2024-01-08 22:00:00,38818.94,36478.412249,5043.446139,26447.08,44146.88,2.543635e+07,36467.299186,5000.273678,26447.08,44146.88,2.500274e+07,36249.820391,5039.043268,26447.08,44146.88,2.539196e+07,35969.943370,5248.284265,24913.75,44146.88,2.754449e+07,35698.121164,5438.850633,24536.76,44146.88,2.958110e+07,35574.697113,5427.321581,24536.76,44146.88,2.945582e+07,34327.75,33028.92,28473.55,24913.75,27658.01,30501.95,1,2024,22,1,0,8,8,2


In [21]:
y = historical[['date','consumption']].set_index('date') # our target value
y
# bunu galiba yapilan train predictleri karsilastirmak icin kullanacagiz

Unnamed: 0_level_0,consumption
date,Unnamed: 1_level_1
2016-01-09 07:00:00,26853.42
2016-01-09 08:00:00,30627.32
2016-01-09 09:00:00,33468.25
2016-01-09 10:00:00,34792.84
2016-01-09 11:00:00,35382.85
...,...
2024-01-08 19:00:00,42374.33
2024-01-08 20:00:00,41269.22
2024-01-08 21:00:00,40203.65
2024-01-08 22:00:00,38818.94


In [22]:
X = historical.drop('consumption', axis=1).set_index('date')
X   # without real consumption values

Unnamed: 0_level_0,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
2016-01-09 07:00:00,31741.874142,4750.272034,23405.11,39760.12,2.256508e+07,31590.084535,4845.146793,22870.89,39760.12,2.347545e+07,31425.355531,4840.827773,22870.89,39760.12,2.343361e+07,31410.212500,4777.354494,22870.89,39760.12,2.282312e+07,31340.303545,4734.432316,22870.89,39760.12,2.241485e+07,31121.748196,4867.195894,21844.16,39760.12,2.368960e+07,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
2016-01-09 08:00:00,31761.095680,4723.648507,23405.11,39760.12,2.231286e+07,31612.945756,4813.176124,22870.89,39760.12,2.316666e+07,31406.848715,4851.988398,22870.89,39760.12,2.354179e+07,31396.421902,4786.868377,22870.89,39760.12,2.291411e+07,31338.337725,4736.226964,22870.89,39760.12,2.243185e+07,31146.987062,4831.993928,21844.16,39760.12,2.334817e+07,24022.70,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
2016-01-09 09:00:00,31800.176272,4686.419236,23405.11,39760.12,2.196253e+07,31658.041279,4766.904516,23325.63,39760.12,2.272338e+07,31413.362905,4850.144040,22870.89,39760.12,2.352390e+07,31395.836848,4786.956240,22870.89,39760.12,2.291495e+07,31347.434444,4733.184446,22870.89,39760.12,2.240303e+07,31190.408608,4788.866050,21844.16,39760.12,2.293324e+07,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
2016-01-09 10:00:00,31838.861302,4672.946364,23405.11,39760.12,2.183643e+07,31717.010000,4725.783101,23405.11,39760.12,2.233303e+07,31436.968603,4849.803255,22870.89,39760.12,2.352059e+07,31403.555489,4789.158093,22870.89,39760.12,2.293604e+07,31372.191058,4731.969302,22870.89,39760.12,2.239153e+07,31250.326598,4743.828632,22870.89,39760.12,2.250391e+07,30043.60,22870.89,28069.09,31438.11,29367.70,22464.78,1,2016,10,1,5,9,9,1
2016-01-09 11:00:00,31866.963314,4676.364978,23405.11,39760.12,2.186839e+07,31782.055698,4690.338090,23405.11,39760.12,2.199927e+07,31474.531453,4849.610207,22870.89,39760.12,2.351872e+07,31421.787717,4795.671625,22870.89,39760.12,2.299847e+07,31400.895503,4736.197990,22870.89,39760.12,2.243157e+07,31310.626134,4713.878145,22870.89,39760.12,2.222065e+07,32102.38,23325.63,26224.60,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-08 19:00:00,36381.824024,5001.381662,26447.08,44146.88,2.501382e+07,36326.047965,4979.104000,26447.08,44146.88,2.479148e+07,36004.172570,5143.987876,26126.26,44146.88,2.646061e+07,35701.921467,5387.754764,24536.76,44146.88,2.902790e+07,35515.128836,5441.302243,24536.76,44146.88,2.960777e+07,35480.618093,5379.431753,24536.76,44146.88,2.893829e+07,36523.41,29361.85,26126.26,25301.83,30412.43,29271.85,1,2024,19,1,0,8,8,2
2024-01-08 20:00:00,36416.444852,5022.573467,26447.08,44146.88,2.522624e+07,36389.782558,4985.990594,26447.08,44146.88,2.486010e+07,36094.943911,5111.959812,26134.31,44146.88,2.613213e+07,35794.706793,5354.575701,24536.76,44146.88,2.867148e+07,35578.419312,5451.192925,24536.76,44146.88,2.971550e+07,35521.594021,5401.579312,24536.76,44146.88,2.917706e+07,35865.40,30327.94,26134.31,24692.58,29972.27,29186.41,1,2024,20,1,0,8,8,2
2024-01-08 21:00:00,36448.420118,5036.227104,26447.08,44146.88,2.536358e+07,36437.691279,4993.092426,26447.08,44146.88,2.493097e+07,36179.496480,5071.288926,26447.08,44146.88,2.571797e+07,35884.797228,5305.992431,24536.76,44146.88,2.815356e+07,35638.191534,5451.330048,24536.76,44146.88,2.971700e+07,35548.066546,5417.156185,24536.76,44146.88,2.934558e+07,35134.98,31411.98,27615.67,24536.76,28876.95,29586.37,1,2024,21,1,0,8,8,2
2024-01-08 22:00:00,36478.412249,5043.446139,26447.08,44146.88,2.543635e+07,36467.299186,5000.273678,26447.08,44146.88,2.500274e+07,36249.820391,5039.043268,26447.08,44146.88,2.539196e+07,35969.943370,5248.284265,24913.75,44146.88,2.754449e+07,35698.121164,5438.850633,24536.76,44146.88,2.958110e+07,35574.697113,5427.321581,24536.76,44146.88,2.945582e+07,34327.75,33028.92,28473.55,24913.75,27658.01,30501.95,1,2024,22,1,0,8,8,2


In [23]:
forecast_df = df_fea_eng.loc[df_fea_eng['date'] > split_date].set_index('date').drop('consumption', axis=1)
print(forecast_df.shape)  
forecast_df.head()          
# from real time until the end
# buda tahmini yapilacak tarihler 
# haa anladim aslinda rolling yapmamizin mantigi burda yatiyor
# biz simdi aslinda önümüzdeki saatlerin consumption degerlerini bilmiyoruz ve 
# su an modelimizde yok tahmin edemiyoruz ama rolling ile bu önümüzdeki tarihler icin 
# degerler vermis olduk. hep bir önceki 169 degeri alip ortalama, standart sapma, min, max, varyans


(168, 44)


Unnamed: 0_level_0,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
2024-01-09 00:00:00,36513.67432,5039.245315,26447.08,44146.88,25393990.0,36473.114826,5005.392475,26447.08,44146.88,25053950.0,36340.817542,4976.986887,26447.08,44146.88,24770400.0,36097.734728,5123.764753,26126.26,44146.88,26252970.0,35801.004497,5367.865653,24536.76,44146.88,28813980.0,35616.472423,5424.095573,24536.76,44146.88,29420810.0,31636.37,36523.41,29361.85,26126.26,25301.83,34425.0,1,2024,0,1,1,9,9,2
2024-01-09 01:00:00,36542.705893,5040.115237,26447.08,44146.88,25402760.0,36480.940175,5019.037304,26447.08,44146.88,25190740.0,36380.025225,4963.227098,26447.08,44146.88,24633620.0,36152.223661,5084.083652,26134.31,44146.88,25847910.0,35856.85117,5326.861376,24536.76,44146.88,28375450.0,35643.436373,5425.150962,24536.76,44146.88,29432260.0,29603.31,35865.4,30327.94,26134.31,24692.58,36133.55,1,2024,1,1,1,9,9,2
2024-01-09 02:00:00,36584.259162,5026.326845,26447.08,44146.88,25263960.0,36493.606,5031.122975,26447.08,44146.88,25312200.0,36414.217797,4956.238401,26447.08,44146.88,24564300.0,36207.267143,5043.133827,26447.08,44146.88,25433200.0,35916.553155,5277.718464,24536.76,44146.88,27854310.0,35672.973698,5423.752709,24536.76,44146.88,29417090.0,28099.35,35134.98,31411.98,27615.67,24536.76,35037.32,1,2024,2,1,1,9,9,2
2024-01-09 03:00:00,36635.373072,4997.811787,26447.08,44146.88,24978120.0,36513.67432,5039.245315,26447.08,44146.88,25393990.0,36442.639602,4955.892356,26447.08,44146.88,24560870.0,36254.73453,5016.188028,26447.08,44146.88,25162140.0,35977.734839,5225.046819,24913.75,44146.88,27301110.0,35708.554974,5415.492931,24536.76,44146.88,29327560.0,27288.93,34327.75,33028.92,28473.55,24913.75,33256.58,1,2024,3,1,1,9,9,2
2024-01-09 04:00:00,36692.018182,4959.288409,26447.08,44146.88,24594540.0,36542.705893,5040.115237,26447.08,44146.88,25402760.0,36462.146571,4963.33222,26447.08,44146.88,24634670.0,36297.963333,4996.256932,26447.08,44146.88,24962580.0,36037.540162,5174.998788,24961.74,44146.88,26780610.0,35750.926263,5397.963908,24536.76,44146.88,29138010.0,26677.86,33102.06,35111.09,28627.06,24961.74,32028.11,1,2024,4,1,1,9,9,2


Time Series

![](2023-12-05-00-03-09.png)

In [24]:
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit 
# That make cross validation by time series data
# that is not a traditional cros validation. That is specified for time series.
import numpy as np


tscv = TimeSeriesSplit(n_splits=3, test_size=fh_new * 20)
score_list = []
fold = 1
unseen_preds = []
importance = [] # for feature importance. hangi feature'in ne kadar katki verdigini görmek icin

for train_index, test_index in tscv.split(X, y): # burda aslinda datayi bölüyoruz bir altta ciktisi var train_index ne demek oldugunun
    X_train, X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]
    print(X_train.shape, X_val.shape)
    rf = RandomForestRegressor(n_estimators=3, random_state=42)
    rf.fit(X_train, y_train)

    forecast_predcited = rf.predict(forecast_df)
    unseen_preds.append(forecast_predcited) # 3 cross validation sonuclari gelecek galiba n_split=3 oldugu icin. cünkü time serimzi 3 parcaya bölmüstü
    score = mean_absolute_error(y_val, rf.predict(X_val))
    print(f"MAE FOLD - {fold}: {score}")
    score_list.append(score)
    importance.append(rf.feature_importances_) # burdanda 3 farkli sonuclar gelecek
    fold += 1

print("CV Mean Score: ", np.mean(score_list))

(59981, 44) (3380, 44)
MAE FOLD - 1: 1045.9225562130177
(63361, 44) (3380, 44)
MAE FOLD - 2: 1810.8476962524653
(66741, 44) (3380, 44)
MAE FOLD - 3: 1170.3205157790926
CV Mean Score:  1342.3635894148586


In [25]:
for train_index, test_index in tscv.split(X, y):
    print(train_index, test_index)
# burdaki 0. satirin degerlerinde ilk list train index yanindaki 59956 dan sonra gelenler ise test_index olarak ele aliniyor
# yani aslinda burda verilerin paketleniyor. hangi verinin nereye gidecegi kararlastiriliyor

[    0     1     2 ... 59978 59979 59980] [59981 59982 59983 ... 63358 63359 63360]
[    0     1     2 ... 63358 63359 63360] [63361 63362 63363 ... 66738 66739 66740]
[    0     1     2 ... 66738 66739 66740] [66741 66742 66743 ... 70118 70119 70120]


In [26]:
# iki yöntem var. ya modeli kaydedip, tahminleri bu modele göre yapmak
# ama time serieslerde bazen zamana bagli olarak bir cok sey degisebiliyor.
# düzenli calibere edebilirsen modeli kaydedip kullanmak daha iyi olur yoksa burdaki gibi anlik calibre iyi olabilir
# catboost hizli yapiyor ve 24 saniyede egitti.
# ben RF ile 16 dk da egittim


In [27]:
forecasted=pd.DataFrame(unseen_preds[2],columns=["forecasting"]).set_index(forecast_df.index)
# unseen_preds[2] sonuncu cross validation sonuclarini alalim dedik
# normalde hangisini alacainda ayri bir strateji belki ortalamalarinida alabiliriz
# yukarida forecast_df datasini bugunden sonraki data olarak belirlemistik

In [28]:
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=df_fea_eng.date.iloc[-fh_new*5:], y=df_fea_eng.consumption.iloc[-fh_new*5:], name = 'Historical Data', mode = 'lines'))
fig1.add_trace(go.Scatter(x=forecasted.index, y=forecasted['forecasting'], name = 'Tarihsel Veri', mode = 'lines'))

In [29]:
f_importance = pd.concat([pd.Series(X.columns.to_list(),name='Feature'),pd.Series(importance[2],name="Importance")],axis=1).sort_values(by='Importance',ascending=True)

In [30]:
import plotly.express as px
fig2 = px.bar(f_importance.tail(20), x='Importance', y='Feature')
fig2.show()

In [31]:
# ! pip install joblib

In [32]:
rf

In [33]:
model = rf

In [34]:
from joblib import dump

dump(model, 'model_for_consumption.joblib')

# eger model ismini 'veysel' olarak kaydetmek istersen
# dump(model, 'veysel.joblib') yazabilirsin

['model_for_consumption.joblib']

In [35]:
from joblib import load

# Modeli dosyadan yükleyin
model = load('model_for_consumption.joblib')

# Modeli kullanarak tahminler yapabilirsiniz
# Örneğin: predictions = model.predict(X_test)

In [36]:
# from joblib import load

# Modelin tam yolunu belirtin
# model_path = "C:/models/model.joblib"

# Modeli yükleyin
# model = load(model_path)


In [37]:
model.predict(X_val)
# bir predict denemesi modeli tekrar yükleyerek

array([36306.86      , 35998.21666667, 35998.21666667, ...,
       39759.30666667, 40925.64333333, 39900.86333333])