In [1]:
import pandas as pd
import numpy as np
import datetime
import requests
import json
import plotly.graph_objects as go
import plotly.express as px
import warnings
from functions import *

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [2]:
start_date = "2016-01-01"
end_date = datetime.date.today()

In [3]:
url="https://seffaflik.epias.com.tr/transparency/service/consumption/real-time-consumption?startDate="+f'{start_date}'+"&endDate="+f'{end_date}'


In [4]:
url

'https://seffaflik.epias.com.tr/transparency/service/consumption/real-time-consumption?startDate=2016-01-01&endDate=2024-01-08'

In [5]:
response = requests.get(url, verify=False)
json_data = json.loads(response.text.encode('utf8'))
df = pd.DataFrame(json_data['body']['hourlyConsumptions']).iloc[:-1] # the last value can not comes right.
df['date'] = pd.to_datetime(df.date.str[:16])

In [6]:
df

Unnamed: 0,date,consumption
0,2016-01-01 00:00:00,26277.24
1,2016-01-01 01:00:00,24991.82
2,2016-01-01 02:00:00,23532.61
3,2016-01-01 03:00:00,22464.78
4,2016-01-01 04:00:00,22002.91
...,...,...
70304,2024-01-08 08:00:00,38522.81
70305,2024-01-08 09:00:00,41157.37
70306,2024-01-08 10:00:00,41793.91
70307,2024-01-08 11:00:00,41276.01


In [7]:
fh_new = 24*7+1                              # forecast horizon weekly -we are adding +1 because by indexing we are gonna lost a line, +1 yapinca yine günün ayni saatine denk geliyor 22:00 ise yine 22:00 de oluyor
date = pd.date_range(start=df.date.tail(1).iloc[0], periods=fh_new, freq='H', name='date')
date = pd.DataFrame(date)
date.head(10)                              # that is for new forecast - the forecasting from last element we create new prediction. there are 169(24*7+1) rows

Unnamed: 0,date
0,2024-01-08 12:00:00
1,2024-01-08 13:00:00
2,2024-01-08 14:00:00
3,2024-01-08 15:00:00
4,2024-01-08 16:00:00
5,2024-01-08 17:00:00
6,2024-01-08 18:00:00
7,2024-01-08 19:00:00
8,2024-01-08 20:00:00
9,2024-01-08 21:00:00


In [8]:
df_fea_eng = pd.merge(df, date, how='outer')             # for feature engineering fe
df_fea_eng                                               # we added the forcast date to df and now we have more large index
                                                         # we merged the df and the dataframe for forcasting df+169
                                                         # 'outer' we saved the same values and set for null values NaN

Unnamed: 0,date,consumption
0,2016-01-01 00:00:00,26277.24
1,2016-01-01 01:00:00,24991.82
2,2016-01-01 02:00:00,23532.61
3,2016-01-01 03:00:00,22464.78
4,2016-01-01 04:00:00,22002.91
...,...,...
70472,2024-01-15 08:00:00,
70473,2024-01-15 09:00:00,
70474,2024-01-15 10:00:00,
70475,2024-01-15 11:00:00,


In [9]:
def rolling_feature(df, fh):
    df_copy = df.copy()                                           
    rolling_windows = [fh, fh+3, fh+10, fh+15, fh+20, fh+25]
    lags = [fh, fh+5, fh+10, fh+15, fh+20, fh+30]
    for a in rolling_windows:
        df_copy['rolling_mean_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).mean().shift(1)    
        df_copy['rolling_std_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).std().shift(1)
        df_copy['rolling_min_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).min().shift(1)
        df_copy['rolling_max_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).max().shift(1)
        df_copy['rolling_var_'+ str(a)] = df_copy['consumption'].rolling(a, min_periods=1).var().shift(1)
    for l in lags:
        df_copy['consuption_lag_'+str(l)]=df_copy['consumption'].shift(l)
    return df_copy

# rolling is a python function =rolling(window, min_periods)
# in our first case each windows is a new hour which we want to predict a=169 that means we want to find the mean of 169 hours
# min_periods= that means how much values should be in  each windows. 

In [10]:
# rolling mean = hareketli ortalama
# rolling standart deviation = hareketli standart sapma


In [11]:
# burda rolling_mean_169  bu sütun icin rolling fonksioynunu kullaninca aslinda arkasinda baya iyi bir fonksiyon var
# mesela ben burda window yani a=169 aldim
# bu su anlama geliyor. son 169 taneyi al ve ortalamalarini bul
# buna Rolling mean deniyor. 169 saatlik bir window istiyorsan son 169 saatin degerlerini alarak yeni bir ortalama olusturur
# mesela asagida cevaplarda gözüken 3 cü degerin rolling_mean_169 degerini bulurken datanin ilk 4 degerini aldi ve bunlarin ortalamasini aldi
# aslinda 169 tanesini aldi ama verri seti yeni basladigi icin alamadi
# bir öncekinde 3 tane alabildi.
# bu islemin sonucunda bize en saglikli baslayan veri 169 dan sonra gelen satir
# zaten asagilarda ilk 169+30(bur yukarida lag'tan geliyor) taneyi almayacagiz
# cünkü mesela fh_new+30 sütünu icin herbir satiri katarak ortalama almaya calisinca ilk fh_new+30 sayisi kadar satir almazsa,
# üst satirlarda NaN degerler olusmus oluyor

# bu sekilde dalgalanmayi stabil hale getirir.
# standart sapma bir data daki varyansin kareköküdür. yani degiskenligi gösteren bir degerdir



# lag = gecikme anlamina geliyor. bu verideki desenleri incelemek, trendleri anlamak ve gelecek degerleri hesaplamak icin kullanilir.


In [12]:
df_fea_eng = rolling_feature(df_fea_eng, fh_new)
df_fea_eng

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199
0,2016-01-01 00:00:00,26277.24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2016-01-01 01:00:00,24991.82,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,,,,,,
2,2016-01-01 02:00:00,23532.61,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,,,,,,
3,2016-01-01 03:00:00,22464.78,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,,,,,,
4,2016-01-01 04:00:00,22002.91,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70472,2024-01-15 08:00:00,,39294.261667,3080.192163,33535.65,41793.91,9.487584e+06,35806.656667,5792.649790,28051.54,41793.91,3.355479e+07,34112.270000,5055.815743,28051.54,41793.91,2.556127e+07,34773.364762,4578.955651,28051.54,41793.91,2.096683e+07,34461.556538,4161.601400,28051.54,41793.91,1.731893e+07,33628.425806,4283.753241,28051.54,41793.91,1.835054e+07,33535.65,28840.35,36132.13,34861.17,31934.60,31309.23
70473,2024-01-15 09:00:00,,40445.984000,1382.388224,38522.81,41793.91,1.910997e+06,36776.046250,5355.621465,28464.09,41793.91,2.868268e+07,33977.612667,5203.482825,28051.54,41793.91,2.707623e+07,34768.974500,4697.864027,28051.54,41793.91,2.206993e+07,34562.634800,4214.717397,28051.54,41793.91,1.776384e+07,33803.395000,4242.823684,28051.54,41793.91,1.800155e+07,38522.81,28334.69,35034.97,36683.26,32642.10,29970.14
70474,2024-01-15 10:00:00,,40926.777500,1003.450882,39479.82,41793.91,1.006914e+06,37963.468571,4505.928499,29978.71,41793.91,2.030339e+07,33902.087143,5391.370648,28051.54,41793.91,2.906688e+07,34668.222632,4804.345516,28051.54,41793.91,2.308174e+07,34642.657083,4285.922792,28051.54,41793.91,1.836913e+07,33982.945172,4200.330521,28051.54,41793.91,1.764278e+07,41157.37,28051.54,33466.23,38191.35,33386.25,29029.39
70475,2024-01-15 11:00:00,,40849.913333,1214.463630,39479.82,41793.91,1.474922e+06,39294.261667,3080.192163,33535.65,41793.91,9.487584e+06,33935.614615,5609.997196,28051.54,41793.91,3.147207e+07,34472.493333,4865.056915,28051.54,41793.91,2.366878e+07,34697.283478,4373.696660,28051.54,41793.91,1.912922e+07,34174.152143,4146.890561,28051.54,41793.91,1.719670e+07,41793.91,28464.09,31735.18,37718.00,33665.72,28555.95


In [13]:
# here is important that in our new created columns shouldn't included NaN values.
# Because same ML Algorithm can not handle mit NaN value.
# But Catboost can handle with NaN.

In [14]:
def data_features(df):
    df_copy = df.copy()
    df_copy['month'] = df_copy['date'].dt.month
    df_copy['year'] = df_copy['date'].dt.year
    df_copy['hour'] = df_copy['date'].dt.hour
    df_copy['quarter'] = df_copy['date'].dt.quarter
    df_copy['dayofweek'] = df_copy['date'].dt.dayofweek
    df_copy['dayofyear'] = df_copy['date'].dt.dayofyear
    df_copy['dayofmonth'] = df_copy['date'].dt.day
    df_copy['weekofyear'] = df_copy['date'].dt.isocalendar().week
    return(df_copy)

# for the values of new features I don't make the values as One-hot-Encoder 
# because in documantation of Catbost is written that don't make as One Hot 
# let's set those as Label Encoder

In [15]:
df_fea_eng = data_features(df_fea_eng)
df_fea_eng

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
0,2016-01-01 00:00:00,26277.24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2016,0,1,4,1,1,53
1,2016-01-01 01:00:00,24991.82,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,,,,,,,1,2016,1,1,4,1,1,53
2,2016-01-01 02:00:00,23532.61,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,,,,,,,1,2016,2,1,4,1,1,53
3,2016-01-01 03:00:00,22464.78,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,,,,,,,1,2016,3,1,4,1,1,53
4,2016-01-01 04:00:00,22002.91,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,,,,,,,1,2016,4,1,4,1,1,53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70472,2024-01-15 08:00:00,,39294.261667,3080.192163,33535.65,41793.91,9.487584e+06,35806.656667,5792.649790,28051.54,41793.91,3.355479e+07,34112.270000,5055.815743,28051.54,41793.91,2.556127e+07,34773.364762,4578.955651,28051.54,41793.91,2.096683e+07,34461.556538,4161.601400,28051.54,41793.91,1.731893e+07,33628.425806,4283.753241,28051.54,41793.91,1.835054e+07,33535.65,28840.35,36132.13,34861.17,31934.60,31309.23,1,2024,8,1,0,15,15,3
70473,2024-01-15 09:00:00,,40445.984000,1382.388224,38522.81,41793.91,1.910997e+06,36776.046250,5355.621465,28464.09,41793.91,2.868268e+07,33977.612667,5203.482825,28051.54,41793.91,2.707623e+07,34768.974500,4697.864027,28051.54,41793.91,2.206993e+07,34562.634800,4214.717397,28051.54,41793.91,1.776384e+07,33803.395000,4242.823684,28051.54,41793.91,1.800155e+07,38522.81,28334.69,35034.97,36683.26,32642.10,29970.14,1,2024,9,1,0,15,15,3
70474,2024-01-15 10:00:00,,40926.777500,1003.450882,39479.82,41793.91,1.006914e+06,37963.468571,4505.928499,29978.71,41793.91,2.030339e+07,33902.087143,5391.370648,28051.54,41793.91,2.906688e+07,34668.222632,4804.345516,28051.54,41793.91,2.308174e+07,34642.657083,4285.922792,28051.54,41793.91,1.836913e+07,33982.945172,4200.330521,28051.54,41793.91,1.764278e+07,41157.37,28051.54,33466.23,38191.35,33386.25,29029.39,1,2024,10,1,0,15,15,3
70475,2024-01-15 11:00:00,,40849.913333,1214.463630,39479.82,41793.91,1.474922e+06,39294.261667,3080.192163,33535.65,41793.91,9.487584e+06,33935.614615,5609.997196,28051.54,41793.91,3.147207e+07,34472.493333,4865.056915,28051.54,41793.91,2.366878e+07,34697.283478,4373.696660,28051.54,41793.91,1.912922e+07,34174.152143,4146.890561,28051.54,41793.91,1.719670e+07,41793.91,28464.09,31735.18,37718.00,33665.72,28555.95,1,2024,11,1,0,15,15,3


In [16]:
# burda bir yerde NaN almamak icin belli bir yerden sonrasini aldik rolling uyguladiktan sonra
# cünkü naN olmamasi gerekiyordu
# pandas for icinde islem yaparken eger o sayisa önceki data bulamazsa onu NaN olarak atiyor

In [17]:
df_fea_eng = df_fea_eng[fh_new+30:].reset_index(drop=True) # in lag we have the biggest value as 30 we get the from fh_new+30. values
df_fea_eng

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
0,2016-01-09 07:00:00,26853.42,31741.874142,4750.272034,23405.11,39760.12,2.256508e+07,31590.084535,4845.146793,22870.89,39760.12,2.347545e+07,31425.355531,4840.827773,22870.89,39760.12,2.343361e+07,31410.212500,4777.354494,22870.89,39760.12,2.282312e+07,31340.303545,4734.432316,22870.89,39760.12,2.241485e+07,31121.748196,4867.195894,21844.16,39760.12,2.368960e+07,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
1,2016-01-09 08:00:00,30627.32,31761.095680,4723.648507,23405.11,39760.12,2.231286e+07,31612.945756,4813.176124,22870.89,39760.12,2.316666e+07,31406.848715,4851.988398,22870.89,39760.12,2.354179e+07,31396.421902,4786.868377,22870.89,39760.12,2.291411e+07,31338.337725,4736.226964,22870.89,39760.12,2.243185e+07,31146.987062,4831.993928,21844.16,39760.12,2.334817e+07,24022.70,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
2,2016-01-09 09:00:00,33468.25,31800.176272,4686.419236,23405.11,39760.12,2.196253e+07,31658.041279,4766.904516,23325.63,39760.12,2.272338e+07,31413.362905,4850.144040,22870.89,39760.12,2.352390e+07,31395.836848,4786.956240,22870.89,39760.12,2.291495e+07,31347.434444,4733.184446,22870.89,39760.12,2.240303e+07,31190.408608,4788.866050,21844.16,39760.12,2.293324e+07,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
3,2016-01-09 10:00:00,34792.84,31838.861302,4672.946364,23405.11,39760.12,2.183643e+07,31717.010000,4725.783101,23405.11,39760.12,2.233303e+07,31436.968603,4849.803255,22870.89,39760.12,2.352059e+07,31403.555489,4789.158093,22870.89,39760.12,2.293604e+07,31372.191058,4731.969302,22870.89,39760.12,2.239153e+07,31250.326598,4743.828632,22870.89,39760.12,2.250391e+07,30043.60,22870.89,28069.09,31438.11,29367.70,22464.78,1,2016,10,1,5,9,9,1
4,2016-01-09 11:00:00,35382.85,31866.963314,4676.364978,23405.11,39760.12,2.186839e+07,31782.055698,4690.338090,23405.11,39760.12,2.199927e+07,31474.531453,4849.610207,22870.89,39760.12,2.351872e+07,31421.787717,4795.671625,22870.89,39760.12,2.299847e+07,31400.895503,4736.197990,22870.89,39760.12,2.243157e+07,31310.626134,4713.878145,22870.89,39760.12,2.222065e+07,32102.38,23325.63,26224.60,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70273,2024-01-15 08:00:00,,39294.261667,3080.192163,33535.65,41793.91,9.487584e+06,35806.656667,5792.649790,28051.54,41793.91,3.355479e+07,34112.270000,5055.815743,28051.54,41793.91,2.556127e+07,34773.364762,4578.955651,28051.54,41793.91,2.096683e+07,34461.556538,4161.601400,28051.54,41793.91,1.731893e+07,33628.425806,4283.753241,28051.54,41793.91,1.835054e+07,33535.65,28840.35,36132.13,34861.17,31934.60,31309.23,1,2024,8,1,0,15,15,3
70274,2024-01-15 09:00:00,,40445.984000,1382.388224,38522.81,41793.91,1.910997e+06,36776.046250,5355.621465,28464.09,41793.91,2.868268e+07,33977.612667,5203.482825,28051.54,41793.91,2.707623e+07,34768.974500,4697.864027,28051.54,41793.91,2.206993e+07,34562.634800,4214.717397,28051.54,41793.91,1.776384e+07,33803.395000,4242.823684,28051.54,41793.91,1.800155e+07,38522.81,28334.69,35034.97,36683.26,32642.10,29970.14,1,2024,9,1,0,15,15,3
70275,2024-01-15 10:00:00,,40926.777500,1003.450882,39479.82,41793.91,1.006914e+06,37963.468571,4505.928499,29978.71,41793.91,2.030339e+07,33902.087143,5391.370648,28051.54,41793.91,2.906688e+07,34668.222632,4804.345516,28051.54,41793.91,2.308174e+07,34642.657083,4285.922792,28051.54,41793.91,1.836913e+07,33982.945172,4200.330521,28051.54,41793.91,1.764278e+07,41157.37,28051.54,33466.23,38191.35,33386.25,29029.39,1,2024,10,1,0,15,15,3
70276,2024-01-15 11:00:00,,40849.913333,1214.463630,39479.82,41793.91,1.474922e+06,39294.261667,3080.192163,33535.65,41793.91,9.487584e+06,33935.614615,5609.997196,28051.54,41793.91,3.147207e+07,34472.493333,4865.056915,28051.54,41793.91,2.366878e+07,34697.283478,4373.696660,28051.54,41793.91,1.912922e+07,34174.152143,4146.890561,28051.54,41793.91,1.719670e+07,41793.91,28464.09,31735.18,37718.00,33665.72,28555.95,1,2024,11,1,0,15,15,3


In [18]:
df_fea_eng.isnull().sum().sum()
# we cleaned all the NaN values 
# we have only NaN which we will be  predicting values

168

Split the data

In [19]:
split_date = df_fea_eng.date.tail(fh_new).iloc[0]
split_date  # the date is end of the real data than will come the date for prediction

Timestamp('2024-01-08 12:00:00')

In [20]:
historical = df_fea_eng.loc[df_fea_eng['date'] <= split_date] # our real data
historical

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
0,2016-01-09 07:00:00,26853.42,31741.874142,4750.272034,23405.11,39760.12,2.256508e+07,31590.084535,4845.146793,22870.89,39760.12,2.347545e+07,31425.355531,4840.827773,22870.89,39760.12,2.343361e+07,31410.212500,4777.354494,22870.89,39760.12,2.282312e+07,31340.303545,4734.432316,22870.89,39760.12,2.241485e+07,31121.748196,4867.195894,21844.16,39760.12,2.368960e+07,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
1,2016-01-09 08:00:00,30627.32,31761.095680,4723.648507,23405.11,39760.12,2.231286e+07,31612.945756,4813.176124,22870.89,39760.12,2.316666e+07,31406.848715,4851.988398,22870.89,39760.12,2.354179e+07,31396.421902,4786.868377,22870.89,39760.12,2.291411e+07,31338.337725,4736.226964,22870.89,39760.12,2.243185e+07,31146.987062,4831.993928,21844.16,39760.12,2.334817e+07,24022.70,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
2,2016-01-09 09:00:00,33468.25,31800.176272,4686.419236,23405.11,39760.12,2.196253e+07,31658.041279,4766.904516,23325.63,39760.12,2.272338e+07,31413.362905,4850.144040,22870.89,39760.12,2.352390e+07,31395.836848,4786.956240,22870.89,39760.12,2.291495e+07,31347.434444,4733.184446,22870.89,39760.12,2.240303e+07,31190.408608,4788.866050,21844.16,39760.12,2.293324e+07,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
3,2016-01-09 10:00:00,34792.84,31838.861302,4672.946364,23405.11,39760.12,2.183643e+07,31717.010000,4725.783101,23405.11,39760.12,2.233303e+07,31436.968603,4849.803255,22870.89,39760.12,2.352059e+07,31403.555489,4789.158093,22870.89,39760.12,2.293604e+07,31372.191058,4731.969302,22870.89,39760.12,2.239153e+07,31250.326598,4743.828632,22870.89,39760.12,2.250391e+07,30043.60,22870.89,28069.09,31438.11,29367.70,22464.78,1,2016,10,1,5,9,9,1
4,2016-01-09 11:00:00,35382.85,31866.963314,4676.364978,23405.11,39760.12,2.186839e+07,31782.055698,4690.338090,23405.11,39760.12,2.199927e+07,31474.531453,4849.610207,22870.89,39760.12,2.351872e+07,31421.787717,4795.671625,22870.89,39760.12,2.299847e+07,31400.895503,4736.197990,22870.89,39760.12,2.243157e+07,31310.626134,4713.878145,22870.89,39760.12,2.222065e+07,32102.38,23325.63,26224.60,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70105,2024-01-08 08:00:00,38522.81,35529.754260,5094.934271,24961.74,44028.47,2.595836e+07,35341.113721,5245.924527,24536.76,44028.47,2.751972e+07,35079.370391,5321.831904,24536.76,44028.47,2.832189e+07,35055.637554,5255.482681,24536.76,44028.47,2.762010e+07,34912.184762,5258.576726,24536.76,44028.47,2.765263e+07,34734.581186,5305.932793,24536.76,44028.47,2.815292e+07,24961.74,26286.14,32028.11,32177.56,29619.03,30225.41,1,2024,8,1,0,8,8,2
70106,2024-01-08 09:00:00,41157.37,35609.997278,5033.927129,26126.26,44028.47,2.534042e+07,35421.522035,5187.418813,24536.76,44028.47,2.690931e+07,35115.653631,5323.051796,24536.76,44028.47,2.833488e+07,35090.122609,5257.311905,24536.76,44028.47,2.763933e+07,34959.294709,5250.782089,24536.76,44028.47,2.757071e+07,34791.202577,5287.317791,24536.76,44028.47,2.795573e+07,26126.26,25301.83,30412.43,34425.00,29271.85,28747.42,1,2024,9,1,0,8,8,2
70107,2024-01-08 10:00:00,41793.91,35698.938757,4998.027072,26134.31,44028.47,2.498027e+07,35518.153488,5138.040971,24913.75,44028.47,2.639947e+07,35175.681229,5330.295935,24536.76,44028.47,2.841205e+07,35126.711576,5276.051617,24536.76,44028.47,2.783672e+07,35022.181058,5253.476879,24536.76,44028.47,2.759902e+07,34861.843557,5280.311880,24536.76,44028.47,2.788169e+07,26134.31,24692.58,29972.27,36133.55,29186.41,27939.94,1,2024,10,1,0,8,8,2
70108,2024-01-08 11:00:00,41276.01,35791.599112,4964.697794,26447.08,44028.47,2.464822e+07,35616.293953,5095.338793,24961.74,44028.47,2.596248e+07,35241.723911,5338.691965,24536.76,44028.47,2.850163e+07,35157.474402,5298.408651,24536.76,44028.47,2.807313e+07,35088.887407,5259.021995,24536.76,44028.47,2.765731e+07,34936.286546,5275.688251,24536.76,44028.47,2.783289e+07,27615.67,24536.76,28876.95,35037.32,29586.37,27349.77,1,2024,11,1,0,8,8,2


In [21]:
y = historical[['date','consumption']].set_index('date') # our target value
y
# bunu galiba yapilan train predictleri karsilastirmak icin kullanacagiz

Unnamed: 0_level_0,consumption
date,Unnamed: 1_level_1
2016-01-09 07:00:00,26853.42
2016-01-09 08:00:00,30627.32
2016-01-09 09:00:00,33468.25
2016-01-09 10:00:00,34792.84
2016-01-09 11:00:00,35382.85
...,...
2024-01-08 08:00:00,38522.81
2024-01-08 09:00:00,41157.37
2024-01-08 10:00:00,41793.91
2024-01-08 11:00:00,41276.01


In [22]:
X = historical.drop('consumption', axis=1).set_index('date')
X   # without real consumption values

Unnamed: 0_level_0,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
2016-01-09 07:00:00,31741.874142,4750.272034,23405.11,39760.12,2.256508e+07,31590.084535,4845.146793,22870.89,39760.12,2.347545e+07,31425.355531,4840.827773,22870.89,39760.12,2.343361e+07,31410.212500,4777.354494,22870.89,39760.12,2.282312e+07,31340.303545,4734.432316,22870.89,39760.12,2.241485e+07,31121.748196,4867.195894,21844.16,39760.12,2.368960e+07,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
2016-01-09 08:00:00,31761.095680,4723.648507,23405.11,39760.12,2.231286e+07,31612.945756,4813.176124,22870.89,39760.12,2.316666e+07,31406.848715,4851.988398,22870.89,39760.12,2.354179e+07,31396.421902,4786.868377,22870.89,39760.12,2.291411e+07,31338.337725,4736.226964,22870.89,39760.12,2.243185e+07,31146.987062,4831.993928,21844.16,39760.12,2.334817e+07,24022.70,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
2016-01-09 09:00:00,31800.176272,4686.419236,23405.11,39760.12,2.196253e+07,31658.041279,4766.904516,23325.63,39760.12,2.272338e+07,31413.362905,4850.144040,22870.89,39760.12,2.352390e+07,31395.836848,4786.956240,22870.89,39760.12,2.291495e+07,31347.434444,4733.184446,22870.89,39760.12,2.240303e+07,31190.408608,4788.866050,21844.16,39760.12,2.293324e+07,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
2016-01-09 10:00:00,31838.861302,4672.946364,23405.11,39760.12,2.183643e+07,31717.010000,4725.783101,23405.11,39760.12,2.233303e+07,31436.968603,4849.803255,22870.89,39760.12,2.352059e+07,31403.555489,4789.158093,22870.89,39760.12,2.293604e+07,31372.191058,4731.969302,22870.89,39760.12,2.239153e+07,31250.326598,4743.828632,22870.89,39760.12,2.250391e+07,30043.60,22870.89,28069.09,31438.11,29367.70,22464.78,1,2016,10,1,5,9,9,1
2016-01-09 11:00:00,31866.963314,4676.364978,23405.11,39760.12,2.186839e+07,31782.055698,4690.338090,23405.11,39760.12,2.199927e+07,31474.531453,4849.610207,22870.89,39760.12,2.351872e+07,31421.787717,4795.671625,22870.89,39760.12,2.299847e+07,31400.895503,4736.197990,22870.89,39760.12,2.243157e+07,31310.626134,4713.878145,22870.89,39760.12,2.222065e+07,32102.38,23325.63,26224.60,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-08 08:00:00,35529.754260,5094.934271,24961.74,44028.47,2.595836e+07,35341.113721,5245.924527,24536.76,44028.47,2.751972e+07,35079.370391,5321.831904,24536.76,44028.47,2.832189e+07,35055.637554,5255.482681,24536.76,44028.47,2.762010e+07,34912.184762,5258.576726,24536.76,44028.47,2.765263e+07,34734.581186,5305.932793,24536.76,44028.47,2.815292e+07,24961.74,26286.14,32028.11,32177.56,29619.03,30225.41,1,2024,8,1,0,8,8,2
2024-01-08 09:00:00,35609.997278,5033.927129,26126.26,44028.47,2.534042e+07,35421.522035,5187.418813,24536.76,44028.47,2.690931e+07,35115.653631,5323.051796,24536.76,44028.47,2.833488e+07,35090.122609,5257.311905,24536.76,44028.47,2.763933e+07,34959.294709,5250.782089,24536.76,44028.47,2.757071e+07,34791.202577,5287.317791,24536.76,44028.47,2.795573e+07,26126.26,25301.83,30412.43,34425.00,29271.85,28747.42,1,2024,9,1,0,8,8,2
2024-01-08 10:00:00,35698.938757,4998.027072,26134.31,44028.47,2.498027e+07,35518.153488,5138.040971,24913.75,44028.47,2.639947e+07,35175.681229,5330.295935,24536.76,44028.47,2.841205e+07,35126.711576,5276.051617,24536.76,44028.47,2.783672e+07,35022.181058,5253.476879,24536.76,44028.47,2.759902e+07,34861.843557,5280.311880,24536.76,44028.47,2.788169e+07,26134.31,24692.58,29972.27,36133.55,29186.41,27939.94,1,2024,10,1,0,8,8,2
2024-01-08 11:00:00,35791.599112,4964.697794,26447.08,44028.47,2.464822e+07,35616.293953,5095.338793,24961.74,44028.47,2.596248e+07,35241.723911,5338.691965,24536.76,44028.47,2.850163e+07,35157.474402,5298.408651,24536.76,44028.47,2.807313e+07,35088.887407,5259.021995,24536.76,44028.47,2.765731e+07,34936.286546,5275.688251,24536.76,44028.47,2.783289e+07,27615.67,24536.76,28876.95,35037.32,29586.37,27349.77,1,2024,11,1,0,8,8,2


In [23]:
forecast_df = df_fea_eng.loc[df_fea_eng['date'] > split_date].set_index('date').drop('consumption', axis=1)
print(forecast_df.shape)  
forecast_df.head()          
# from real time until the end
# buda tahmini yapilacak tarihler 
# haa anladim aslinda rolling yapmamizin mantigi burda yatiyor
# biz simdi aslinda önümüzdeki saatlerin consumption degerlerini bilmiyoruz ve 
# su an modelimizde yok tahmin edemiyoruz ama rolling ile bu önümüzdeki tarihler icin 
# degerler vermis olduk. hep bir önceki 169 degeri alip ortalama, standart sapma, min, max, varyans


(168, 44)


Unnamed: 0_level_0,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consuption_lag_169,consuption_lag_174,consuption_lag_179,consuption_lag_184,consuption_lag_189,consuption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmonth,weekofyear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
2024-01-08 13:00:00,35937.555385,4916.313643,26447.08,44028.47,24170140.0,35788.781337,5001.663622,26134.31,44028.47,25016640.0,35377.036034,5313.943976,24536.76,44028.47,28238000.0,35225.202283,5324.975106,24536.76,44028.47,28355360.0,35198.239312,5261.035904,24536.76,44028.47,27678500.0,35054.808454,5267.707218,24536.76,44028.47,27748740.0,28627.06,24961.74,26286.14,32028.11,32177.56,27538.26,1,2024,13,1,0,8,8,2
2024-01-08 14:00:00,35981.070238,4898.259519,26447.08,44028.47,23992950.0,35845.240234,4961.07873,26447.08,44028.47,24612300.0,35428.108483,5284.694572,24536.76,44028.47,27928000.0,35242.672732,5334.29378,24536.76,44028.47,28454690.0,35214.306755,5270.432474,24536.76,44028.47,27777460.0,35082.973109,5266.742108,24536.76,44028.47,27738570.0,29361.85,26126.26,25301.83,30412.43,34425.0,27453.02,1,2024,14,1,0,8,8,2
2024-01-08 15:00:00,36020.706287,4885.894129,26447.08,44028.47,23871960.0,35893.649471,4935.061827,26447.08,44028.47,24354840.0,35485.319096,5244.117143,24536.76,44028.47,27500760.0,35269.212527,5336.880007,24536.76,44028.47,28482290.0,35218.527647,5284.262696,24536.76,44028.47,27923430.0,35113.239375,5263.656322,24536.76,44028.47,27706080.0,30327.94,26134.31,24692.58,29972.27,36133.55,27351.97,1,2024,15,1,0,8,8,2
2024-01-08 16:00:00,36055.00006,4880.476122,26447.08,44028.47,23819050.0,35937.555385,4916.313643,26447.08,44028.47,24170140.0,35546.641477,5195.046827,24536.76,44028.47,26988510.0,35298.477403,5337.020367,24536.76,44028.47,28483790.0,35213.608172,5298.095848,24536.76,44028.47,28069820.0,35144.269895,5259.852737,24536.76,44028.47,27666050.0,31411.98,27615.67,24536.76,28876.95,35037.32,28165.39,1,2024,16,1,0,8,8,2
2024-01-08 17:00:00,36083.139576,4881.807003,26447.08,44028.47,23832040.0,35981.070238,4898.259519,26447.08,44028.47,23992950.0,35609.555086,5142.274284,24913.75,44028.47,26442980.0,35334.152556,5330.22186,24536.76,44028.47,28411270.0,35214.561081,5312.457357,24536.76,44028.47,28222200.0,35173.522,5258.149042,24536.76,44028.47,27648130.0,33028.92,28473.55,24913.75,27658.01,33256.58,29597.19,1,2024,17,1,0,8,8,2


Time Series

![](2023-12-05-00-03-09.png)

In [24]:
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit 
# That make cross validation by time series data
# that is not a traditional cros validation. That is specified for time series.
import numpy as np


tscv = TimeSeriesSplit(n_splits=3, test_size=fh_new * 20)
score_list = []
fold = 1
unseen_preds = []
importance = [] # for feature importance. hangi feature'in ne kadar katki verdigini görmek icin

for train_index, test_index in tscv.split(X, y): # burda aslinda datayi bölüyoruz bir altta ciktisi var train_index ne demek oldugunun
    X_train, X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]
    print(X_train.shape, X_val.shape)
    rf = RandomForestRegressor(n_estimators=100, random_state=42)
    rf.fit(X_train, y_train)

    forecast_predcited = rf.predict(forecast_df)
    unseen_preds.append(forecast_predcited) # 3 cross validation sonuclari gelecek galiba n_split=3 oldugu icin. cünkü time serimzi 3 parcaya bölmüstü
    score = mean_absolute_error(y_val, rf.predict(X_val))
    print(f"MAE FOLD - {fold}: {score}")
    score_list.append(score)
    importance.append(rf.feature_importances_) # burdanda 3 farkli sonuclar gelecek
    fold += 1

print("CV Mean Score: ", np.mean(score_list))

(59970, 44) (3380, 44)
MAE FOLD - 1: 834.7721221301776
(63350, 44) (3380, 44)
MAE FOLD - 2: 1596.3879292603551
(66730, 44) (3380, 44)
MAE FOLD - 3: 961.1536479881656
CV Mean Score:  1130.7712331262328


In [25]:
for train_index, test_index in tscv.split(X, y):
    print(train_index, test_index)
# burdaki 0. satirin degerlerinde ilk list train index yanindaki 59956 dan sonra gelenler ise test_index olarak ele aliniyor
# yani aslinda burda verilerin paketleniyor. hangi verinin nereye gidecegi kararlastiriliyor

[    0     1     2 ... 59967 59968 59969] [59970 59971 59972 ... 63347 63348 63349]
[    0     1     2 ... 63347 63348 63349] [63350 63351 63352 ... 66727 66728 66729]
[    0     1     2 ... 66727 66728 66729] [66730 66731 66732 ... 70107 70108 70109]


In [26]:
# iki yöntem var. ya modeli kaydedip, tahminleri bu modele göre yapmak
# ama time serieslerde bazen zamana bagli olarak bir cok sey degisebiliyor.
# düzenli calibere edebilirsen modeli kaydedip kullanmak daha iyi olur yoksa burdaki gibi anlik calibre iyi olabilir
# catboost hizli yapiyor ve 24 saniyede egitti.
# ben RF ile 16 dk da egittim


In [27]:
forecasted=pd.DataFrame(unseen_preds[2],columns=["forecasting"]).set_index(forecast_df.index)
# unseen_preds[2] sonuncu cross validation sonuclarini alalim dedik
# normalde hangisini alacainda ayri bir strateji belki ortalamalarinida alabiliriz
# yukarida forecast_df datasini bugunden sonraki data olarak belirlemistik

In [28]:
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=df_fea_eng.date.iloc[-fh_new*5:], y=df_fea_eng.consumption.iloc[-fh_new*5:], name = 'Historical Data', mode = 'lines'))
fig1.add_trace(go.Scatter(x=forecasted.index, y=forecasted['forecasting'], name = 'Tarihsel Veri', mode = 'lines'))

In [29]:
f_importance = pd.concat([pd.Series(X.columns.to_list(),name='Feature'),pd.Series(importance[2],name="Importance")],axis=1).sort_values(by='Importance',ascending=True)

In [30]:
import plotly.express as px
fig2 = px.bar(f_importance.tail(20), x='Importance', y='Feature')
fig2.show()

In [None]:
# ! pip install joblib

In [35]:
rf

In [36]:
model = rf

In [40]:
from joblib import dump

dump(model, 'model.joblib')

# eger model ismini 'veysel' olarak kaydetmek istersen
# dump(model, 'veysel.joblib') yazabilirsin

['model.joblib']

In [39]:
from joblib import load

# Modeli dosyadan yükleyin
model = load('model.joblib')

# Modeli kullanarak tahminler yapabilirsiniz
# Örneğin: predictions = model.predict(X_test)

In [None]:
# from joblib import load

# Modelin tam yolunu belirtin
# model_path = "C:/models/model.joblib"

# Modeli yükleyin
# model = load(model_path)


In [41]:
model.predict(X_val)
# bir predict denemesi modeli tekrar yükleyerek

array([42108.8177, 42292.4398, 42374.3074, ..., 41978.0717, 42099.6106,
       42422.81  ])