In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor, GradientBoostingRegressor, VotingRegressor, StackingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
import joblib

In [4]:
# Read the preprocessed dataset from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='veriOnIslendi.csv')

In [None]:
# Separate the target column ('7') from the remaining columns, which serve as features.
X = df.drop('7', axis=1)
y = df.loc[:, '7']

0      290.378149
1      332.114617
2      332.258287
3      270.489957
4      320.293139
          ...    
295    309.993230
296    254.313790
297    354.848666
298    367.547577
299    467.012361
Name: 7, Length: 300, dtype: float64


In [4]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Candidate models, keyed by the display name used in the result tables.
# Estimators with internal randomness (tree, forest, boosting) are seeded so
# that the RMSE comparison and the final model selection are reproducible
# across "Restart & Run All".
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Elastic Net': ElasticNet(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
    'KNN': KNeighborsRegressor(),
    'SVR': SVR(),
    # Polynomial feature expansion followed by ordinary least squares; the
    # step name 'poly' is what the 'poly__degree' grid parameter refers to.
    'Polynomial Regression': Pipeline([
        ('poly', PolynomialFeatures()),
        ('linear', LinearRegression())
    ])
}

In [6]:
# Hyper-parameter search space for each model; looked up by model name in the
# grid-search loop. An empty grid means "fit once with the defaults".
param_grid = {
    'Linear Regression': dict(),
    'Ridge Regression': dict(alpha=[0.1, 1, 10]),
    'Lasso Regression': dict(alpha=[0.01, 0.1, 1]),
    'Elastic Net': dict(alpha=[0.1, 1], l1_ratio=[0.2, 0.5, 0.8]),
    'Decision Tree': dict(max_depth=[3, 5, 10, 20]),
    'Random Forest': dict(n_estimators=[50, 100], max_depth=[5, 10]),
    'Gradient Boosting': dict(n_estimators=[50, 100], learning_rate=[0.05, 0.1]),
    'KNN': dict(n_neighbors=[3, 5, 7]),
    'SVR': dict(C=[0.1, 1, 10], kernel=['linear', 'rbf']),
    # degree parameter of the PolynomialFeatures step inside the pipeline
    'Polynomial Regression': dict(poly__degree=[2, 3, 4]),
}

In [7]:
# Accumulator for one result record per tuned model (filled by the training loop).
results = list()

In [8]:
# Tune every candidate with 5-fold grid search, then score the refit best
# estimator on the held-out test set.
results.clear()  # keep the cell idempotent: a re-run must not append duplicate rows
for model_name, model in models.items():
    print(f"{model_name} modeli eğitiliyor...")
    grid_search = GridSearchCV(
        model,
        param_grid[model_name],
        cv=5,
        scoring='neg_root_mean_squared_error',
    )
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)

    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6,
    # where it raises TypeError. Taking the square root of the MSE gives the
    # same RMSE on every scikit-learn version.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    results.append({
        'Model': model_name,
        'RMSE': rmse,
        'Best Parameters': best_params
    })

Linear Regression modeli eğitiliyor...
Ridge Regression modeli eğitiliyor...
Lasso Regression modeli eğitiliyor...
Elastic Net modeli eğitiliyor...




Decision Tree modeli eğitiliyor...
Random Forest modeli eğitiliyor...




Gradient Boosting modeli eğitiliyor...




KNN modeli eğitiliyor...
SVR modeli eğitiliyor...
Polynomial Regression modeli eğitiliyor...




In [16]:
# Turn the per-model records into a table and print it.
results_df = pd.DataFrame(data=results)
print(results_df)

                   Model       RMSE  \
0      Linear Regression  35.025770   
1       Ridge Regression  35.002955   
2       Lasso Regression  34.967592   
3            Elastic Net  35.604737   
4          Decision Tree  44.437489   
5          Random Forest  35.685269   
6      Gradient Boosting  36.551196   
7                    KNN  74.913947   
8                    SVR  35.319422   
9  Polynomial Regression  38.513199   

                              Best Parameters  
0                                          {}  
1                              {'alpha': 0.1}  
2                              {'alpha': 0.1}  
3             {'alpha': 0.1, 'l1_ratio': 0.8}  
4                            {'max_depth': 5}  
5       {'max_depth': 5, 'n_estimators': 100}  
6  {'learning_rate': 0.1, 'n_estimators': 50}  
7                          {'n_neighbors': 3}  
8               {'C': 10, 'kernel': 'linear'}  
9                         {'poly__degree': 2}  


In [19]:
# Accumulator for the ensemble models' result records.
ensemble_results = list()

In [21]:
# Bagging: 50 bootstrap replicas of a random forest.
# Seeding the bagging wrapper makes the fit, and therefore the reported RMSE,
# reproducible between runs.
bagging_model = BaggingRegressor(
    estimator=RandomForestRegressor(),
    n_estimators=50,
    random_state=42,
)
bagging_model.fit(X_train, y_train)
# `squared=False` was removed from mean_squared_error in scikit-learn 1.6;
# take the square root of the MSE instead.
bagging_rmse = np.sqrt(mean_squared_error(y_test, bagging_model.predict(X_test)))
ensemble_results.append({'Model': 'Bagging', 'RMSE': bagging_rmse})



In [22]:
# Gradient Boosting with 100 boosting stages, seeded so the reported RMSE is
# reproducible between runs.
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_model.fit(X_train, y_train)
# `squared=False` was removed from mean_squared_error in scikit-learn 1.6;
# take the square root of the MSE instead.
gb_rmse = np.sqrt(mean_squared_error(y_test, gb_model.predict(X_test)))
ensemble_results.append({'Model': 'Gradient Boosting', 'RMSE': gb_rmse})



In [23]:
# Voting ensemble of ridge, random forest and linear SVR. It is recorded under
# the label 'Soft Voting', which the model-selection cell uses as a lookup key.
# The forest is seeded so the reported RMSE is reproducible between runs.
voting_model = VotingRegressor([
    ('ridge', Ridge(alpha=1)),
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('svr', SVR(C=1, kernel='linear'))
])
voting_model.fit(X_train, y_train)
# `squared=False` was removed from mean_squared_error in scikit-learn 1.6;
# take the square root of the MSE instead.
voting_rmse = np.sqrt(mean_squared_error(y_test, voting_model.predict(X_test)))
ensemble_results.append({'Model': 'Soft Voting', 'RMSE': voting_rmse})



In [24]:
# Stacking: ridge, random forest and linear SVR as base learners, combined by
# a linear-regression meta-learner. The forest is seeded so the reported RMSE
# is reproducible between runs.
stacking_model = StackingRegressor(
    estimators=[
        ('ridge', Ridge(alpha=1)),
        ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
        ('svr', SVR(C=1, kernel='linear'))
    ],
    final_estimator=LinearRegression()
)
stacking_model.fit(X_train, y_train)
# `squared=False` was removed from mean_squared_error in scikit-learn 1.6;
# take the square root of the MSE instead.
stacking_rmse = np.sqrt(mean_squared_error(y_test, stacking_model.predict(X_test)))
ensemble_results.append({'Model': 'Stacking', 'RMSE': stacking_rmse})

# Summarize all ensemble scores in one table.
ensemble_results_df = pd.DataFrame(ensemble_results)
print(ensemble_results_df)

               Model       RMSE
0            Bagging  37.268963
1  Gradient Boosting  38.734997
2        Soft Voting  39.118926
3           Stacking  35.115046




In [27]:
# Select the model with the lowest test RMSE and persist it.
# NOTE(review): the winner is chosen on the same test set that is used for
# reporting, so its reported RMSE is optimistically biased; consider choosing
# on cross-validation scores instead.
all_results = pd.concat([results_df, ensemble_results_df], ignore_index=True)
best_row = all_results.loc[all_results['RMSE'].idxmin()]
best_model_name = best_row['Model']
# Only grid-search rows carry a parameter dict; ensemble rows have NaN here.
# This also disambiguates 'Gradient Boosting', which appears in both tables.
best_params = best_row.get('Best Parameters')

if isinstance(best_params, dict):
    # `models[...]` holds the *untuned* estimator. Apply the winning
    # hyper-parameters before fitting so the saved model is the one whose
    # RMSE was reported, not a default-parameter variant.
    final_model = models[best_model_name].set_params(**best_params)
    final_model.fit(X_train, y_train)
else:
    # Ensemble models were already fitted on the training split when they were
    # scored; reuse that fitted object so the saved model is exactly the one
    # that was evaluated.
    final_model = {
        'Bagging': bagging_model,
        'Gradient Boosting': gb_model,
        'Soft Voting': voting_model,
        'Stacking': stacking_model,
    }[best_model_name]

joblib.dump(final_model, f'{best_model_name}_model.joblib')
print(f"En İyi Model: {best_model_name} olarak kaydedildi.")

En İyi Model: Lasso Regression olarak kaydedildi.
