In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tabulate import tabulate
import joblib

# Read the preprocessed dataset from parquet.
df = pd.read_parquet("../PreprocessedData/dataLog.parquet")

# Model inputs; 'Vehicle_Age' has been removed from the feature set.
feature_columns = [
    'Power',
    'Mileage',
    'Brand_encoded',
    'Model_encoded',
    'Fuel_encoded',
    'Transmission_Manual',
]
X = df[feature_columns]
y = df['Price']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate regressors, keyed by the label used in the report and in the saved file names.
models = {
    "LinearRegression": LinearRegression(),
    "RandomForest": RandomForestRegressor(n_estimators=100, random_state=42),
    "XGBoost": XGBRegressor(n_estimators=100, random_state=42),
    "LightGBM": LGBMRegressor(n_estimators=100, random_state=42),
}

# Test-set metrics per model: {label: {"MSE": ..., "MAE": ..., "R2": ...}}.
results = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Score the held-out predictions with each metric, in report-column order.
    mse, mae, r2 = (
        metric(y_test, y_pred)
        for metric in (mean_squared_error, mean_absolute_error, r2_score)
    )
    results[name] = {"MSE": mse, "MAE": mae, "R2": r2}

    # Persist the fitted estimator next to the notebook (current working directory).
    file_name = f"{name}Model.pkl"
    joblib.dump(model, file_name)
    print(f"Model '{name}' adıyla '{file_name}' dosyasına kaydedildi.")

# Transpose so each model is a row and each metric a column.
results_df = pd.DataFrame(results).T

print(tabulate(results_df, headers="keys", tablefmt="pretty"))


Model 'LinearRegression' adıyla 'LinearRegressionModel.pkl' dosyasına kaydedildi.
Model 'RandomForest' adıyla 'RandomForestModel.pkl' dosyasına kaydedildi.
Model 'XGBoost' adıyla 'XGBoostModel.pkl' dosyasına kaydedildi.
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005468 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 836
[LightGBM] [Info] Number of data points in the train set: 862709, number of used features: 6
[LightGBM] [Info] Start training from score 10.358890
Model 'LightGBM' adıyla 'LightGBMModel.pkl' dosyasına kaydedildi.
+------------------+----------------------+---------------------+--------------------+
|                  |         MSE          |         MAE         |         R2         |
+------------------+----------------------+---------------------+--------------------+
| LinearRegression | 0.06748200460447042  | 0.1

In [2]:
# Compute training and test errors for every fitted model so the gap between
# the two (a sign of overfitting) can be compared side by side.
#
# Predictions are computed once per split and reused for both metrics: for
# regressors, `model.score(X, y)` is the R² of `model.predict(X)`, so calling it
# in addition to `predict` would run inference on each split twice.
for name, model in models.items():
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    train_mse = mean_squared_error(y_train, y_train_pred)
    test_mse = mean_squared_error(y_test, y_test_pred)

    # Same value `model.score` would return, without a second predict pass.
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)

    print(f"Model: {name}")
    print(f"  Eğitim MSE: {train_mse:.4f}, Test MSE: {test_mse:.4f}")
    print(f"  Eğitim R²: {train_r2:.4f}, Test R²: {test_r2:.4f}")
    print("-" * 50)


Model: LinearRegression
  Eğitim MSE: 0.0695, Test MSE: 0.0675
  Eğitim R²: 0.7888, Test R²: 0.7940
--------------------------------------------------
Model: RandomForest
  Eğitim MSE: 0.0055, Test MSE: 0.0213
  Eğitim R²: 0.9833, Test R²: 0.9349
--------------------------------------------------
Model: XGBoost
  Eğitim MSE: 0.0218, Test MSE: 0.0221
  Eğitim R²: 0.9338, Test R²: 0.9326
--------------------------------------------------
Model: LightGBM
  Eğitim MSE: 0.0246, Test MSE: 0.0247
  Eğitim R²: 0.9252, Test R²: 0.9247
--------------------------------------------------
