In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.preprocessing import LabelEncoder

# Load the raw dataset from the working directory.
df = pd.read_csv("healthcare_dataset.csv")

# Normalise column labels: trim surrounding whitespace, lower-case, and
# replace spaces with underscores.
df = df.rename(columns=lambda label: label.strip().lower().replace(" ", "_"))

# Parse both date columns and derive the stay duration in whole days.
admitted_on = pd.to_datetime(df["date_of_admission"])
discharged_on = pd.to_datetime(df["discharge_date"])

# The raw date columns are dropped; only the derived duration is kept,
# appended as the last column.
df = df.drop(columns=["date_of_admission", "discharge_date"])
df["length_of_stay"] = (discharged_on - admitted_on).dt.days

# Features are every remaining column except the target.
X = df.drop(columns="billing_amount")
y = df["billing_amount"]

# Hold out 20% of the rows for testing; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

from sklearn.preprocessing import OrdinalEncoder

# NOTE(review): this dict is never populated in this cell and no LabelEncoder
# is used below (encoding is done by a single OrdinalEncoder). It is kept only
# in case a later cell still refers to the name -- confirm and remove.
label_encoders = {}

# Work on explicit copies so the column assignments below never write into
# data shared with `X` / `df` (this also avoids chained-assignment warnings
# that some pandas / scikit-learn version combinations emit).
X_train = X_train.copy()
X_test = X_test.copy()

# Text-like feature columns. The check is done on the dtype (not the values)
# so it matches both classic `object` columns and pandas' dedicated string
# dtype; a plain `dtype == 'object'` comparison misses the latter.
categorical_cols = [
    col for col in X_train.columns
    if pd.api.types.is_string_dtype(X_train[col].dtype)
]

# Lower-case the text so values differing only by case share one category.
for col in categorical_cols:
    X_train[col] = X_train[col].str.lower()
    X_test[col] = X_test[col].str.lower()

# Categories that appear only in the test set are encoded as -1 instead of
# raising an error.
encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)

# Skip encoding when there is nothing to encode (fitting on zero columns
# raises); `encoder` stays unfitted in that case.
if categorical_cols:
    # Fit on the training split only, then apply the same mapping to the test split.
    X_train[categorical_cols] = encoder.fit_transform(X_train[categorical_cols])
    X_test[categorical_cols] = encoder.transform(X_test[categorical_cols])


# Scorer built from MSE. `greater_is_better=False` makes it return negated
# values, which is why the scores are negated again before the square root.
mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)

# Candidate regressors, keyed by the label used in the printed report.
models = dict([
    ("Linear Regression", LinearRegression()),
    ("Decision Tree", DecisionTreeRegressor(random_state=42)),
])

for name, model in models.items():
    # 5-fold cross-validation on the training split only.
    neg_mse_scores = cross_val_score(model, X_train, y_train, cv=5, scoring=mse_scorer)
    rmse_scores = np.sqrt(np.negative(neg_mse_scores))

    # One report per model: per-fold RMSE, mean RMSE, then a separator line.
    print(
        f"📊 {name}",
        f"Cross-validation RMSE scores: {rmse_scores}",
        f"Ortalama RMSE: {rmse_scores.mean():.2f}",
        "-" * 40,
        sep="\n",
    )


📊 Linear Regression
Cross-validation RMSE scores: [14186.4640898  14303.77669531 14165.720539   14310.44081912
 14242.9253483 ]
Ortalama RMSE: 14241.87
----------------------------------------
📊 Decision Tree
Cross-validation RMSE scores: [19121.1771061  19371.87221927 19212.6170044  19561.53115691
 19071.25934573]
Ortalama RMSE: 19267.69
----------------------------------------


In [8]:
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameters for the decision tree (4 x 3 x 3 = 36 combinations).
param_grid = dict(
    max_depth=[3, 5, 10, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive 5-fold search on the training split, scored by negated MSE and
# run in parallel (n_jobs=-1). `fit` returns the fitted search object itself.
grid_search = GridSearchCV(
    DecisionTreeRegressor(random_state=42),
    param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Keep the winning estimator and its parameter combination for later cells.
best_model, best_params = grid_search.best_estimator_, grid_search.best_params_

print("🔍 En iyi hiperparametreler:", best_params, sep="\n")


🔍 En iyi hiperparametreler:
{'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}


In [9]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict on the held-out test split with the tuned tree.
y_pred = best_model.predict(X_test)

# Error metrics; RMSE is derived from MSE so both stay consistent.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report: header line followed by one "label: value" line per metric.
print("📈 Test Verisi Performansı")
for label, value, spec in (
    ("MAE", mae, ".2f"),
    ("RMSE", rmse, ".2f"),
    ("R² Skoru", r2, ".4f"),
):
    print(f"{label}: {value:{spec}}")

📈 Test Verisi Performansı
MAE: 12445.51
RMSE: 14521.56
R² Skoru: -0.0602


In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Predictions of the tuned tree on the held-out test split.
y_pred = best_model.predict(X_test)

# Evaluation metrics. An R² below zero means the predictions fit the test
# targets worse than always predicting their mean would.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Build the whole report first, then emit it with a single print call.
report_lines = [
    "📈 Test Verisi Değerlendirme Sonuçları",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R² Score: {r2:.4f}",
]
print("\n".join(report_lines))

📈 Test Verisi Değerlendirme Sonuçları
Mean Absolute Error (MAE): 12445.51
Mean Squared Error (MSE): 210875689.20
Root Mean Squared Error (RMSE): 14521.56
R² Score: -0.0602
