In [1]:
import pandas as pd
import numpy as np

# Seed the legacy global NumPy RNG so every draw below is reproducible.
np.random.seed(42)

n_samples = 1000

# Continuous measurement features: name -> (low, high) for a uniform draw.
# Order matters: each draw consumes the shared global RNG stream in sequence.
uniform_ranges = {
    "carat": (0.2, 5.0),
    "depth": (55, 65),
    "table": (50, 70),
    "x": (3.0, 10.0),
    "y": (3.0, 10.0),
    "z": (2.0, 6.0),
}

# Integer quality scores: name -> (low, high); randint excludes `high`,
# so the scores span 1-9, 1-7 and 1-4 respectively.
score_ranges = {
    "clarity_score": (1, 10),
    "color_score": (1, 8),
    "cut_score": (1, 5),
}

feature_columns = {}
for name, (low, high) in uniform_ranges.items():
    feature_columns[name] = np.round(np.random.uniform(low, high, n_samples), 2)
for name, (low, high) in score_ranges.items():
    feature_columns[name] = np.random.randint(low, high, n_samples)

df = pd.DataFrame(feature_columns)

# Price is a fixed linear blend of six features plus Gaussian noise (sigma = 500).
# x, y and z are deliberately absent, so they carry no signal about the target.
price_weights = {
    "carat": 2000,
    "depth": 10,
    "table": 5,
    "clarity_score": 300,
    "color_score": 200,
    "cut_score": 150,
}
weighted_terms = [df[name] * weight for name, weight in price_weights.items()]

# Accumulate left to right so the floating-point summation order is fixed.
price_signal = weighted_terms[0]
for term in weighted_terms[1:]:
    price_signal = price_signal + term

price_noise = np.random.normal(0, 500, n_samples)
df["price"] = (price_signal + price_noise).round(2)

# Persist without the index so the CSV holds only the ten data columns.
df.to_csv("train.csv", index=False)
df.head(n=5)


Unnamed: 0,carat,depth,table,x,y,z,clarity_score,color_score,cut_score,price
0,2.0,56.85,55.23,7.71,7.0,3.57,1,3,3,6270.17
1,4.76,60.42,54.94,8.58,8.64,3.89,1,6,3,12028.74
2,3.71,63.73,68.13,4.75,8.32,5.42,3,2,1,10865.45
3,3.07,62.32,54.99,7.37,4.08,3.36,5,7,2,10417.25
4,0.95,63.07,55.44,7.0,4.04,5.48,3,3,1,4318.55


Veri Seti İçeriği (9 özellik + hedef)
carat, depth, table, x, y, z: ölçüm özellikleri

clarity_score, color_score, cut_score: sayısal olarak encode edilmiş kalite puanları

price: hedef değişken

Yapay veriyi hazırladık; şimdi modelleme yapabiliriz.


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import numpy as np

# Reload the dataset from disk so this cell only depends on train.csv.
df = pd.read_csv("train.csv")

# Everything except the target column is a feature.
X = df.drop(columns="price")
y = df.loc[:, "price"]

# Hold out 20% of the rows for evaluation; fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows: RMSE is in price units, R² is unitless.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

for label, value, digits in (("✅ RMSE:", rmse, 2), ("✅ R² Score:", r2, 4)):
    print(label, round(value, digits))


✅ RMSE: 605.16
✅ R² Score: 0.9549


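RMSE ≈ 605: hata düşük; fiyata eklenen gürültünün standart sapması 500 olduğundan bu değer ulaşılabilecek alt sınıra yakındır.
R² ≈ 0,95: model güçlü; fiyattaki değişkenliğin büyük bölümünü açıklıyor.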

In [4]:
import joblib

# Save the fitted model, plus the feature names in training order, so a
# consumer can rebuild inputs with the same column layout.
feature_names = X.columns.tolist()
joblib.dump(value=model, filename="rf_model.pkl")
joblib.dump(value=feature_names, filename="model_columns.pkl")


['model_columns.pkl']