In [22]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [7]:
# Location of the input CSV. The original machine-specific path stays as the
# default so existing behaviour is unchanged; set the SYNTHETIC_DB_CSV
# environment variable to run the notebook against a file stored elsewhere.
DATA_PATH = os.environ.get(
    "SYNTHETIC_DB_CSV",
    "C:\\Users\\gabde\\Downloads\\synthetic_db.csv",
)

# Load the raw table and keep only the columns used for modelling:
# two categorical inputs, one numeric input and the target ("sum").
df = pd.read_csv(DATA_PATH)
df = df[["segment", "magnitude", "engine_cost", "sum"]]
df

Unnamed: 0,segment,magnitude,engine_cost,sum
0,1,1,27683.571008,41774.757899
1,1,2,53645.343563,73990.154964
2,1,3,65036.314025,116019.808937
3,1,1,70791.487975,115559.326379
4,1,2,51001.016010,100949.117128
...,...,...,...,...
235,3,2,101223.612768,155684.668243
236,3,3,146514.153905,247550.276153
237,3,1,101154.015473,127234.502360
238,3,2,121727.398068,224795.744136


In [24]:
# One-hot encode the two categorical columns. drop_first=True omits the first
# level of each column, so that level acts as the implicit baseline category.
data = pd.get_dummies(
    data=df,
    columns=['segment', 'magnitude'],
    drop_first=True,
)

In [25]:
# Display the encoded frame to check which dummy columns were generated.
data

Unnamed: 0,engine_cost,sum,segment_2,segment_3,magnitude_2,magnitude_3
0,27683.571008,41774.757899,False,False,False,False
1,53645.343563,73990.154964,False,False,True,False
2,65036.314025,116019.808937,False,False,False,True
3,70791.487975,115559.326379,False,False,False,False
4,51001.016010,100949.117128,False,False,True,False
...,...,...,...,...,...,...
235,101223.612768,155684.668243,False,True,True,False
236,146514.153905,247550.276153,False,True,False,True
237,101154.015473,127234.502360,False,True,False,False
238,121727.398068,224795.744136,False,True,True,False


In [26]:
# Predictors: the numeric engine cost plus the one-hot indicator columns.
feature_cols = ['engine_cost', 'segment_2', 'segment_3', 'magnitude_2', 'magnitude_3']
# Regression target.
target_col = 'sum'

X = data[feature_cols]
y = data[target_col]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [27]:
# Hyperparameters shared by both tree ensembles (same size, same seed).
ensemble_params = {"n_estimators": 100, "random_state": 42}

# Candidate regressors, keyed by the label used when reporting results.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(**ensemble_params),
    "Gradient Boosting": GradientBoostingRegressor(**ensemble_params),
}

In [28]:
# Maps each model label to its metrics computed on the held-out test split.
results = {}
for name, model in models.items():
    # Train on the training split, then predict on the test split
    # (fit() returns the fitted estimator, so the calls can be chained).
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Metrics: mean absolute error, root mean squared error and R².
    results[name] = {
        "MAE": mean_absolute_error(y_test, y_pred),
        "RMSE": np.sqrt(mean_squared_error(y_test, y_pred)),
        "R²": r2_score(y_test, y_pred),
    }

In [29]:
# Metric keys in display order, each paired with its float format spec.
metric_formats = (("MAE", ".2f"), ("RMSE", ".2f"), ("R²", ".4f"))

# Print one block per model: a blank line, the label, then the indented metrics.
for model_name, metrics in results.items():
    print(f"\n{model_name}:")
    for metric_key, spec in metric_formats:
        print(f"  {metric_key}: {metrics[metric_key]:{spec}}")

# Pick the best model by RMSE (lowest wins; ties go to the first model inserted).
best_model_name = min(results.items(), key=lambda entry: entry[1]['RMSE'])[0]
print(f"\nBest model: {best_model_name}")


Linear Regression:
  MAE: 11392.25
  RMSE: 14167.55
  R²: 0.8915

Random Forest:
  MAE: 13878.00
  RMSE: 17412.42
  R²: 0.8361

Gradient Boosting:
  MAE: 12661.57
  RMSE: 17158.30
  R²: 0.8409

Best model: Linear Regression
