In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score

 1. Загрузка данных

In [None]:
# Load the forest fires table; the "area" column is the regression target.
data = pd.read_csv("forestfires.csv")

# Encode the categorical columns (month, day) by calendar position.
# LabelEncoder would sort the labels alphabetically ("apr" < "aug" < "dec" ...),
# handing the linear models an order that carries no meaning.
# NOTE(review): assumes both columns hold three-letter English abbreviations
# (matched case-insensitively) — confirm against the CSV. Unexpected labels
# raise below instead of being silently mis-coded.
CALENDAR_ORDER = {
    "month": ["jan", "feb", "mar", "apr", "may", "jun",
              "jul", "aug", "sep", "oct", "nov", "dec"],
    "day": ["mon", "tue", "wed", "thu", "fri", "sat", "sun"],
}
for col, order in CALENDAR_ORDER.items():
    labels = data[col].astype(str).str.strip().str.lower()
    unknown = sorted(set(labels) - set(order))
    if unknown:
        raise ValueError(f"Unexpected values in column {col!r}: {unknown}")
    # Zero-based integer codes, same dtype family LabelEncoder produced.
    data[col] = labels.map({name: pos for pos, name in enumerate(order)})

# Build the feature matrix X (every column except the target) and the target y.
X = data.drop("area", axis=1).values
y = np.log1p(data["area"].values)   # log(1 + area) transform of the target

2. Деление на train/test

In [None]:
def custom_train_test_split(X, y, test_size=0.3, random_state=42):
    """Shuffle the samples and split them into train and test parts.

    Parameters
    ----------
    X : array-like
        Feature matrix; the first axis indexes samples.
    y : array-like
        Targets, one per sample in ``X``.
    test_size : float, default 0.3
        Fraction of samples placed in the test part, within ``[0, 1]``.
    random_state : int or None, default 42
        Seed for the shuffle. Any integer (including ``0``) gives a
        reproducible split. ``None`` draws from NumPy's global RNG.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or ``test_size`` is outside
        ``[0, 1]``.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be within [0, 1], got {test_size!r}")

    # `is None` rather than truthiness, so that seed 0 is honoured. A local
    # RandomState yields the same permutation as seeding the global RNG with
    # the same value, but leaves the global RNG state untouched.
    if random_state is None:
        shuffle = np.random.shuffle
    else:
        shuffle = np.random.RandomState(random_state).shuffle

    indices = np.arange(len(X))
    shuffle(indices)

    # The first `split` shuffled indices form the train part, the rest the test part.
    split = int(len(X) * (1 - test_size))
    train_idx, test_idx = indices[:split], indices[split:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

# Hold out 30% of the rows with a fixed seed; these are the function's
# defaults, spelled out here so the split parameters are visible at the call.
X_train, X_test, y_train, y_test = custom_train_test_split(
    X, y, test_size=0.3, random_state=42
)

3. Линейная регрессия 

In [None]:
# Baseline: ordinary least squares on the untransformed feature matrix.
lr = LinearRegression().fit(X_train, y_train)

# Report R² on the train split first, then on the test split.
print("=== Линейная регрессия ===")
for label, features, target in (
    ("R2 (train):", X_train, y_train),
    ("R2 (test) :", X_test, y_test),
):
    print(label, r2_score(target, lr.predict(features)))
print()

 4. Полиномиальная регрессия 

In [None]:
# Sweep the polynomial degree and record R² on both splits for every fit.
degrees = range(1, 6)
train_scores, test_scores = [], []

for d in degrees:
    # Fit the expansion on the training split only, then reuse it for the test split.
    poly = PolynomialFeatures(d)
    X_train_poly = poly.fit_transform(X_train)
    X_test_poly = poly.transform(X_test)

    model = LinearRegression().fit(X_train_poly, y_train)

    train_r2 = r2_score(y_train, model.predict(X_train_poly))
    test_r2 = r2_score(y_test, model.predict(X_test_poly))
    train_scores.append(train_r2)
    test_scores.append(test_r2)

# One figure with both curves plotted against the degree.
fig, ax = plt.subplots(figsize=(8, 5))
for scores, marker, label in (
    (train_scores, "o", "Train R²"),
    (test_scores, "s", "Test R²"),
):
    ax.plot(degrees, scores, marker=marker, label=label)
ax.set_xlabel("Степень полинома")
ax.set_ylabel("R² score")
ax.set_title("Полиномиальная регрессия")
ax.legend()
ax.grid(True)
plt.show()

5. Регуляризация (Ridge и Lasso) 

In [None]:
# Regularization strengths to sweep.
# NOTE(review): the grid jumps from 1e4 straight to 1e6 (1e5 is absent) —
# confirm this is intentional.
alphas = [0.01, 0.1, 1, 10, 100, 1000, 10000, 1000000]


def sweep_alphas(make_model, alphas, X_train, y_train, X_test, y_test):
    """Fit one model per alpha and collect R² on the train and test splits.

    Parameters
    ----------
    make_model : callable
        Takes an alpha value and returns an unfitted estimator.
    alphas : iterable of float
        Regularization strengths, evaluated in the given order.
    X_train, y_train, X_test, y_test : array-like
        Train and test splits.

    Returns
    -------
    train_r2, test_r2 : list of float
        R² scores, one entry per alpha, in the order of ``alphas``.
    """
    train_r2, test_r2 = [], []
    for alpha in alphas:
        model = make_model(alpha)
        model.fit(X_train, y_train)
        train_r2.append(r2_score(y_train, model.predict(X_train)))
        test_r2.append(r2_score(y_test, model.predict(X_test)))
    return train_r2, test_r2


def plot_alpha_sweep(alphas, train_r2, test_r2, name):
    """Plot train/test R² against alpha on a log-scaled x axis.

    ``name`` is the model name inserted into the legend labels and the title.
    """
    plt.figure(figsize=(8, 5))
    plt.plot(alphas, train_r2, marker="o", label=f"Train R² ({name})")
    plt.plot(alphas, test_r2, marker="s", label=f"Test R² ({name})")
    plt.xscale("log")  # alphas span many orders of magnitude
    plt.xlabel("Alpha")
    plt.ylabel("R² score")
    plt.title(f"{name} регрессия")
    plt.legend()
    plt.grid(True)
    plt.show()


# Ridge
ridge_train, ridge_test = sweep_alphas(
    lambda a: Ridge(alpha=a), alphas, X_train, y_train, X_test, y_test
)
plot_alpha_sweep(alphas, ridge_train, ridge_test, "Ridge")

# Lasso (max_iter raised to 5000, as in the original sweep)
lasso_train, lasso_test = sweep_alphas(
    lambda a: Lasso(alpha=a, max_iter=5000),
    alphas, X_train, y_train, X_test, y_test,
)
plot_alpha_sweep(alphas, lasso_train, lasso_test, "Lasso")
