<a href="https://colab.research.google.com/github/vsevolod-BR/prictice/blob/main/liner_regression(pr_2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import linear_model
from sklearn import metrics
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [68]:
# гиперпараметры
alphas = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50]

# загрузка данных (способ, приведенный в задании)
X, y = datasets.fetch_california_housing(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
num_features = X.shape[1]
for i in range(num_features):
    column = X.columns[i]
    X_train[f"{column}_squared"] = X_train[column] ** 2
    X_test[f"{column}_squared"] = X_test[column] ** 2
    X_train[f"{column}_cubed"] = X_train[column] ** 3
    X_test[f"{column}_cubed"] = X_test[column] ** 3
    X_train[f"{column}_log"] = np.log(10 + X_train[column])
    X_test[f"{column}_log"] = np.log(10 + X_test[column])

    for j in range(i + 1, num_features):
        other_column = X.columns[j]
        X_train[f"{column}_x_{other_column}"] = X_train[column] * X_train[other_column]
        X_test[f"{column}_x_{other_column}"] = X_test[column] * X_test[other_column]

X_train()

In [71]:
# обучение и преобразование обучающей и тестовой выборки
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train = pd.DataFrame(X_scaler.transform(X_train), columns=X_scaler.get_feature_names_out())
X_test = pd.DataFrame(X_scaler.transform(X_test), columns=X_scaler.get_feature_names_out())

In [72]:
# Задание 1. Без регуляризации

model = linear_model.LinearRegression()
model.fit(X_train, y_train)
print(f'MSE metrics train: {metrics.mean_squared_error(y_train, model.predict(X_train))}')
print(f'MSE metrics test: {metrics.mean_squared_error(y_test, model.predict(X_test))}')

MSE metrics train: 0.5179331255246699
MSE metrics test: 0.5558915986952442


In [73]:
# Задание 1. Ridge

# инициализируем переменную для отслеживания лучшего MSE
# np.inf - константа библиотеки numpy для обозначения положительной бесконечности (числа, большего чем любое вычисляемое)
# np.inf - используется как точка отсчета для поиска наименьшего в цикле
best_mse_train = np.inf
best_mse_test = np.inf
best_alpha = None

for i in alphas:
  model = linear_model.Ridge(alpha=i)
  model.fit(X_train, y_train)
  train_mse = metrics.mean_squared_error(y_train, model.predict(X_train))
  test_mse = metrics.mean_squared_error(y_test, model.predict(X_test))
  if test_mse < best_mse_test:
    best_mse_train = train_mse
    best_mse_test = test_mse
    best_alpha = i

print(f'Best MSE metrics train (Ridge): {best_mse_train}')
print(f'Best MSE metrics test (Ridge): {best_mse_test}')
print(f'Best hiperparam (Ridge): {best_alpha}')

Best MSE metrics train (Ridge): 0.5181476333801714
Best MSE metrics test (Ridge): 0.5543310655086396
Best hiperparam (Ridge): 50


In [74]:
# Задание 1. Lasso

best_mse_train = np.inf
best_mse_test = np.inf
best_alpha = None

for i in alphas:
  model = linear_model.Lasso(alpha=i)
  model.fit(X_train, y_train)
  train_mse = metrics.mean_squared_error(y_train, model.predict(X_train))
  test_mse = metrics.mean_squared_error(y_test, model.predict(X_test))
  if test_mse < best_mse_test:
    best_mse_train = train_mse
    best_mse_test = test_mse
    best_alpha = i

print(f'Best MSE metrics train (Ridge): {best_mse_train}')
print(f'Best MSE metrics test (Ridge): {best_mse_test}')
print(f'Best hiperparam (Ridge): {best_alpha}')

Best MSE metrics train (Ridge): 0.52338408362522
Best MSE metrics test (Ridge): 0.5482548967938964
Best hiperparam (Ridge): 0.01


In [75]:
# Задание 1. ElasticNet

best_mse_train = np.inf
best_mse_test = np.inf
best_alpha = None

for i in alphas:
  model = linear_model.ElasticNet(alpha=i)
  model.fit(X_train, y_train)
  train_mse = metrics.mean_squared_error(y_train, model.predict(X_train))
  test_mse = metrics.mean_squared_error(y_test, model.predict(X_test))
  if test_mse < best_mse_test:
    best_mse_train = train_mse
    best_mse_test = test_mse
    best_alpha = i

print(f'Best MSE metrics train (Ridge): {best_mse_train}')
print(f'Best MSE metrics test (Ridge): {best_mse_test}')
print(f'Best hiperparam (Ridge): {best_alpha}')

Best MSE metrics train (Ridge): 0.5211546671597785
Best MSE metrics test (Ridge): 0.5499533403867808
Best hiperparam (Ridge): 0.01


In [None]:
model.coef_