In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides any warnings emitted by the
# estimators fitted further down (e.g. solver or deprecation notices) —
# consider narrowing it to the specific category that was noisy.
warnings.filterwarnings('ignore')

### Загрузка данных

In [None]:
# Read the CSV into a DataFrame; the path is relative to the working directory.
data = pd.read_csv(filepath_or_buffer='Mobile-Price-Prediction-cleaned_data.csv')
# Preview the first five rows as a quick sanity check of the load.
data.head(n=5)

In [None]:
# Column to predict; every remaining column is treated as a feature.
target = 'Price'
y = data[target]
X = data.drop(labels=[target], axis='columns')
# Show the feature-matrix and target shapes side by side.
X.shape, y.shape

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

### Масштабирование

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html

In [None]:
# Standard scaler with default settings; it is fitted on the training split in the next cell.
st_scaler = StandardScaler()

In [None]:
# Learn the scaling statistics from the training split only (the test split is never fitted on).
st_scaler.fit(X=X_train)

In [None]:
# Apply the statistics learned from the training split to both splits, train first.
X_train_st, X_test_st = (
    st_scaler.transform(part) for part in (X_train, X_test)
)

In [None]:
# Min-max scaler with default settings; fitted and applied in the next cell.
min_max_scaler = MinMaxScaler()

In [None]:
# Fit the scaler on the training split and rescale that split,
# then rescale the test split with the same training-derived ranges.
X_train_min_max = min_max_scaler.fit(X_train).transform(X_train)
X_test_min_max = min_max_scaler.transform(X_test)

In [None]:
# Scaling must not change the matrix dimensions: show raw, standardized and min-max shapes.
tuple(matrix.shape for matrix in (X_train, X_train_st, X_train_min_max))

### Применение алгоритма

In [None]:
# Baseline: ridge regression with default settings on the unscaled training features.
lr = Ridge()
lr.fit(X=X_train, y=y_train)

In [None]:
# Score the model trained on unscaled features against the held-out split.
y_pred = lr.predict(X=X_test)
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

# One print call, one metric per line.
print(f'MSE: {mse:.2f}', f'R2: {r2:.2f}', sep='\n')

In [None]:
# Fresh ridge model, this time trained on the standardized training features.
lr = Ridge()
lr.fit(X=X_train_st, y=y_train)

In [None]:
# Score the model trained on standardized features; the test split must be
# passed in its standardized form as well.
y_pred = lr.predict(X=X_test_st)
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

# One print call, one metric per line.
print(f'MSE: {mse:.2f}', f'R2: {r2:.2f}', sep='\n')

In [None]:
# Fresh ridge model, this time trained on the min-max scaled training features.
lr = Ridge()
lr.fit(X=X_train_min_max, y=y_train)

In [None]:
# Score the model trained on min-max scaled features; the test split must be
# passed in its min-max scaled form as well.
y_pred = lr.predict(X=X_test_min_max)
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

# One print call, one metric per line.
print(f'MSE: {mse:.2f}', f'R2: {r2:.2f}', sep='\n')

### Использование Pipeline

In [None]:
from sklearn.pipeline import Pipeline

https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html

In [None]:
# Chain standardization and ridge regression into a single estimator, so the
# scaler is fitted and applied together with the model on whatever data is passed.
pipe_st = Pipeline(
    steps=[('st_scaler', StandardScaler()), ('ridge', Ridge())],
)

In [None]:
# Fit scaler and model in one call, directly on the raw training features.
pipe_st.fit(X=X_train, y=y_train)

In [None]:
# The pipeline takes the raw test features; scaling happens inside it.
y_pred = pipe_st.predict(X=X_test)
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

# One print call, one metric per line.
print(f'MSE: {mse:.2f}', f'R2: {r2:.2f}', sep='\n')

### Использование make_pipeline

In [None]:
from sklearn.pipeline import make_pipeline

https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html

In [None]:
# Same idea as an explicit Pipeline (min-max scaling followed by ridge), but
# built with make_pipeline, so no step names are passed here.
pipe_min_max = make_pipeline(MinMaxScaler(), Ridge())

In [None]:
# Fit scaler and model in one call, directly on the raw training features.
pipe_min_max.fit(X=X_train, y=y_train)

In [None]:
# The pipeline takes the raw test features; min-max scaling happens inside it.
y_pred = pipe_min_max.predict(X=X_test)
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

# One print call, one metric per line.
print(f'MSE: {mse:.2f}', f'R2: {r2:.2f}', sep='\n')

### Использование PolynomialFeatures

In [None]:
from sklearn.preprocessing import PolynomialFeatures

https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

In [None]:
# Standalone degree-2 polynomial expander, used in the following cells to
# inspect the generated feature matrix and its column names.
poly = PolynomialFeatures(degree=2)

In [None]:
# Fit the expander on the training split and expand that split, then expand
# the test split with the already-fitted expander.
X_train_poly = poly.fit(X_train).transform(X_train)
X_test_poly = poly.transform(X_test)
# Show how wide the expanded matrices are.
X_train_poly.shape, X_test_poly.shape

In [None]:
# List the names of the columns produced by the fitted polynomial expander.
poly.get_feature_names_out()

In [None]:
# Three-stage estimator: polynomial expansion, then standardization of the
# expanded features, then ridge regression.
pipe_poly = Pipeline(
    steps=[
        ('poly', PolynomialFeatures(degree=2)),
        ('st_scaler', StandardScaler()),
        ('ridge', Ridge()),
    ],
)

In [None]:
# Fit all three stages in one call on the raw training features.
pipe_poly.fit(X=X_train, y=y_train)

In [None]:
# The pipeline takes the raw test features; expansion and scaling happen inside it.
y_pred = pipe_poly.predict(X=X_test)
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

# One print call, one metric per line.
print(f'MSE: {mse:.2f}', f'R2: {r2:.2f}', sep='\n')

### Подбор коэффициента регуляризации

In [None]:
from sklearn.linear_model import RidgeCV, LassoCV

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeCV.html

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoCV.html

In [None]:
# Same polynomial pipeline as before, except the final step is RidgeCV with its
# default settings instead of a plain Ridge.
pipe_poly = Pipeline(
    steps=[
        ('poly', PolynomialFeatures(degree=2)),
        ('st_scaler', StandardScaler()),
        ('ridge', RidgeCV()),
    ],
)

# Train on the training split and predict the held-out split in one chain
# (fit returns the fitted pipeline itself).
y_pred = pipe_poly.fit(X_train, y_train).predict(X_test)
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

# One print call, one metric per line.
print(f'MSE: {mse:.2f}', f'R2: {r2:.2f}', sep='\n')

In [None]:
# Regularization strength selected by the fitted RidgeCV step.
# The step is looked up by its name rather than by position, so this keeps
# working if steps are added to or reordered in the pipeline.
pipe_poly.named_steps['ridge'].alpha_

In [None]:
# Candidate alpha values the RidgeCV step chose from (the constructor default,
# since RidgeCV() was created without arguments).
# The step is looked up by its name rather than by position, so this keeps
# working if steps are added to or reordered in the pipeline.
pipe_poly.named_steps['ridge'].alphas

### Подбор гиперпараметров

In [None]:
from sklearn.model_selection import GridSearchCV

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html

In [None]:
# Pipeline under tuning: polynomial expansion -> standardization -> ridge regression.
# random_state is pinned on Ridge because the 'sag' and 'saga' solvers in the grid
# shuffle the data randomly; without a seed the search results (and possibly the
# selected parameters) can differ from one run to the next.
pipe_poly = Pipeline(steps=[
    ('poly', PolynomialFeatures(degree=2)),
    ('st_scaler', StandardScaler()),
    ('ridge', Ridge(random_state=42))
])

# Keys follow the '<step name>__<parameter>' convention to address parameters of
# individual pipeline steps; 'poly__degree' overrides the degree=2 set above.
param_grid = [{
    'poly__degree': [1,2,3],
    'ridge__alpha': [ 0.1, 1, 10],
    'ridge__solver' : [ 'sparse_cg', 'sag', 'saga'],
    }]


# Exhaustive search over all 3 * 3 * 3 = 27 combinations with 3-fold
# cross-validation. No `scoring` is passed, so the pipeline's own score method
# decides which combination is best.
gridsearch = GridSearchCV(estimator = pipe_poly,
                          param_grid = param_grid,
                          cv = 3,
                          verbose=0)

# Only the training split is used; the test split stays untouched for the final check.
gridsearch.fit(X_train, y_train)

In [None]:
# Parameter combination from param_grid that the search selected as best.
gridsearch.best_params_

In [None]:
# Cross-validated score of the selected combination.
# NOTE(review): no `scoring` was passed to GridSearchCV, so this is presumably
# R^2 (the default score of a regressor), not MSE — confirm before comparing
# it with the MSE values printed elsewhere.
gridsearch.best_score_

In [None]:
# Predict the held-out split through the fitted search object and score it.
y_pred = gridsearch.predict(X=X_test)
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

# One print call, one metric per line.
print(f'MSE: {mse:.2f}', f'R2: {r2:.2f}', sep='\n')