#### Import of modules, loading data

In [47]:
#Import of modules
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest, f_regression

In [48]:
# Load the Ford price dataset from the Excel workbook; the path is relative
# to the directory the notebook is run from.
data = pd.read_excel('data/data_ford_price.xlsx')

In [49]:
# Preview the first rows to check which columns are available and what their values look like.
data.head()

Unnamed: 0,price,year,condition,cylinders,odometer,title_status,transmission,drive,size,lat,long,weather
0,43900,2016,4,6,43500,clean,automatic,4wd,full-size,36.4715,-82.4834,59.0
1,15490,2009,2,8,98131,clean,automatic,4wd,full-size,40.468826,-74.281734,52.0
2,2495,2002,2,8,201803,clean,automatic,4wd,full-size,42.477134,-82.949564,45.0
3,1300,2000,1,8,170305,rebuilt,automatic,4wd,full-size,40.764373,-82.349503,49.0
4,13865,2010,3,8,166062,clean,automatic,4wd,,49.210949,-123.11472,


#### Preparation of data

In [50]:
# Preparation of data
# Keep only the target and the numeric predictors, then drop incomplete rows.
# dropna() is chained into the assignment rather than called with
# inplace=True: the column selection yields a derived frame, and mutating it
# in place triggers pandas' SettingWithCopyWarning.
data = data[['price', 'year', 'cylinders', 'odometer', 'lat', 'long', 'weather']].dropna()

# Target is the price column; every remaining column is a predictor.
y = data['price']
X = data.drop(columns='price')

# 70/30 train/test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=40)


#### Getting columns for RFE and KBest

In [51]:
# Pick three features from the training split with two different selectors.

# Recursive feature elimination wrapped around a plain linear regression,
# removing one feature per iteration (step=1).
estimator = LinearRegression()
selector = RFE(estimator=estimator, n_features_to_select=3, step=1)
selector_RFE = selector.fit(X_train, y_train)
columns_for_rfe = selector_RFE.get_feature_names_out()

# Univariate selection scored with f_regression.
selector_kbest = SelectKBest(score_func=f_regression, k=3)
selector_kbest.fit(X_train, y_train)
columns_KBest = selector_kbest.get_feature_names_out()

# Show which columns each selector kept.
print('Columns for RFE:', columns_for_rfe)
print('Columns for KBest:', columns_KBest)

Columns for RFE: ['year' 'cylinders' 'lat']
Columns for KBest: ['year' 'cylinders' 'odometer']


#### Predicting for RFE

In [52]:
# Evaluate a linear regression restricted to the RFE-selected columns.
X = data[columns_for_rfe]

# Same test_size and random_state as the initial split, so the rows should
# land in the same train/test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=40
)

# Fit on the training part, predict the held-out part, and score with MAE.
RFE_liner_regression = LinearRegression().fit(X_train, y_train)
y_test_predict_rfe = RFE_liner_regression.predict(X_test)
MAE_RFE = mean_absolute_error(y_test, y_test_predict_rfe)

print('MAE for RFE: %.3f' % MAE_RFE)

MAE for RFE: 5096.570


#### Predicting for SelectKBest

In [53]:
# Evaluate a linear regression restricted to the SelectKBest columns.
X = data[columns_KBest]

# Same test_size and random_state as the initial split, so the rows should
# land in the same train/test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=40
)

# Fit on the training part, predict the held-out part, and score with MAE.
KBest_liner_regression = LinearRegression().fit(X_train, y_train)
y_test_predict_KBest = KBest_liner_regression.predict(X_test)
MAE_KBest = mean_absolute_error(y_test, y_test_predict_KBest)

print('MAE for KBest: %.3f' % MAE_KBest)

MAE for KBest: 4708.946


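#### Вывод
На тестовой выборке метод SelectKBest показал результат лучше, чем RFE: его средняя абсолютная ошибка (MAE) ниже примерно на 388 (около 7,6 %), а меньшее значение MAE означает более точный прогноз цены.  
MAE for KBest: 4708.946  
MAE for RFE: 5096.570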
