In [60]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.impute import SimpleImputer

# Загрузка данных из файлов
train_data_path = pd.read_csv('train.csv')
test_data_path = pd.read_csv('test.csv')

# Предварительная обработка данных

# Отделяем колонку SalePrices от остальных признаков в таблице train.csv
X_train = train_data_path.drop(
    'SalePrice', 
    axis=1
)
y_train = train_data_path['SalePrice']

# Применение метода One-Hot Encoding для обработки категориальных переменных (преобразование НЕ числовых значение в числовые)
X_train_encoded = pd.get_dummies(X_train)
X_test_encoded = pd.get_dummies(test_data_path)

# Удаляем стобцы со значениями NaN
X_train_dropna = X_train_encoded.dropna(axis=1)
X_test_dropna = X_test_encoded.dropna(axis=1)

# Выравнивание колонок в X_train_encoded и X_test_encoded
X_train_dropna, X_test_dropna = X_train_dropna.align(
    X_test_dropna, 
    join='inner', 
    axis=1
)

# Заполнение пропущенных значений NaN средним значением
# imputer = SimpleImputer(strategy = 'mean')
# X_train_imputed = imputer.fit_transform(X_train_encoded)
# X_test_imputed = imputer.fit_transform(X_test_encoded)

# Обучение модели

# Создание и обучение модели линейной регрессии
model = LinearRegression()
model.fit(X_train_dropna, y_train)

# Предсказание на данных test.csv
# X_test = test_data_path.drop('SalePrice', axis=1)
y_predictions = model.predict(X_test_dropna)

# Оценка качества предсказаний
# mae = mean_absolute_error(y_true, y_predicted)
# print('MAE:', mae)

# Сохранение результатов
id_column = test_data_path['Id']
predictions = pd.DataFrame({
    'Id': id_column, 
    'SalePrice': y_predictions
})
predictions.to_csv(
    'predictions.csv', index = False
)

In [56]:
test_data_path

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,...,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,...,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,...,144,0,,,,0,1,2010,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2915,160,RM,21.0,1936,Pave,,Reg,Lvl,AllPub,...,0,0,,,,0,6,2006,WD,Normal
1455,2916,160,RM,21.0,1894,Pave,,Reg,Lvl,AllPub,...,0,0,,,,0,4,2006,WD,Abnorml
1456,2917,20,RL,160.0,20000,Pave,,Reg,Lvl,AllPub,...,0,0,,,,0,9,2006,WD,Abnorml
1457,2918,85,RL,62.0,10441,Pave,,Reg,Lvl,AllPub,...,0,0,,MnPrv,Shed,700,7,2006,WD,Normal


In [58]:
predictions

Unnamed: 0,Id,SalePrice
0,1461,112061.367322
1,1462,127269.290235
2,1463,180561.742653
3,1464,200243.700760
4,1465,210754.141298
...,...,...
1454,2915,76450.276569
1455,2916,84915.081702
1456,2917,166933.676538
1457,2918,119638.035128
