In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load your dataset
dataset = pd.read_csv("Housing.csv")

In [2]:
numerical_features = ["area", "bedrooms", "stories","parking"]

boolean_features = ["mainroad", "basement", "guestroom", "hotwaterheating", "airconditioning", "prefarea"]

categorical_features = ["furnishingstatus"]


In [3]:
scaler = StandardScaler()

dataset[numerical_features] = scaler.fit_transform(dataset[numerical_features])

dataset[boolean_features] = dataset[boolean_features].apply(lambda col:col.map({"yes":1, "no":0}))
dataset[categorical_features] = dataset[categorical_features].apply(lambda col:col.map({"furnished":1, "semi-furnished":0.5,"unfurnished":0}))

In [4]:
dataset.head(10)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,1.046726,1.403419,2,1.378217,1,0,0,0,1,1.517692,1,1.0
1,12250000,1.75701,1.403419,4,2.532024,1,0,0,0,1,2.679409,0,1.0
2,12250000,2.218232,0.047278,2,0.22441,1,0,1,0,0,1.517692,1,0.5
3,12215000,1.083624,1.403419,2,0.22441,1,0,1,0,1,2.679409,1,1.0
4,11410000,1.046726,1.403419,1,0.22441,1,1,1,0,1,1.517692,0,1.0
5,10850000,1.083624,0.047278,3,-0.929397,1,0,1,0,1,1.517692,1,0.5
6,10150000,1.581745,1.403419,3,2.532024,1,0,0,0,1,1.517692,1,0.5
7,10150000,5.096263,2.75956,3,0.22441,1,0,0,0,0,-0.805741,0,0.0
8,9870000,1.360358,1.403419,1,0.22441,1,1,1,0,1,1.517692,1,1.0
9,9800000,0.276484,0.047278,2,2.532024,1,1,0,0,1,0.355976,1,0.0


In [5]:
dataset["price"] = dataset["price"]/1e6


In [6]:
target = dataset["price"].values

features = dataset.drop("price", axis=1).values


X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [7]:
X_train

array([[ 0.3917898 ,  0.04727831,  2.        , ...,  0.35597563,
         0.        ,  1.        ],
       [ 0.94525725,  0.04727831,  2.        , ...,  2.67940935,
         0.        ,  0.5       ],
       [-0.61552098, -1.30886273,  1.        , ...,  1.51769249,
         0.        ,  1.        ],
       ...,
       [-0.30004453,  0.04727831,  2.        , ...,  0.35597563,
         0.        ,  1.        ],
       [-0.51220705, -1.30886273,  1.        , ..., -0.80574124,
         0.        ,  0.        ],
       [ 0.16117836,  0.04727831,  2.        , ...,  0.35597563,
         0.        ,  0.5       ]])

In [8]:
# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions and evaluate
y_pred = model.predict(X_test)
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}")

Mean Squared Error: 1.7717511165940407


In [9]:
import joblib

# Save the model to a file
joblib.dump(model, "linear_regression_model.pkl")
print("Model saved as linear_regression_model.pkl")

Model saved as linear_regression_model.pkl


In [10]:
# Load the model
loaded_model = joblib.load("linear_regression_model.pkl")

# Use the loaded model for predictions
new_predictions = loaded_model.predict(X_test)
print(new_predictions)


[5.20369171 7.25700402 3.0628286  4.55959165 3.33293231 3.56308068
 5.64546631 6.41397967 2.75583155 2.66893866 9.5706003  2.82743151
 3.19568626 3.35226399 3.7138795  5.30108824 2.98792027 4.81079982
 4.3830317  3.52509219 5.7962595  5.8400007  2.76021461 4.76259015
 5.20475574 7.51554272 3.25468169 5.23616446 8.17852317 3.43416616
 6.44392159 3.34600478 6.74232474 4.15493684 3.58915247 5.78812593
 4.76837018 4.39168404 3.21765705 4.63819662 4.52216028 3.54128406
 7.23813612 4.02151569 3.70197877 4.29887956 6.70500402 3.99346652
 3.79818505 3.45182156 7.29399687 2.83290527 4.37869815 4.46800285
 3.7146239  2.7184669  7.52444965 2.95043715 4.19459629 2.79582777
 4.80178873 3.6182303  5.09168577 4.24812271 4.7299642  4.62098294
 7.21584677 3.48510619 5.93382014 6.23658098 4.80992361 5.13092023
 4.52728287 8.00686907 3.54392764 5.46308605 3.90244129 4.66106363
 4.82537089 4.27680347 7.75407271 4.00857323 6.50097791 5.38679601
 2.78534301 6.83635637 2.63855349 3.63423199 8.00407241 8.0483