

# Data

> The data contains the following columns:

> * '`Avg. Area Income`': Avg. income of residents of the city the house is located in.
> * '`Avg. Area House Age`': Avg Age of Houses in same city
> * '`Avg. Area Number of Rooms`': Avg Number of Rooms for Houses in same city
> * '`Avg. Area Number of Bedrooms`': Avg Number of Bedrooms for Houses in same city
> * '`Area Population`': Population of the city the house is located in
> * '`Price`': Price that the house sold at
> * '`Address`': Address for the house


In [73]:
!pip install hvplot

# 📤 Import Libraries

In [74]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import hvplot.pandas
%matplotlib inline

# sns.set_style("whitegrid")
# plt.style.use("fivethirtyeight")

## Check out the Data

In [75]:
# Load the housing dataset. The path is an absolute Kaggle input location,
# so it has to be adjusted when the notebook runs anywhere else.
USAhousing = pd.read_csv('/kaggle/input/usa-housing/USA_Housing.csv')
# Preview the first rows.
USAhousing.head()

In [76]:
# Column dtypes and non-null counts.
USAhousing.info()

In [77]:
# Summary statistics of the dataset's columns.
USAhousing.describe()

In [78]:
# List the column names.
USAhousing.columns

# 📊 Exploratory Data Analysis (EDA)

Let's create some simple plots to check out the data!

In [79]:
# Pairwise relationships between the columns of the dataset.
sns.pairplot(USAhousing)

In [80]:
# Overlaid (subplots=False) histogram, 1000 px wide, grouped by 'Price'.
# NOTE(review): 'Price' is the continuous target, so grouping by it yields one
# group per distinct price — presumably an overlaid histogram of the feature
# columns was intended; confirm before relying on this plot.
USAhousing.hvplot.hist(by='Price', subplots=False, width=1000)

In [81]:
# Distribution of the target column 'Price'.
USAhousing.hvplot.hist("Price")

In [82]:
# Price against average house age in the area.
USAhousing.hvplot.scatter(x='Avg. Area House Age', y='Price')

In [83]:
# Price against average income in the area.
USAhousing.hvplot.scatter(x='Avg. Area Income', y='Price')

In [84]:
# Column names once more, as a reference for the correlation and feature-selection steps below.
USAhousing.columns

In [85]:
# Correlation heatmap over the numeric columns only. The dataset also holds the
# text column 'Address', which cannot be correlated and makes DataFrame.corr()
# raise on pandas >= 2.0 (numeric_only now defaults to False), so it is filtered
# out explicitly; select_dtypes works on older pandas versions as well.
sns.heatmap(USAhousing.select_dtypes(include='number').corr(), annot=True)

# Training a Linear Regression Model


## X and y arrays

In [86]:
# Predictors: the five numeric area-level columns ('Address' and the target are left out).
features = USAhousing.loc[:, [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]]
# Regression target: the sale price.
label = USAhousing.loc[:, 'Price']
features

## Train Test Split

In [87]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.3, random_state=42)

# Preparing Data For Linear Regression


In [88]:
import torch
from torch.utils import data

In [89]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Single-step preprocessing pipeline that standardises every feature.
pipeline = Pipeline([
    ('std_scalar', StandardScaler())
])

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the test rows leaks into preprocessing.
# The tensors are cast to float32 once here: the nn.Linear parameters are
# float32, so this avoids re-converting the data on every mini-batch and
# evaluation (the later .float() calls simply become no-ops).
X_train = torch.from_numpy(pipeline.fit_transform(X_train)).float()
X_test = torch.from_numpy(pipeline.transform(X_test)).float()
X_train, X_test

In [90]:
# Turn both target splits into column-vector tensors of shape (n_samples, 1),
# matching the (n_samples, 1) output of the network.
y_train, y_test = (
    torch.from_numpy(np.array(split)).reshape(-1, 1)
    for split in (y_train, y_test)
)
y_train, y_test

## LINEAR REGRESSION

In [91]:
def load_array(data_arrays, batch_size, is_train=True):
    """Wrap aligned tensors in a batched PyTorch ``DataLoader``.

    ``data_arrays`` is unpacked into a ``TensorDataset``. The returned loader
    yields batches of ``batch_size`` samples and shuffles them on every pass
    when ``is_train`` is true.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(tensor_dataset, batch_size=batch_size, shuffle=is_train)
    return loader

In [92]:
from torch import nn

# Linear regression as a network: one linear layer mapping the 5 input features to a single output.
net = nn.Sequential(nn.Linear(5, 1))

In [93]:
# Seed PyTorch's global RNG so the random weight initialisation below (and any
# later draws from the same generator, such as batch shuffling) are repeatable
# after a kernel restart, consistent with the fixed random_state of the
# train/test split.
torch.manual_seed(42)
# Small Gaussian initial weights (mean 0, std 0.01) and a zero bias.
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)

In [94]:
# Mean squared error between predictions and targets.
loss = nn.MSELoss()

In [95]:
# Plain stochastic gradient descent over all parameters of net, learning rate 0.01.
trainer = torch.optim.SGD(net.parameters(), lr=0.01)

In [96]:
# Mini-batches of 100 training rows; load_array shuffles by default (is_train=True).
batch_size = 100
data_iter = load_array((X_train, y_train), batch_size)

In [103]:
num_epochs = 25

# Cast the full training set once for the per-epoch evaluation instead of
# repeating the conversion in every epoch.
X_train_eval, y_train_eval = X_train.float(), y_train.float()

for epoch in range(num_epochs):
    # One pass of mini-batch SGD over the training data.
    for X_batch, y_batch in data_iter:
        batch_loss = loss(net(X_batch.float()), y_batch.float())
        trainer.zero_grad()  # clear gradients left over from the previous step
        batch_loss.backward()
        trainer.step()
    # The epoch-level loss is only reported, never back-propagated, so skip
    # building an autograd graph for it.
    with torch.no_grad():
        epoch_loss = loss(net(X_train_eval), y_train_eval)
    print(f'epoch {epoch + 1}, loss {epoch_loss:f}')

In [104]:
# Pull the learned parameters out of the single linear layer. Its weight
# matrix has one row (one output unit), so row 0 is the flat weight vector.
w, b = net[0].weight.data[0], net[0].bias.data
w, b

# PREDICT WITH TEST DATA SET

In [105]:
# Manual forward pass on the held-out features: matrix-vector product plus bias.
pred = X_test.float().mv(w) + b
# Flatten the (n_samples, 1) target column to 1-D so it lines up with pred.
true_val = y_test.reshape(-1)
pred, true_val, y_test

In [106]:
# Scatter predicted against true prices; the closer the points lie to the diagonal, the better the fit.
pd.DataFrame({'True Values': true_val, 'Predicted Values': pred}).hvplot.scatter(x='True Values', y='Predicted Values')