In [3]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.utils.data.dataloader as dataloader
import torch.nn.functional as F
import pandas as pd

In [9]:
url1 = 'https://raw.githubusercontent.com/tvelichkovt/PyTorch/master/california_housing_train.csv'
train_data = train_csv = pd.read_csv(url1)

print(train_csv.head())

   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -114.31     34.19                15.0       5612.0          1283.0   
1    -114.47     34.40                19.0       7650.0          1901.0   
2    -114.56     33.69                17.0        720.0           174.0   
3    -114.57     33.64                14.0       1501.0           337.0   
4    -114.57     33.57                20.0       1454.0           326.0   

   population  households  median_income  median_house_value  
0      1015.0       472.0         1.4936             66900.0  
1      1129.0       463.0         1.8200             80100.0  
2       333.0       117.0         1.6509             85700.0  
3       515.0       226.0         3.1917             73400.0  
4       624.0       262.0         1.9250             65500.0  


In [17]:
print(train_csv.columns)

Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value'],
      dtype='object')


In [23]:
train_mean =  train_csv.mean()
train_std = train_csv.std()

print("train_mean is ", train_mean, "train_std is", train_std)

train_mean is  longitude               -119.562108
latitude                  35.625225
housing_median_age        28.589353
total_rooms             2643.664412
total_bedrooms           539.410824
population              1429.573941
households               501.221941
median_income              3.883578
median_house_value    207300.912353
dtype: float64 train_std is longitude                  2.005166
latitude                   2.137340
housing_median_age        12.586937
total_rooms             2179.947071
total_bedrooms           421.499452
population              1147.852959
households               384.520841
median_income              1.908157
median_house_value    115983.764387
dtype: float64


In [11]:
# creating custom dataset class

class MyDataset(torch.utils.data.Dataset):
    def __init__(self, csv_file):
        self.data_frame = pd.read_csv(csv_file)
        self.norm_data = (self.data_frame - train_mean)/train_std  

    def __len__(self):
        return len(self.norm_data)

    def __getitem__(self, idx):
        data = self.norm_data.iloc[idx, 2:8].values # keep all except median_house_value  
        label = self.norm_data.iloc[idx, 8:9].values # keep only median_house_value  
        
        data = torch.tensor(data, dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.float32)

        return {'data': data, 'label':label}

In [12]:
dataset_len = len(train_data)
train_data = torch.utils.data.DataLoader(dataset=train_data, shuffle=True, batch_size=10)

In [27]:
print("dataset_len is", dataset_len)

dataset_len is 17000


In [31]:
# our model

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(in_features=6, out_features=18, bias=True)
        self.fc2 = nn.Linear(in_features=18, out_features=18, bias=True)
        self.fc3 = nn.Linear(in_features=18, out_features=12, bias=True)
        self.fc4 = nn.Linear(in_features=12, out_features=1, bias=True)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

net = Network()

In [32]:
# loss and optimizer
optimizer = optim.Adam(net.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [16]:
# training the model

for epoch in range(15):
    running_loss = 0.0
    for i, value in enumerate(train_data):
        inputs = value['data']
        labels = value['label']
        prediction = net(inputs) # passing inputs to our model to get prediction
        loss = criterion(prediction, labels)
        running_loss += loss.item() * inputs.size(0) # multiplying with batch size
        optimizer.zero_grad() # reset all gradient calculation
        loss.backward() # this is backpropagation to calculate gradients
        optimizer.step() # applying gradient descent to update weights and bias values

print('epoch: ', epoch, ' loss: ', running_loss/dataset_len)

KeyError: 7858