## [DO NOT CHANGE THIS CELL] Import Dependencies
#### Do not use additional Packages! USE ONLY BELOW IMPORTED ONES

In [1]:
import os
import csv
import random
import numpy as np

import torch
import torch.nn as nn
from tqdm.notebook import tqdm

## [DO NOT CHANGE THIS CELL] Define file path (We use default data from colab)

In [2]:
train_csv_fp = './sample_data/california_housing_train.csv'
test_csv_fp = './sample_data/california_housing_test.csv'

## [DO NOT CHANGE THIS CELL] Define function for read CSV files

In [3]:
def read_csv(csv_fp):
    data = dict()
    all_names = []
    with open(csv_fp) as f:
        reader = csv.reader(f)
        for i, row in enumerate(reader):
            if i == 0:
                for name in row:
                    data[name] = list()
                    all_names.append(name)
            else:
                for ii, val in enumerate(row):
                    data[all_names[ii]].append(float(val))
    
    return data, all_names

## [DO NOT CHANGE THIS CELL] Read CSV files as a DICTIONARY

In [4]:
train_data, names = read_csv(train_csv_fp)
test_data, _ = read_csv(test_csv_fp)
print('Attributes are: ', names)

Attributes are:  ['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income', 'median_house_value']


## [DO NOT CHANGE THIS CELL] Names of INPUT/OUTPUT

In [5]:
input_name = names[:-1]
output_name = names[-1]
print('Inputs are: ', input_name)
print('Outputs are: ', output_name)

Inputs are:  ['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income']
Outputs are:  median_house_value


## [DO NOT CHANGE THIS CELL] Normalize the data

In [6]:
def normalize(data, mean=None, std=None):
    stat_flag = False
    if mean is None and std is None:
        mean = dict()
        std = dict()
        stat_flag = True
    
    for k, v in data.items():
        if stat_flag:
            mean[k] = np.mean(v)
            std[k] = np.std(v)
        data[k] = (np.array(v) - mean[k] ) / std[k]
    return data, mean, std

train_data, mean, std = normalize(train_data)
test_data, _, _ = normalize(test_data, mean, std)

## **[CHANGE ONLY BELOW CELL]** Code for Training

In [7]:
def train(data, seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    weight = np.random.rand(len(input_name), 1)
    bias = np.random.rand(1)

    ##### write your code to optimize weight and bias #####


    # YOUR CODE HERE #
    num_data = len(data[output_name])
    learning_rate = 0.0005

    weight = nn.Parameter(torch.tensor(weight),requires_grad=True)
    bias = nn.Parameter(torch.tensor(bias), requires_grad=True)

    for i in range(num_data):
        curr_input = np.asarray([data[name][i] for name in input_name])
        
        curr_input = torch.tensor(curr_input)

        pred_y = (curr_input.T @ weight) + bias

        curr_y = torch.tensor(data[output_name][i])

        criteria = nn.MSELoss()
        
        pred_loss = criteria(pred_y, curr_y)

        pred_loss.backward()
        #print(pred_loss)

        
        with torch.no_grad():
            weight -= learning_rate * weight.grad   
            bias -= learning_rate * bias.grad
            
        
        weight.grad.zero_()
        bias.grad.zero_()
    
    weight = weight.data.numpy()
    bias = bias.data.numpy()
    



    #######################################################

    # do not modify this return part.
    return weight, bias

##[DO NOT CHANGE THIS CELL] Code for testing

In [8]:
def test(test_data, weight, bias):
    num_data = len(test_data[output_name])

    avg_loss = 0.0
    for i in range(num_data):
        curr_input = np.asarray([test_data[name][i] for name in input_name])
        curr_answer = test_data[output_name][i]

        curr_pred = (curr_input.T @ weight) + bias

        curr_loss = np.linalg.norm(curr_answer - curr_pred)

        avg_loss += curr_loss / num_data

    return avg_loss

##[DO NOT CHANGE THIS CELL] Code for testing

In [9]:
max_iter = 50
total_loss = 0.0
for seed in tqdm(range(max_iter)):
    weight, bias = train(train_data, seed)

    test_loss = test(test_data, weight, bias)

    print('[SEED {}] TEST LOSS : {}'.format(seed, test_loss.item()))

    total_loss += test_loss / max_iter

print('TOTAL TEST LOSS: {}'.format(total_loss))

  0%|          | 0/50 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


[SEED 0] TEST LOSS : 0.5225703664850061
[SEED 1] TEST LOSS : 0.565486710530779
[SEED 2] TEST LOSS : 0.5492934184079863
[SEED 3] TEST LOSS : 0.6459874003663844
[SEED 4] TEST LOSS : 0.6596598288944044
[SEED 5] TEST LOSS : 0.5736857021532871
[SEED 6] TEST LOSS : 0.6215402460792067
[SEED 7] TEST LOSS : 0.5521964774483433
[SEED 8] TEST LOSS : 0.6367710253521576
[SEED 9] TEST LOSS : 0.549699554210568
[SEED 10] TEST LOSS : 0.6153529259173802
[SEED 11] TEST LOSS : 0.46827476783978256
[SEED 12] TEST LOSS : 0.4847978414178089
[SEED 13] TEST LOSS : 0.5616645571360886
[SEED 14] TEST LOSS : 0.5734340429302736
[SEED 15] TEST LOSS : 0.6318031620414392
[SEED 16] TEST LOSS : 0.576336272745666
[SEED 17] TEST LOSS : 0.5932975935406475
[SEED 18] TEST LOSS : 0.620969065257252
[SEED 19] TEST LOSS : 0.476057158630443
[SEED 20] TEST LOSS : 0.5821091580048319
[SEED 21] TEST LOSS : 0.5181989854207378
[SEED 22] TEST LOSS : 0.5641266323366495
[SEED 23] TEST LOSS : 0.5761001929821654
[SEED 24] TEST LOSS : 0.695697

##[DO NOT CHANGE THIS CELL] Code for Grading

In [10]:
######## DO NOT CHANGE THIS GRADING PART ########
THRESHOLD = 0.5
max_point = 40

if total_loss < THRESHOLD:
    your_point = max_point
else:
    your_point = np.exp( -0.5* (total_loss - THRESHOLD) ) * max_point

print('YOUR POINT : {}/{}'.format(your_point, max_point))
####################################################

YOUR POINT : 38.69840265693643/40
