In [1]:
import turicreate
import numpy as np
# Load the dataset stored in the local 'home_data.sframe/' directory into an SFrame.
sales = turicreate.SFrame('home_data.sframe/')

In [9]:
import math

In [4]:
# Preview the 'bedrooms' and 'bathrooms' columns (print shows only the head of the SFrame).
print(sales[['bedrooms','bathrooms']])

+----------+-----------+
| bedrooms | bathrooms |
+----------+-----------+
|   3.0    |    1.0    |
|   3.0    |    2.25   |
|   2.0    |    1.0    |
|   4.0    |    3.0    |
|   3.0    |    2.0    |
|   4.0    |    4.5    |
|   3.0    |    2.25   |
|   3.0    |    1.5    |
|   3.0    |    1.0    |
|   3.0    |    2.5    |
+----------+-----------+
[21613 rows x 2 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.


In [5]:
def get_numpy_data(data_sframe, features, output):
    """Convert selected columns of a frame into NumPy arrays for regression.

    A column of ones named 'constant' is placed in front of the requested
    feature columns so that the first weight of a linear model acts as the
    intercept.

    Args:
        data_sframe: Tabular frame (e.g. a turicreate SFrame) supporting
            ``copy()``, column assignment, selection by a list of column
            names, and ``to_numpy()``.
        features: Sequence of feature column names to extract.
        output: Name of the target column.

    Returns:
        Tuple ``(features_matrix, output_array)`` where ``features_matrix``
        holds the 'constant' column followed by ``features`` (one row per
        frame row) and ``output_array`` holds the ``output`` column.
    """
    # Add the intercept column to a copy: assigning it on the argument itself
    # would leave a stray 'constant' column in the caller's frame.
    working_frame = data_sframe.copy()
    working_frame['constant'] = 1

    # list(...) lets callers pass any sequence of names, not only a list.
    selected_columns = ['constant'] + list(features)
    features_matrix = working_frame[selected_columns].to_numpy()
    output_array = working_frame[output].to_numpy()

    return (features_matrix, output_array)

In [6]:
def predict_outcome(feature_matrix, weights):
    """Return the linear-model predictions ``np.dot(feature_matrix, weights)``.

    Args:
        feature_matrix: Feature values; a 2-D array gives one prediction per
            row, a single 1-D row gives a scalar prediction.
        weights: Weight vector with one entry per feature column.

    Returns:
        The dot product of ``feature_matrix`` and ``weights``.
    """
    predictions = np.dot(feature_matrix, weights)
    return predictions

In [18]:
def feature_derivative(errors, feature):
    """Return twice the dot product of ``feature`` with ``errors``.

    With ``errors = predictions - output`` this is the derivative of the
    residual sum of squares with respect to the weight(s) of ``feature``.

    Args:
        errors: 1-D array of length n.
        feature: Either a single 1-D feature column of length n (scalar
            result), or a 2-D array of shape (k, n) with one feature per row,
            e.g. ``feature_matrix.T`` (result is a length-k vector).

    Returns:
        ``2 * np.dot(feature, errors)``.
    """
    # `feature` must be the left operand: np.dot(errors, feature) raises a
    # shape-mismatch error for a (k, n) matrix, whereas this order contracts
    # over the shared length-n axis. For a 1-D feature both orders agree.
    return 2 * np.dot(feature, errors)

In [33]:
def regression_gradient_descent(feature_matrix, output, initial_weights,
                                step_size, tolerance, max_iterations=None,
                                verbose=True):
    """Fit linear-regression weights by batch gradient descent.

    Each iteration computes the gradient at the current weights, takes one
    step of size ``step_size`` against it, and stops once the magnitude of
    that gradient is below ``tolerance``.

    Args:
        feature_matrix: 2-D NumPy array, one row per observation and one
            column per feature.
        output: 1-D array of target values, one per row of ``feature_matrix``.
        initial_weights: Starting weights, one per feature column.
        step_size: Multiplier applied to the gradient at every update.
        tolerance: Stop when the gradient magnitude falls below this value.
        max_iterations: Optional cap on the number of updates; ``None`` (the
            default) iterates until the tolerance is met.
        verbose: When true (the default), print the gradient magnitude at
            every iteration.

    Returns:
        NumPy array with the weights after the final update.

    Raises:
        FloatingPointError: If the gradient magnitude becomes NaN or infinite,
            which means the iteration has diverged and would never satisfy
            the tolerance check.
    """
    weights = np.array(initial_weights)
    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        iteration += 1
        predictions = predict_outcome(feature_matrix, weights)
        errors = predictions - output

        # One derivative per weight. Each feature column is handed over as a
        # 1-D vector, for which the helper's dot product is well defined
        # irrespective of operand order.
        feature_gradients = np.array(
            [feature_derivative(errors, column) for column in feature_matrix.T])
        gradient_magnitude = math.sqrt(np.sum(feature_gradients**2))
        if verbose:
            print("Gradient magnitude:" + str(gradient_magnitude))

        # A NaN/inf magnitude never compares below the tolerance, so without
        # this guard a diverging run would loop forever.
        if not math.isfinite(gradient_magnitude):
            raise FloatingPointError(
                "gradient descent diverged (non-finite gradient magnitude); "
                "try a smaller step_size")

        weights = weights - feature_gradients * step_size
        if gradient_magnitude < tolerance:
            break
    return weights

In [11]:
# Split the rows 80% / 20% into training and test sets; the fixed seed makes the split repeatable.
(train_data, test_data) = sales.random_split(0.8, seed=0)

In [12]:
# Model 1: predict 'price' from 'sqft_living' alone.
my_output = 'price'
simple_features = ['sqft_living']
(simple_feature_matrix, output) = get_numpy_data(
    train_data, simple_features, my_output)

# Gradient-descent settings: starting weights (one for the constant column,
# one for the feature), step size, and gradient-magnitude stopping threshold.
initial_weights = np.array([-47000.0, 1.0])
step_size = 7e-12
tolerance = 2.5e7

In [34]:
# Fit the single-feature model on the training data.
simple_weights = regression_gradient_descent(
    simple_feature_matrix,
    output,
    initial_weights,
    step_size,
    tolerance,
)

Gradient magnitude:50551530774393.43
Gradient magnitude:13127451023549.023
Gradient magnitude:3408996082527.6133
Gradient magnitude:885263580099.7512
Gradient magnitude:229889265719.7766
Gradient magnitude:59698688259.6684
Gradient magnitude:15502826422.108559
Gradient magnitude:4025844401.513712
Gradient magnitude:1045449748.1580132
Gradient magnitude:271487891.8631257
Gradient magnitude:70504114.69200751
Gradient magnitude:18320016.741784383


# Q1

In [35]:
# Fitted weights in column order: [constant (intercept), sqft_living].
simple_weights

array([-46999.88716555,    281.91211912])

In [62]:
# Build the test-set feature matrix and target vector for the single-feature model.
(test_simple_feature_matrix, test_output) = get_numpy_data(
    test_data, simple_features, my_output)

# Q2

In [40]:
# Predicted price for the first test-set row under the single-feature model.
predict_outcome(test_simple_feature_matrix[0],simple_weights)

356134.4431709297

In [63]:
# Actual target value of the first test-set row, for comparison with the prediction.
test_output[0]

310000.0

In [42]:
def regression_rss(feature_matrix, output, weights):
    """Return the residual sum of squares of a linear model.

    Args:
        feature_matrix: Feature values passed to ``predict_outcome``.
        output: Observed target values.
        weights: Model weights passed to ``predict_outcome``.

    Returns:
        Sum over all observations of ``(output - prediction) ** 2``.
    """
    residuals = output - predict_outcome(feature_matrix, weights)
    squared_residuals = residuals ** 2
    return np.sum(squared_residuals)

In [47]:
# RSS of the single-feature model: training set on the first line, test set on the second.
print(
    regression_rss(simple_feature_matrix, output, simple_weights),
    regression_rss(test_simple_feature_matrix, test_output, simple_weights),
    sep='\n',
)

1201918394833303.8
275400047593155.94


In [50]:
# Model 2: predict 'price' from 'sqft_living' and 'sqft_living15'.
model2_output = 'price'
model2_features = ['sqft_living', 'sqft_living15']

# Gradient-descent settings: starting weights (constant column first, then one
# per feature), step size, and gradient-magnitude stopping threshold.
model2_initial_weights = [-100000, 1, 1]
model2_step_size = 4e-12
model2_tolerance = 1e9

# NOTE: this rebinds `output`, which the single-feature model also used.
(feature_matrix, output) = get_numpy_data(
    train_data, model2_features, model2_output)

In [51]:
# Fit the two-feature model on the training data.
model2_weights = regression_gradient_descent(
    feature_matrix,
    output,
    model2_initial_weights,
    model2_step_size,
    model2_tolerance,
)

Gradient magnitude:73072020548860.52
Gradient magnitude:22673220965110.367
Gradient magnitude:7060794582096.98
Gradient magnitude:2275682394265.519
Gradient magnitude:928984105638.4108
Gradient magnitude:656307425178.072
Gradient magnitude:610615351821.3131
Gradient magnitude:593078765306.848
Gradient magnitude:578705920128.3187
Gradient magnitude:564945676163.1849
Gradient magnitude:551538681424.5585
Gradient magnitude:538452422879.0524
Gradient magnitude:525676912708.20874
Gradient magnitude:513204543689.9785
Gradient magnitude:501028100319.072
Gradient magnitude:489140559100.83356
Gradient magnitude:477535065232.9935
Gradient magnitude:466204926754.44226
Gradient magnitude:455143610499.4314
Gradient magnitude:444344738312.006
Gradient magnitude:433802083366.4155
Gradient magnitude:423509566576.45337
Gradient magnitude:413461253090.10345
Gradient magnitude:403651348867.41516
Gradient magnitude:394074197339.5451
Gradient magnitude:384724276147.0776
Gradient magnitude:375596193955.7322

In [64]:
# Build the test-set feature matrix and target vector for the two-feature model.
# NOTE: this rebinds `test_output`, which was first set for the single-feature model.
(test_feature_matrix, test_output) = get_numpy_data(
    test_data, model2_features, model2_output)

# Q3

In [65]:
# Two-feature model's predictions for every test-set row; element 0 is the first row's prediction.
predict_outcome(test_feature_matrix,model2_weights)

array([366651.41203656, 762662.39786164, 386312.09499712, ...,
       682087.39928241, 585579.27865729, 216559.20396617])

In [66]:
# Actual target value of the first test-set row, for comparison with the prediction.
test_output[0]

310000.0

# Q5

In [61]:
# Test-set RSS of the single-feature model, recomputed instead of pasted as a
# literal so the value stays in sync with the fitted weights.
regression_rss(test_simple_feature_matrix, test_output, simple_weights)

275400047593155.94

In [60]:
# Test-set RSS of the two-feature model.
regression_rss(test_feature_matrix,test_output,model2_weights)

270263446465244.06

In [55]:
# Cross-check: fit turicreate's built-in linear regression on the same
# features and target, without holding out a validation set.
test_model = turicreate.linear_regression.create(
    train_data,
    features=model2_features,
    target=model2_output,
    validation_set=None,
)

In [56]:
# Built-in model's prediction for the first test-set row.
test_model.predict(test_data[0])

dtype: float
Rows: 1
[366541.862742807]