# Linear regression

In [1]:
using Flux
using Flux: Params
using DelimitedFiles: readdlm
using Statistics: mean, std

## Download data

In [2]:
# Fetch the data set only when no local copy exists in the working directory.
data_url = "https://raw.githubusercontent.com/MikeInnes/notebooks/master/housing.data"
data_file = "housing.data"
isfile(data_file) || download(data_url, data_file)

true

## Read data

In [3]:
# Parse the whitespace-delimited file, then flip it so that each column is one sample.
rawdata = adjoint(readdlm("housing.data"))

14×505 LinearAlgebra.Adjoint{Float64,Array{Float64,2}}:
   0.02731    0.02729    0.03237  …    0.06076    0.10959    0.04741
   0.0        0.0        0.0           0.0        0.0        0.0    
   7.07       7.07       2.18         11.93      11.93      11.93   
   0.0        0.0        0.0           0.0        0.0        0.0    
   0.469      0.469      0.458         0.573      0.573      0.573  
   6.421      7.185      6.998    …    6.976      6.794      6.03   
  78.9       61.1       45.8          91.0       89.3       80.8    
   4.9671     4.9671     6.0622        2.1675     2.3889     2.505  
   2.0        2.0        3.0           1.0        1.0        1.0    
 242.0      242.0      222.0         273.0      273.0      273.0    
  17.8       17.8       18.7      …   21.0       21.0       21.0    
 396.9      392.83     394.63        396.9      393.45     396.9    
   9.14       4.03       2.94          5.64       6.48       7.88   
  21.6       34.7       33.4          23.9       22.0       11.9    

## The last row (the price of the house) is our target; the first 13 rows are the input features

In [4]:
# Rows 1–13 are the input features, one column per sample.
x = rawdata[1:13, :]
# Index with the range 14:14 (not the scalar 14) so that y stays a 1×N matrix.
y = rawdata[14:14, :]

1×505 Array{Float64,2}:
 21.6  34.7  33.4  36.2  28.7  22.9  …  16.8  22.4  20.6  23.9  22.0  11.9

## Normalization

$\LARGE z = \frac{x - \mu}{\sigma}$

In [5]:
# Standardize every feature (row) across samples: z = (x - μ) / σ.
μ = mean(x; dims=2)
σ = std(x; dims=2)
x = (x .- μ) ./ σ

13×505 Array{Float64,2}:
 -0.417416  -0.417418  -0.416828  …  -0.41353   -0.407858  -0.415081
 -0.486234  -0.486234  -0.486234     -0.486234  -0.486234  -0.486234
 -0.595732  -0.595732  -1.30899       0.11315    0.11315    0.11315 
 -0.272618  -0.272618  -0.272618     -0.272618  -0.272618  -0.272618
 -0.739098  -0.739098  -0.833934      0.15753    0.15753    0.15753 
  0.194741   1.28121    1.01528   …   0.983996   0.725177  -0.361293
  0.366208  -0.265527  -0.808535      0.795646   0.735312   0.433641
  0.556346   0.556346   1.0759       -0.771891  -0.66685   -0.611768
 -0.868939  -0.868939  -0.754097     -0.983782  -0.983782  -0.983782
 -0.987128  -0.987128  -1.10573      -0.803294  -0.803294  -0.803294
 -0.306024  -0.306024   0.110158  …   1.17373    1.17373    1.17373 
  0.441136   0.396591   0.416291      0.441136   0.403377   0.441136
 -0.494157  -1.20985   -1.36251      -0.984357  -0.866709  -0.670629

## Model

In [6]:
# Linear model: apply the global weights `W` to `x` and add the global bias `b`.
function predict(x)
    return W * x .+ b
end

predict (generic function with 1 method)

In [7]:
# Small random initial weights: a single output row with one column per input feature.
W = randn(1, 13) ./ 10
# Bias starts at zero; kept as a 1-element vector so it can be updated in place.
b = fill(0.0, 1)

1-element Array{Float64,1}:
 0.0

## *Using CUDA*

## Loss function

$\LARGE \mathrm{MSE} = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2$

In [8]:
# Mean squared error between predictions `ŷ` and targets `y`.
# The sum of squared residuals is divided by the number of columns of `y`.
# NOTE(review): this name shadows `Base.error` for the rest of the session — consider
# renaming together with its caller.
function error(ŷ, y)
    residuals = y .- ŷ
    n = size(y, 2)
    return sum(residuals .^ 2) / n
end

error (generic function with 1 method)

In [9]:
# Loss of the current model on inputs `x` with targets `y`:
# run `predict` on `x` and score the result against `y` with `error`.
function loss(x, y)
    ŷ = predict(x)
    return error(ŷ, y)
end

loss (generic function with 1 method)

## Training

In [10]:
# Learning rate (step size) used by gradient descent.
η = 0.1
# Group the trainable arrays so gradients can be taken with respect to both at once.
θ = Flux.Params([W, b])

Params([[-0.039133249866796886 0.012734918741883905 … -0.05713428179488063 0.003810605069356343], [0.0]])

In [11]:
# Run `epochs` steps of full-batch gradient descent on the parameter arrays in `θ`.
#
# Arguments:
#   θ      - collection of parameter arrays; each array is mutated in place.
#   η      - learning rate (step size).
#   epochs - number of update steps to perform.
#
# Every step differentiates `loss(x, y)` on the global data `x`, `y` with respect to each
# array in `θ`, moves the array by `-η` times its gradient, and prints the loss afterwards.
# Returns `nothing`.
function gradient_descent(θ, η, epochs)
    for _ in 1:epochs
        g = gradient(() -> loss(x, y), θ)

        for t in θ
            # Spell the step out as an in-place broadcast instead of calling the
            # two-argument `Flux.Optimise.update!(t, Δ)`: the sign that helper applies to
            # `Δ` is not the same in every Flux release, and the wrong one turns this into
            # gradient ascent.
            t .-= η .* g[t]
        end
        @show loss(x, y)
    end
    return nothing
end

gradient_descent (generic function with 1 method)

In [12]:
# Train for 20 full-batch epochs; the loss is printed after each one.
epochs = 20
gradient_descent(θ, η, epochs)

loss(x, y) = 368.56687884438264
loss(x, y) = 242.83971652460696
loss(x, y) = 163.60913928441454
loss(x, y) = 113.1203960261956
loss(x, y) = 80.87832588801336
loss(x, y) = 60.257473564140895
loss(x, y) = 47.04745096819794
loss(x, y) = 38.56824915774533
loss(x, y) = 33.112100222352545
loss(x, y) = 29.589732492216456
loss(x, y) = 27.305769555671095
loss(x, y) = 25.815932142994292
loss(x, y) = 24.83614424081683
loss(x, y) = 24.18461950807681
loss(x, y) = 23.74492913479998
loss(x, y) = 23.442431711230686
loss(x, y) = 23.229220423860788
loss(x, y) = 23.074503754181624
loss(x, y) = 22.958454829645277
loss(x, y) = 22.868276486144218


## Prediction

In [13]:
# Ratio of the model's prediction for the first sample to its true target
# (a value of 1.0 would mean an exact prediction).
first_prediction = predict(x[:, 1])
first_prediction / y[1]

1-element Array{Float64,1}:
 1.1493213895439822