# Polynomial regression

## Using MLJ

In [1]:
using MLJ, RDatasets

### Load data

In [2]:
# Fetch the "Auto" table of the "ISLR" collection through RDatasets.
auto = RDatasets.dataset("ISLR", "Auto")

Unnamed: 0_level_0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Year,Origin
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1.0
2,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1.0
3,18.0,8.0,318.0,150.0,3436.0,11.0,70.0,1.0
4,16.0,8.0,304.0,150.0,3433.0,12.0,70.0,1.0
5,17.0,8.0,302.0,140.0,3449.0,10.5,70.0,1.0
6,15.0,8.0,429.0,198.0,4341.0,10.0,70.0,1.0
7,14.0,8.0,454.0,220.0,4354.0,9.0,70.0,1.0
8,14.0,8.0,440.0,215.0,4312.0,8.5,70.0,1.0
9,14.0,8.0,455.0,225.0,4425.0,10.0,70.0,1.0
10,15.0,8.0,390.0,190.0,3850.0,8.5,70.0,1.0


### Split feature/label

In [3]:
# Pull the :MPG column out as the target `y`; the catch-all predicate sends
# every remaining column to the feature table `X`.
y, X = unpack(auto, ==(:MPG), _ -> true)

([18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0, 15.0  …  26.0, 22.0, 32.0, 36.0, 27.0, 27.0, 44.0, 32.0, 28.0, 31.0], 392×8 DataFrame. Omitted printing of 3 columns
│ Row │ Cylinders │ Displacement │ Horsepower │ Weight  │ Acceleration │
│     │ Float64   │ Float64      │ Float64    │ Float64 │ Float64      │
├─────┼───────────┼──────────────┼────────────┼─────────┼──────────────┤
│ 1   │ 8.0       │ 307.0        │ 130.0      │ 3504.0  │ 12.0         │
│ 2   │ 8.0       │ 350.0        │ 165.0      │ 3693.0  │ 11.5         │
│ 3   │ 8.0       │ 318.0        │ 150.0      │ 3436.0  │ 11.0         │
│ 4   │ 8.0       │ 304.0        │ 150.0      │ 3433.0  │ 12.0         │
│ 5   │ 8.0       │ 302.0        │ 140.0      │ 3449.0  │ 10.5         │
│ 6   │ 8.0       │ 429.0        │ 198.0      │ 4341.0  │ 10.0         │
│ 7   │ 8.0       │ 454.0        │ 220.0      │ 4354.0  │ 9.0          │
│ 8   │ 8.0       │ 440.0        │ 215.0      │

### Training/testing set

In [4]:
# Shuffled 50/50 split of the row indices; the fixed `rng` seed keeps the
# split the same from run to run.
train, test = partition(eachindex(y), 0.5; shuffle=true, rng=444);

### Polynomial expansion

In [5]:
# Cubic polynomial basis: horsepower raised to the powers 1, 2 and 3,
# stored as the columns hp1, hp2 and hp3.
hp = X.Horsepower
X_poly = DataFrame(hp1 = hp, hp2 = hp .^ 2, hp3 = hp .^ 3);

### Model

In [6]:
# Bring the LinearRegressor model type from MLJLinearModels into scope.
@load LinearRegressor pkg=MLJLinearModels

LinearRegressor(
    fit_intercept = true,
    solver = nothing) @ 2…84

In [7]:
# Declare the composite model type `PolynomialReg`: a FeatureSelector that
# keeps the three polynomial columns, followed by a LinearRegressor.
@pipeline PolynomialReg(
    fs = FeatureSelector(features=[:hp1, :hp2, :hp3]),
    lr = LinearRegressor()
);

In [8]:
# Instantiate the pipeline and bind it to the polynomial features and target.
# NOTE(review): `match` is also the name of Base's regex-matching function; a
# name such as `mach` would avoid hiding it in this session. Renaming it here
# requires updating the later `fit!` and `predict` cells as well.
polyreg = PolynomialReg()
match = machine(polyreg, X_poly, y)

Machine{PolynomialReg} @ 2…08


### Training

In [9]:
# Train the machine using only the rows selected for the training set.
fit!(match; rows=train)

┌ Info: Training Machine{PolynomialReg} @ 2…08.
└ @ MLJBase /home/yuehhua/.julia/packages/MLJBase/O5b6j/src/machines.jl:187
┌ Info: Training NodalMachine{FeatureSelector} @ 7…28.
└ @ MLJBase /home/yuehhua/.julia/packages/MLJBase/O5b6j/src/machines.jl:187
┌ Info: Training NodalMachine{LinearRegressor} @ 8…93.
└ @ MLJBase /home/yuehhua/.julia/packages/MLJBase/O5b6j/src/machines.jl:187


Machine{PolynomialReg} @ 2…08


### Predict

In [10]:
# Predict the target for the rows held out as the test set.
ŷ = predict(match; rows=test)

196-element Array{Float64,1}:
 18.446766076547114
 20.195178631476864
 21.183927100867336
 25.159114376427155
 18.446766076547114
 21.183927100867336
 22.252568009919692
 14.843530650146391
 23.40374959241759
 17.119859839436714
 15.792127956150885
 24.64012008214464
 19.821455922458476
  ⋮
 27.088716172108114
 19.28367436796467
 20.195178631476864
 30.161944435110165
 21.601648070636962
 13.25839530567923
 25.692332100233855
 13.597850195179568
 30.161944435110165
 15.792127956150885
 28.886847332536938
 28.886847332536938

### Evaluation

In [11]:
# Root-mean-square error of the predictions against the held-out targets.
rms(ŷ, y[test])

4.402480169024946