# Polynomial regression

In [1]:
using MLJ, RDatasets

### Load data

In [2]:
# Load the "Auto" table from the "ISLR" collection provided by RDatasets.
auto = dataset("ISLR", "Auto")

Unnamed: 0_level_0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Year,Origin
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1.0
2,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1.0
3,18.0,8.0,318.0,150.0,3436.0,11.0,70.0,1.0
4,16.0,8.0,304.0,150.0,3433.0,12.0,70.0,1.0
5,17.0,8.0,302.0,140.0,3449.0,10.5,70.0,1.0
6,15.0,8.0,429.0,198.0,4341.0,10.0,70.0,1.0
7,14.0,8.0,454.0,220.0,4354.0,9.0,70.0,1.0
8,14.0,8.0,440.0,215.0,4312.0,8.5,70.0,1.0
9,14.0,8.0,455.0,225.0,4425.0,10.0,70.0,1.0
10,15.0,8.0,390.0,190.0,3850.0,8.5,70.0,1.0


### Split features/label

In [3]:
# Split the table column-wise: the first predicate picks the MPG column as the
# target `y`; the catch-all second predicate sends every remaining column to `X`.
y, X = unpack(auto, ==(:MPG), col->true)

([18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0, 15.0  …  26.0, 22.0, 32.0, 36.0, 27.0, 27.0, 44.0, 32.0, 28.0, 31.0], [1m392×8 DataFrame[0m
[1m Row [0m│[1m Cylinders [0m[1m Displacement [0m[1m Horsepower [0m[1m Weight  [0m[1m Acceleration [0m[1m Year    [0m[1m Or[0m ⋯
[1m     [0m│[90m Float64   [0m[90m Float64      [0m[90m Float64    [0m[90m Float64 [0m[90m Float64      [0m[90m Float64 [0m[90m Fl[0m ⋯
─────┼──────────────────────────────────────────────────────────────────────────
   1 │       8.0         307.0       130.0   3504.0          12.0     70.0     ⋯
   2 │       8.0         350.0       165.0   3693.0          11.5     70.0
   3 │       8.0         318.0       150.0   3436.0          11.0     70.0
   4 │       8.0         304.0       150.0   3433.0          12.0     70.0
   5 │       8.0         302.0       140.0   3449.0          10.5     70.0     ⋯
   6 │       8.0         429.0       198.0   4341.0          10.0     70.0
   7 │    

### Training/testing set

In [4]:
# Shuffle the row indices of `y` with a fixed seed (444) for reproducibility and
# split them in half: `train` and `test` are index collections, not data.
train, test = partition(eachindex(y), 0.5, shuffle=true, rng=444);

### Polynomial expansion

In [5]:
# Build the cubic polynomial basis of horsepower by hand: one column each for
# hp, hp^2 and hp^3 (element-wise powers). The intercept is not added here.
hp = X.Horsepower
X_poly = DataFrame(hp1=hp, hp2=hp.^2, hp3=hp.^3);

### Model

In [6]:
# Load the LinearRegressor model type from the MLJLinearModels package and bind
# it to a name of the same spelling so it can be instantiated below.
LinearRegressor = @load LinearRegressor pkg=MLJLinearModels

import MLJLinearModels ✔


┌ Info: For silent loading, specify `verbosity=0`. 
└ @ Main /home/yuehhua/.julia/packages/MLJModels/lDzCR/src/loading.jl:168


MLJLinearModels.LinearRegressor

In [7]:
# Compose a two-stage pipeline: select the columns hp1, hp2, hp3, then fit a
# linear regressor on them.
# NOTE(review): `@pipeline` appears to be deprecated in newer MLJ releases in
# favour of `Pipeline(...)` / `|>` — confirm against the installed MLJ version.
PolynomialReg = @pipeline FeatureSelector(features=[:hp1, :hp2, :hp3]) LinearRegressor();

In [8]:
# Bind the pipeline to the polynomial features and the MPG target. This only
# creates the machine; no training happens until `fit!` is called.
polyreg = machine(PolynomialReg, X_poly, y)

Machine trained 0 times; caches data
  model: Pipeline293(feature_selector = FeatureSelector(features = [:hp1, :hp2, :hp3], …), …)
  args: 
    1:	Source @402 ⏎ `Table{AbstractVector{Continuous}}`
    2:	Source @593 ⏎ `AbstractVector{Continuous}`


### Training

In [9]:
# Train the machine using only the rows whose indices are in `train`.
fit!(polyreg, rows=train)

┌ Info: Training machine(Pipeline293(feature_selector = FeatureSelector(features = [:hp1, :hp2, :hp3], …), …), …).
└ @ MLJBase /home/yuehhua/.julia/packages/MLJBase/rQDaq/src/machines.jl:487
┌ Info: Training machine(FeatureSelector(features = [:hp1, :hp2, :hp3], …), …).
└ @ MLJBase /home/yuehhua/.julia/packages/MLJBase/rQDaq/src/machines.jl:487
┌ Info: Training machine(LinearRegressor(fit_intercept = true, …), …).
└ @ MLJBase /home/yuehhua/.julia/packages/MLJBase/rQDaq/src/machines.jl:487
┌ Info: Solver: MLJLinearModels.Analytical
│   iterative: Bool false
│   max_inner: Int64 200
└ @ MLJLinearModels /home/yuehhua/.julia/packages/MLJLinearModels/2qDvV/src/mlj/interface.jl:39


Machine trained 1 time; caches data
  model: Pipeline293(feature_selector = FeatureSelector(features = [:hp1, :hp2, :hp3], …), …)
  args: 
    1:	Source @402 ⏎ `Table{AbstractVector{Continuous}}`
    2:	Source @593 ⏎ `AbstractVector{Continuous}`


### Predict

In [10]:
# Predict MPG for the held-out rows indexed by `test`.
ŷ = predict(polyreg, rows=test)

196-element Vector{Float64}:
 18.446766076547128
 20.19517863147687
 21.183927100867344
 25.159114376427162
 18.446766076547128
 21.183927100867344
 22.252568009919713
 14.843530650146377
 23.40374959241761
 17.11985983943672
 15.792127956150892
 24.640120082144648
 19.821455922458483
  ⋮
 27.08871617210813
 19.28367436796468
 20.19517863147687
 30.16194443511018
 21.601648070636983
 13.258395305679187
 25.692332100233877
 13.597850195179532
 30.16194443511018
 15.792127956150892
 28.886847332536952
 28.886847332536952

### Evaluation

In [11]:
# Score the predictions against the true MPG values of the test rows using the
# root-mean-square error (lower is better).
rms(ŷ, y[test])

4.402480169024947