<a href="https://colab.research.google.com/github/visiont3lab/machine-learning-course/blob/main/regression/RegressionScikitLearn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Linear Regression with Scikit-Learn

## Example without pipeline

In [15]:
#machine learning --> scikit-learn --> pycaret
import numpy as np
import plotly.graph_objects as go

# Synthetic 1-D dataset: a damped cosine, y = cos(x) * exp(-0.2 * x),
# sampled at 1000 evenly spaced points on [0, 20].
X1 = np.linspace(0, 20, num=1000)
Y = np.cos(X1) * np.exp(-0.2 * X1)

# Scatter plot of the raw samples, with the layout's hovermode set to "x".
fig = go.Figure(data=[go.Scatter(x=X1, y=Y, mode="markers")])
fig.update_layout(hovermode="x")
fig.show()

In [16]:
from sklearn.linear_model import LinearRegression 
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import PolynomialFeatures

# Model initialization
m = LinearRegression(fit_intercept=True)
#m = GradientBoostingRegressor()

# Preprocessing: expand X1 into the polynomial features x, x^2, ..., x^9.
# include_bias=False because LinearRegression(fit_intercept=True) already
# learns the constant term w0; the default bias column of ones would only
# duplicate it and add a redundant leading entry to m.coef_.
poly = PolynomialFeatures(degree=9, include_bias=False)
X1p = poly.fit_transform(X1.reshape(-1,1))

# Training ---> w0, w1, w2, ..., wn
m.fit(X1p,Y.reshape(-1,1)) # W
#print("Coefficient: ", m.coef_ ) # w1 ... w9
#print("Termine noto: " , m.intercept_) # w0

# Prediction on the training inputs:
# Y_hat = w0 + w1*X1 + w2*X1**2 + ... + w9*X1**9
Y_hat = m.predict(X1p)

# Visualization: ground truth vs. fitted curve (named so the legend is readable)
fig = go.Figure()
fig.add_traces( go.Scatter( x=X1,y=Y, mode="markers", name="Y (true)" ))
fig.add_traces( go.Scatter( x=X1,y=Y_hat.flatten(), mode="markers", name="Y_hat (degree-9 fit)" ))
fig.update_layout(
    hovermode="x"
)
fig.show()

In [17]:
# Run the fitted model on new inputs.
# Note: 56 lies far outside the training interval [0, 20], so its prediction
# is an extrapolation of the polynomial.
X1n = np.array([56, 7, 8, 9, 0, 9])
X1np = poly.transform(X1n[:, np.newaxis])  # column vector, same as reshape(-1, 1)
Y_hat = m.predict(X1np)
Y_hat

array([[ 2.38421383e+07],
       [ 1.68644146e-01],
       [ 7.98583460e-03],
       [-1.10824093e-01],
       [ 8.25781960e-01],
       [-1.10824093e-01]])

## Example with Pipeline

In [18]:
from sklearn.linear_model import LinearRegression 
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures,StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
import numpy as np
import plotly.graph_objects as go
import pickle
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,  mean_squared_error

# Synthetic dataset: damped cosine sampled at 1000 evenly spaced points on [0, 20]
X1  = np.linspace(0,20,1000)
Y = np.cos(X1)*np.exp(-0.2*X1)

# Preparation: reshape both arrays into single-column 2-D arrays
X1p = X1.reshape(-1,1)
Yp = Y.reshape(-1,1)

# Train/test split (80% / 20%). random_state pins the shuffle so the metrics
# printed below are reproducible under Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X1p, Yp, shuffle=True, test_size=0.2, random_state=42
)

pipe = Pipeline([
                ("normalize", StandardScaler()),
                #("preprocess", PCA(n_components=0.99)),
                #("preprocess", PolynomialFeatures(degree=9)),
                ("model", LinearRegression(fit_intercept=True)),
                #("model",RandomForestRegressor())
])

# Training
pipe.fit(X_train,y_train)

# Prediction: held-out test points, and the full grid for plotting
Y_hat_test = pipe.predict(X_test)
Y_hat_v = pipe.predict(X1p)

# Score. scikit-learn metrics take (y_true, y_pred) in that order.
mse = mean_squared_error(y_test, Y_hat_test)
mae = mean_absolute_error(y_test, Y_hat_test)
rmse = np.sqrt( mse )
print(f"MSE: {mse} , MAE: {mae}, RMSE: {rmse}")

# err1 =np.sqrt( np.sum( np.abs(Y_hat_test - y_test) ) / len(y_test) )

# Save the fitted pipeline; the context manager makes sure the file is
# flushed and closed even if pickling raises.
with open("pipeline.pkl", "wb") as model_file:
    pickle.dump(pipe, model_file)

# Visualization: ground truth vs. pipeline prediction (named so the legend is readable)
fig = go.Figure()
fig.add_traces( go.Scatter( x=X1,y=Y.flatten(), mode="markers", name="Y (true)" ))
fig.add_traces( go.Scatter( x=X1,y=Y_hat_v.flatten(), mode="markers", name="Y_hat (pipeline)" ))
fig.update_layout(
    hovermode="x"
)
fig.show()

MSE: 0.06306193609335942 , MAE: 0.16109956530885672, RMSE: 0.2511213573023199


In [19]:
# Test
import pickle
import numpy as np
X1n = np.array([56,7,8,9,0,9])
# Reload the pipeline from "pipeline.pkl"; the context manager closes the
# file handle as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code, so only load files you created or trust.
with open("pipeline.pkl", "rb") as model_file:
    pipe_load = pickle.load(model_file)
# pipe_load["model"].coef_
# The loaded pipeline is called directly on the raw (unscaled) inputs.
Y_hatn = pipe_load.predict(X1n.reshape(-1,1))
Y_hatn

array([[-0.17166937],
       [ 0.01995485],
       [ 0.01604415],
       [ 0.01213345],
       [ 0.04732974],
       [ 0.01213345]])

In [23]:
# Coefficients of the pipeline step registered under the name "model"
pipe_load.named_steps["model"].coef_

array([[-0.02247712]])

## Use R in Google Colab together with Python

In [11]:
# Load the rpy2 IPython extension so the %R / %%R magics used in the next cells are available.
%load_ext rpy2.ipython
# Reference: https://towardsdatascience.com/how-to-use-r-in-google-colab-b6e02d736497

In [None]:
%%R
# Build a sequence of 50 values from 0 to 2*pi and print it.
x <- seq(0, 2*pi, length.out=50)
x

In [20]:
# Assign the R variable x to the Python variable k via the %R line magic
k = %R x
k

array([0.        , 0.12822827, 0.25645654, 0.38468481, 0.51291309,
       0.64114136, 0.76936963, 0.8975979 , 1.02582617, 1.15405444,
       1.28228272, 1.41051099, 1.53873926, 1.66696753, 1.7951958 ,
       1.92342407, 2.05165235, 2.17988062, 2.30810889, 2.43633716,
       2.56456543, 2.6927937 , 2.82102197, 2.94925025, 3.07747852,
       3.20570679, 3.33393506, 3.46216333, 3.5903916 , 3.71861988,
       3.84684815, 3.97507642, 4.10330469, 4.23153296, 4.35976123,
       4.48798951, 4.61621778, 4.74444605, 4.87267432, 5.00090259,
       5.12913086, 5.25735913, 5.38558741, 5.51381568, 5.64204395,
       5.77027222, 5.89850049, 6.02672876, 6.15495704, 6.28318531])