# Section 15: Linear Regression

# Machine Learning with Python and Scikit-Learn

In [32]:
# - We are going to be using the Sckikit-Learn Package -> Already has many algorithms built in 
# - Install with conda install scikit-learn

# Every algorithm in scikit-learn is exposed through an estimator 

# First we import the model with the following general form: 

# from sklearn.family import Model

##e.g. 

In [33]:
from sklearn.linear_model import LinearRegression ## Linear Regression -> Estimator object
                                                  ## linear_model -> Family of models 
    
## Estimator parameters 
## -All the parameters of an estimator can be set when it is instantiated, and have suitable default values. 
## -Values can be explored with the shortcut shift+tab in jupyter 

## Example 

model = LinearRegression(normalize = True) ##normalize is a parameter than can be specified 

## Lets check the model
print(model)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)


In [38]:
## Once we have a model created with some parameters, its time to fit our model on some data! 
## Remember -> It is recommended to split this data into a test and training set 

##Example with some fake data:
import numpy as np
from sklearn.model_selection import train_test_split

X,y = np.arange(10).reshape((5,2)), range(5) ##X is our data and y is our label vector for each row of features 
print(X)
print(y)

##Lets split this data into a train and test sets: 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
range(0, 5)


In [39]:
X_train  ##Features for training data

array([[4, 5],
       [6, 7],
       [0, 1]])

In [40]:
X_test  ##Features for test data

array([[8, 9],
       [2, 3]])

In [41]:
y_train ##Labels for training data

[2, 3, 0]

In [42]:
y_test ##Labels for test data 

[4, 1]

In [None]:
### Now that we have split the data, we can train/fit our model on the training data. 

### This is done through the model.fit() method 

In [43]:
model.fit(X_train, y_train) ##Modelhas been fit and trained on training data

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)

In [44]:
## Model is ready to predict labels and values (Supervised learning process)
## To do so we use the predict method and the test data:

predictions = model.predict(X_test)

In [45]:
predictions

array([4., 1.])

In [None]:
## Now we can evaluate out model by comparing the predictions to the correct values. 
 
## The evaluation method depends on what sort of machines learning algorithm we are using 
#  (e.g. Regression, Classification, Clustering, etc...)