## Linear Regression

Import necessary libraries

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

Import the dataset

In [None]:
# Load the insurance dataset from a CSV file (path is relative to the working directory)
DATA_FILE = 'insurance.csv'
insurance = pd.read_csv(DATA_FILE)

Showing the contents of our data

In [None]:
# Preview the first 5 rows of the dataframe
insurance.head(5)

Dividing the dataset into independent and dependent variables

In [None]:
# Features: every column except the last. The slice `:-1` keeps the result 2-D
# (n_samples, n_features), which is the layout scikit-learn estimators expect.
# `.to_numpy()` is the accessor pandas recommends over the older `.values`.
X_insurance = insurance.iloc[:, :-1].to_numpy()

# Target: the last column, as a 1-D array with one entry per row.
y_insurance = insurance.iloc[:, -1].to_numpy()

In [None]:
# Display the independent-variable array (X_insurance): every column of
# `insurance` except the last one.
X_insurance

In [None]:
# Display the dependent-variable array (y_insurance): the last column of
# `insurance`.
y_insurance

Splitting the dataset into training and test sets

In [None]:
# Hold out part of the data for evaluation; fixing random_state makes the
# shuffle (and therefore the split) reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_insurance,
    y_insurance,
    random_state=0,
)

Training the algorithm

In [None]:
# Create an ordinary least-squares model, then fit it on the training split.
# fit() returns the estimator itself, so reassigning keeps `linreg` bound to
# the fitted model without echoing it as cell output.
linreg = LinearRegression()
linreg = linreg.fit(X_train, y_train)

In [None]:
# Report the fitted parameters: the intercept and the regression slope(s)

intercept_message = 'linear model intercept: {}'.format(linreg.intercept_)
print(intercept_message)

coeff_message = 'linear model coeff:\n{}'.format(linreg.coef_)
print(coeff_message)

Testing the algorithm

In [None]:
# Predict on the held-out test set and place the predictions next to the
# true values so they can be compared row by row.

y_pred = linreg.predict(X_test)

compare = pd.DataFrame(
    {
        'Actual': y_test,
        'Predicted': y_pred,
    }
)

compare.head()

In [None]:
# Predicting the amount to be paid given a certain number of claims
# Ex: We want to know how much should be paid if there are 56 claims
#
# predict() requires a 2-D array of shape (n_samples, n_features); passing a
# bare scalar raises ValueError. The value is therefore wrapped as one sample
# with one feature (assumes the model was trained on a single feature column).

linreg.predict([[56]])

Evaluating our model by getting the coefficient of determination (R-squared)

In [None]:
# Coefficient of determination on each split, printed to 3 decimal places
r2_train = linreg.score(X_train, y_train)
print('R-squared score (training): {:.3f}'.format(r2_train))

r2_test = linreg.score(X_test, y_test)
print('R-squared score (test): {:.3f}'.format(r2_test))

### Scatter Plot with Regression Line

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.figure()
plt.scatter(X_insurance, y_insurance, marker='o', s=50, alpha=0.8) # s: size of the marker ; alpha: marker transparency
plt.plot(X_insurance, linreg.coef_ * X_insurance + linreg.intercept_, 'r-') # r-: red line
plt.title('Auto Insurance in Sweden')
plt.xlabel('No. of claims')
plt.ylabel('Total Payment')
plt.show()