In [3]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Read the insurance CSV (expected in the current working directory) into a DataFrame.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which suggests the
# file was saved together with its index -- confirm whether index_col=0 is wanted.
insurance_modif = pd.read_csv("insurance_modif.csv")

# Preview the first five rows of the loaded data.
insurance_modif.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,1,27.9,0,1,0,16884.924
1,18,0,33.77,1,0,1,1725.5523
2,28,0,33.0,3,0,1,4449.462
3,33,0,22.705,0,0,2,21984.47061
4,32,0,28.88,0,0,2,3866.8552


In [4]:
# Target: the 'charges' column is what the models below are trained to predict.
label_data_in_insurance = insurance_modif["charges"]

# Predictors: only 'age' and 'smoker' are used as input features.
feature_cols = ["age", "smoker"]
feature_data_in_insurance = insurance_modif[feature_cols]

from sklearn.model_selection import train_test_split

# Split features and labels into training and test sets with an 80:20 ratio.
# The fixed random_state makes the shuffle, and therefore the split, repeatable.
train, test, train_labels, test_labels = train_test_split(
    feature_data_in_insurance,
    label_data_in_insurance,
    test_size=0.2,
    random_state=42,
)

# Linear Regression

In [5]:
from sklearn.linear_model import LinearRegression

# Create the linear-regression estimator and train it on the training split.
linreg_insurance_modif = LinearRegression()
linreg_insurance_modif.fit(train, train_labels)

# fit() returns the estimator itself, so this name is an alias for the fitted model.
model_linreg_insurance_modif = linreg_insurance_modif

# Predict charges for the held-out test features with the trained model.
preds_linreg_insurance_modif = model_linreg_insurance_modif.predict(test)

# Show the predicted values.
print("Prediction: \n", preds_linreg_insurance_modif)

Prediction: 
 [10033.0543404   7548.96991811 38965.83489094 10309.06372065
 26545.41277952  6996.9511576   2856.8104538  15277.23256522
  5340.89487608 11137.09186141 29581.51596231  8929.01681938
  5616.90425634 33997.66604637 37861.79736992 34273.67542662
 11137.09186141 34273.67542662 10309.06372065 33169.63790561
  4512.86673532  9481.03557989  2856.8104538   3684.83859456
 11413.10124167 12517.13876268 14173.1950442   5064.88549583
 10585.0731009   2580.80107354  8653.00743913 13345.16690344
  3408.82921431  5892.91363659  4788.87611557 10033.0543404
  3408.82921431  9481.03557989 35653.72232789 31237.57224383
  2856.8104538   3684.83859456 13069.15752319 13897.18566395
  6720.94177735 13897.18566395  3408.82921431  4512.86673532
 33169.63790561  6168.92301684 15001.22318496  2580.80107354
  8653.00743913  2580.80107354 13069.15752319 11413.10124167
  5892.91363659 29857.52534256 13621.1762837  12241.12938243
 15001.22318496  5892.91363659 14449.20442446  9205.02619964
 11965.1200

In [8]:
from sklearn import metrics
import numpy as np

# Compare the linear-regression predictions with the true test labels.
# MSE is computed once and reused for RMSE (its square root).
linreg_mse = metrics.mean_squared_error(test_labels, preds_linreg_insurance_modif)
linreg_mae = metrics.mean_absolute_error(test_labels, preds_linreg_insurance_modif)
linreg_rmse = np.sqrt(linreg_mse)

print("Nilai MSE = ", linreg_mse)
print("Nilai MAE = ", linreg_mae)
print("Nilai RMSE = ", linreg_rmse)

Nilai MSE =  38274699.675041825
Nilai MAE =  3990.979515251796
Nilai RMSE =  6186.654966542244


# Decision Tree Regression

In [9]:
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

# Create a regression tree limited to a depth of 3.
# random_state is pinned (same seed as the train/test split above) because
# scikit-learn randomly permutes the features at each split, so an unseeded tree
# is not guaranteed to be identical across runs when candidate splits tie.
decision_tree_insurance_modif = DecisionTreeRegressor(max_depth=3, random_state=42)

# Train on the training split. fit() returns the estimator itself, so this name
# is an alias for the fitted tree above.
model_decision_tree_insurance_modif = decision_tree_insurance_modif.fit(train, train_labels)

# Predict charges for the held-out test features with the trained tree.
preds_decision_tree_insurance_modif = decision_tree_insurance_modif.predict(test)

# Show the predicted values.
print("Prediction: \n", preds_decision_tree_insurance_modif)

Prediction: 
 [10406.77138663  6350.82542396 41548.3632756  10406.77138663
 25740.60476472  6350.82542396  3364.76173199 13752.28669534
  6350.82542396 10406.77138663 29302.57654471  6350.82542396
  6350.82542396 35925.50978255 41548.3632756  35925.50978255
 10406.77138663 35925.50978255 10406.77138663 29302.57654471
  6350.82542396 10406.77138663  3364.76173199  3364.76173199
 10406.77138663 13752.28669534 13752.28669534  6350.82542396
 10406.77138663  3364.76173199  6350.82542396 13752.28669534
  3364.76173199  6350.82542396  6350.82542396 10406.77138663
  3364.76173199 10406.77138663 35925.50978255 29302.57654471
  3364.76173199  3364.76173199 13752.28669534 13752.28669534
  6350.82542396 13752.28669534  3364.76173199  6350.82542396
 29302.57654471  6350.82542396 13752.28669534  3364.76173199
  6350.82542396  3364.76173199 13752.28669534 10406.77138663
  6350.82542396 29302.57654471 13752.28669534 13752.28669534
 13752.28669534  6350.82542396 13752.28669534  6350.82542396
 13752.286

In [10]:
# Compare the decision-tree predictions with the true test labels.
# MSE is computed once and reused for RMSE (its square root).
tree_mse = metrics.mean_squared_error(test_labels, preds_decision_tree_insurance_modif)
tree_mae = metrics.mean_absolute_error(test_labels, preds_decision_tree_insurance_modif)
tree_rmse = np.sqrt(tree_mse)

print("Nilai MSE = ", tree_mse)
print("Nilai MAE = ", tree_mae)
print("Nilai RMSE = ", tree_rmse)

Nilai MSE =  39481103.90452745
Nilai MAE =  4106.097544656881
Nilai RMSE =  6283.399072518588
