In [1]:
import pandas as pd

In [2]:
# Load the dataset from 'insurance.csv'; the path is relative, so it is
# resolved against the notebook's current working directory.
raw_df=pd.read_csv('insurance.csv')

In [3]:
raw_df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [4]:
# Feature matrix: the six predictor columns, in their original order.
inputs = raw_df.loc[:, ['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
# Regression target: the 'charges' column.
target = raw_df.loc[:, 'charges']

In [5]:
# Numeric feature columns; these are the ones passed through the MinMaxScaler.
numerical_cols = list(('age', 'bmi', 'children'))

In [6]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; random_state pins the shuffle so the
# split is reproducible across runs.
inputs_train, input_test, target_train, target_test = train_test_split(
    inputs, target, test_size=0.25, random_state=42
)
# The splits are modified column-by-column in later cells. Take explicit
# copies so those assignments operate on independent frames instead of on
# objects pandas may still treat as slices of `inputs`
# (which triggers SettingWithCopyWarning).
inputs_train = inputs_train.copy()
input_test = input_test.copy()

In [7]:
from sklearn.preprocessing import MinMaxScaler

In [8]:
# Scale the numeric columns with a MinMaxScaler.
# The scaler is fitted on the TRAINING split only: fitting it on the full
# dataset would let the test rows influence the min/max used for scaling,
# i.e. leak test-set information into preprocessing.
# Consequence: scaled test values may fall slightly outside [0, 1] when a test
# row lies beyond the range seen in training; that is expected.
min_max = MinMaxScaler()
min_max.fit(inputs_train[numerical_cols])
inputs_train[numerical_cols] = min_max.transform(inputs_train[numerical_cols])
input_test[numerical_cols] = min_max.transform(input_test[numerical_cols])

In [9]:
# Lookup table used to encode the 'smoker' column as 0/1.
dict1 = dict(no=0, yes=1)

In [10]:
# Lookup table used to encode the 'sex' column as 0/1.
dict2 = dict(male=1, female=0)

In [11]:
# Lookup table assigning each region an integer code from 1 to 4.
# NOTE(review): region looks like a nominal category; integer codes impose an
# ordering on it. Consider one-hot encoding instead - confirm with the author.
dict3 = dict(zip(('southeast', 'southwest', 'northeast', 'northwest'), range(1, 5)))

In [12]:
# Encode the smoker flag through dict1 (no -> 0, yes -> 1) in both splits.
inputs_train['smoker'] = inputs_train['smoker'].map(dict1)
input_test['smoker'] = input_test['smoker'].map(dict1)

In [13]:
# Encode sex through dict2 (male -> 1, female -> 0) in both splits.
inputs_train['sex'] = inputs_train['sex'].map(dict2)
input_test['sex'] = input_test['sex'].map(dict2)

In [14]:
# Replace each region name with its integer code from dict3 in both splits.
inputs_train['region'] = inputs_train['region'].map(dict3)
input_test['region'] = input_test['region'].map(dict3)

In [15]:
# Divide the integer region codes (1..4) by 4 so the column takes the values
# 0.25, 0.5, 0.75 and 1.0, comparable in scale to the min-max scaled features.
inputs_train['region'] = inputs_train['region'].div(4)
input_test['region'] = input_test['region'].div(4)

In [16]:
input_test

Unnamed: 0,age,sex,bmi,children,smoker,region
764,0.586957,0,0.247915,0.4,0,0.75
887,0.391304,0,0.378262,0.0,0,1.00
890,1.000000,0,0.293920,0.0,1,1.00
1293,0.608696,1,0.263250,0.6,0,1.00
259,0.021739,1,0.429379,0.0,1,1.00
...,...,...,...,...,...,...
342,0.913043,0,0.311811,0.0,0,0.75
308,0.869565,1,0.508609,0.0,0,0.75
1128,0.347826,1,0.453054,0.2,0,0.50
503,0.021739,1,0.384450,0.0,1,0.25


In [17]:
inputs_train

Unnamed: 0,age,sex,bmi,children,smoker,region
693,0.130435,1,0.207022,0.0,0,1.00
1297,0.217391,0,0.283831,0.4,0,0.25
634,0.717391,1,0.638687,0.2,0,0.50
1022,0.630435,1,0.541297,0.2,1,0.25
178,0.608696,0,0.348130,0.4,0,0.50
...,...,...,...,...,...,...
1095,0.000000,0,0.414044,0.8,0,0.75
1130,0.456522,0,0.212806,1.0,0,0.25
1294,0.869565,1,0.247915,0.0,0,0.75
860,0.413043,0,0.851224,0.4,1,0.50


In [18]:
from sklearn.svm import SVR

In [19]:
# Initial model: support vector regressor with a linear kernel and fixed,
# manually chosen C and epsilon values.
model=SVR(kernel='linear',C=100.0, epsilon=0.9)

In [20]:
# Train the initial SVR on the preprocessed training split.
model.fit(inputs_train,target_train)

In [21]:
# Predictions of the initial model on the training split itself (in-sample).
preds=model.predict(inputs_train)

In [22]:
preds

array([ 3000.31043986,  4694.32338439,  9864.02585949, ...,
       10912.19779671, 21091.13785804, 10169.2124235 ])

In [23]:
from sklearn.metrics import r2_score

In [47]:
import numpy as np
def rmse(targets, predictions):
    """Return the root-mean-squared error between targets and predictions.

    Both arguments are converted to float NumPy arrays before subtracting, so
    the comparison is strictly positional. Without the conversion, two pandas
    Series would be aligned on their index labels first, which silently yields
    NaN (or pairs up the wrong rows) when the indexes differ - e.g. a shuffled
    train/test target against freshly indexed predictions. The conversion also
    lets plain Python lists be passed.

    Args:
        targets: Array-like of true values.
        predictions: Array-like of predicted values, broadcast-compatible with
            ``targets``.

    Returns:
        The RMSE as a NumPy float.
    """
    targets = np.asarray(targets, dtype=float)
    predictions = np.asarray(predictions, dtype=float)
    return np.sqrt(np.mean(np.square(targets - predictions)))

In [24]:
# NOTE: despite its name, `lossy` holds the R² score of the initial model on
# the training split (higher is better); it is not a loss value.
lossy=r2_score(target_train,preds)

In [25]:
lossy

0.528709783319241

In [26]:
# Predictions of the initial model on the held-out test split.
predict=model.predict(input_test)

In [27]:
# R² score of the initial model on the test split (a score, not a loss).
lossy1=r2_score(target_test,predict)

In [28]:
lossy1

0.5363759916459658

In [37]:
from sklearn.model_selection import GridSearchCV

In [38]:
# Hyperparameter search space for the SVR grid search:
# 5 C values x 5 epsilon values x 4 kernels x 2 gamma settings = 200 candidates.
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    epsilon=[0.001, 0.01, 0.1, 0.5, 1],
    kernel=['linear', 'rbf', 'poly', 'sigmoid'],
    gamma=['scale', 'auto'],
)

In [39]:
# SVR with default settings, passed to GridSearchCV below as the estimator
# whose hyperparameters are searched.
svr = SVR()

In [41]:
# Grid search over param_grid with 5-fold cross-validation, scored by R².
# verbose=2 requests progress output; n_jobs=-1 runs the fits in parallel.
grid_search = GridSearchCV(estimator=svr,param_grid=param_grid,cv=5, scoring='r2', verbose=2, n_jobs=-1)

In [42]:
# Run the search using only the training split.
grid_search.fit(inputs_train,target_train)

Fitting 5 folds for each of 200 candidates, totalling 1000 fits


In [36]:
grid_search.best_params_

{'kernel': 'poly', 'gamma': 'scale', 'epsilon': 0.001, 'C': 1000}

In [44]:
# Take the best estimator found by the grid search and score it with R² on
# both the training and the test split.
best_model = grid_search.best_estimator_
pred_train, pred_test = (
    best_model.predict(features) for features in (inputs_train, input_test)
)
train_r2_score, test_r2_score = (
    r2_score(actual, predicted)
    for actual, predicted in ((target_train, pred_train), (target_test, pred_test))
)

In [46]:
train_r2_score

0.8289587665297401

In [45]:
 test_r2_score

0.8436409094795787

In [48]:
# RMSE of the tuned model on the training and test splits, respectively.
train_rmse, test_rmse = (
    rmse(target_train, pred_train),
    rmse(target_test, pred_test),
)

In [49]:
train_rmse

4981.61017377828

In [50]:
test_rmse

4857.283332460189