In [50]:
import pandas as pd

In [51]:
dataset = pd.read_csv('insurance_pre.csv')

In [52]:
dataset.head()

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.9,0,yes,16884.924
1,18,male,33.77,1,no,1725.5523
2,28,male,33.0,3,no,4449.462
3,33,male,22.705,0,no,21984.47061
4,32,male,28.88,0,no,3866.8552


In [53]:
dataset = pd.get_dummies(dataset,drop_first = True)

In [54]:
dataset.head()

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.9,0,16884.924,0,1
1,18,33.77,1,1725.5523,1,0
2,28,33.0,3,4449.462,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.88,0,3866.8552,1,0


In [55]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [56]:
independent = dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
dependent = dataset['charges']

In [57]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(independent,dependent,test_size = 0.30,random_state = 0)

In [58]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [62]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
param_grid = {'kernel':['rbf','poly','sigmoid','linear'],
'C':[10,100,1000,2000,3000],'gamma':['auto','scale']}

In [63]:
grid = GridSearchCV(SVR(),param_grid,refit = True,verbose=3,n_jobs=-1)

In [64]:
grid.fit(x_train,y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV 1/5] END ......C=10, gamma=auto, kernel=rbf;, score=0.004 total time=   0.1s
[CV 2/5] END ......C=10, gamma=auto, kernel=rbf;, score=0.013 total time=   0.1s
[CV 3/5] END .....C=10, gamma=auto, kernel=rbf;, score=-0.104 total time=   0.1s
[CV 4/5] END .....C=10, gamma=auto, kernel=rbf;, score=-0.095 total time=   0.1s
[CV 5/5] END .....C=10, gamma=auto, kernel=rbf;, score=-0.102 total time=   0.2s
[CV 1/5] END .....C=10, gamma=auto, kernel=poly;, score=0.056 total time=   0.1s
[CV 2/5] END .....C=10, gamma=auto, kernel=poly;, score=0.070 total time=   0.1s
[CV 3/5] END ....C=10, gamma=auto, kernel=poly;, score=-0.046 total time=   0.1s
[CV 4/5] END ....C=10, gamma=auto, kernel=poly;, score=-0.025 total time=   0.1s
[CV 5/5] END ....C=10, gamma=auto, kernel=poly;, score=-0.050 total time=   0.1s
[CV 1/5] END ..C=10, gamma=auto, kernel=sigmoid;, score=0.050 total time=   0.1s
[CV 2/5] END ..C=10, gamma=auto, kernel=sigmoid

In [65]:
re = grid.cv_results_
print("R Score for best parameter {}".format(grid.best_params_))

R Score for best parameter {'C': 3000, 'gamma': 'scale', 'kernel': 'poly'}


In [66]:
table = pd.DataFrame.from_dict(re)

In [67]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.103,0.041603,0.0232,0.012156,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",0.004055,0.013366,-0.103821,-0.095119,-0.101604,-0.056625,0.053504,35
1,0.0526,0.013291,0.0096,0.0052,10,auto,poly,"{'C': 10, 'gamma': 'auto', 'kernel': 'poly'}",0.056274,0.069532,-0.045601,-0.025079,-0.049592,0.001107,0.051309,32
2,0.0854,0.016268,0.0172,0.002315,10,auto,sigmoid,"{'C': 10, 'gamma': 'auto', 'kernel': 'sigmoid'}",0.049905,0.075905,-0.046585,-0.041004,-0.046507,-0.001657,0.053391,34
3,0.066,0.025807,0.0136,0.007684,10,auto,linear,"{'C': 10, 'gamma': 'auto', 'kernel': 'linear'}",0.377969,0.479601,0.317872,0.337979,0.324422,0.367569,0.059777,25
4,0.0826,0.029709,0.0156,0.003262,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",0.004126,0.013244,-0.103775,-0.095165,-0.101602,-0.056634,0.053486,36
5,0.068,0.033094,0.0086,0.0008,10,scale,poly,"{'C': 10, 'gamma': 'scale', 'kernel': 'poly'}",0.054964,0.071297,-0.046513,-0.024157,-0.049652,0.001188,0.051594,31
6,0.0866,0.011377,0.0202,0.008085,10,scale,sigmoid,"{'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'}",0.049644,0.076323,-0.046798,-0.040824,-0.046521,-0.001635,0.053474,33
7,0.0634,0.008686,0.0086,0.001743,10,scale,linear,"{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}",0.377969,0.479601,0.317872,0.337979,0.324422,0.367569,0.059777,25
8,0.091,0.041323,0.0232,0.016437,100,auto,rbf,"{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}",0.300573,0.339474,0.173708,0.217991,0.183375,0.243024,0.065733,29
9,0.0842,0.034225,0.014,0.007376,100,auto,poly,"{'C': 100, 'gamma': 'auto', 'kernel': 'poly'}",0.540643,0.575051,0.474839,0.53573,0.42464,0.510181,0.053582,21


In [69]:
age_input= input('enter age')
age_input=int() 
bmi_input = input('enter bmi') 
bmi_input = float() 
children_input=input("Children:") 
children_input = int() 
sex_male_input=input("Sex Male 0 or 1:") 
sex_male_input = int() 
smoker_yes_input=input("Smoker Yes 0 or 1:") 
smoker_yes_input = int()

Children: 2
enter age 40
enter bmi 50.02
Sex Male 0 or 1: 1
Smoker Yes 0 or 1: 1


In [71]:
Future_Prediction=grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])

print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[8223.81872033]
