In [1]:
# importing pandas file to read csv data
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
# reading csv file dataset with th help of pandas
df = pd.read_csv('insurance.csv')

In [3]:
# calling first five rows of columns
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [4]:
# viewing the dataset shape
df.shape

(1338, 7)

In [5]:
# checking for null values in dataset
df.isnull().sum()

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

In [6]:
# Performing categorical encoding i.e. converting the categorical datat into numerical format
# using label encoder
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df['sex'] = le.fit_transform(df['sex'])
df['smoker'] = le.fit_transform(df['smoker'])
df['region'] = le.fit_transform(df['region'])

In [7]:
# viewing the dataset after performing categorical encoding on dataset
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,3,16884.924
1,18,1,33.77,1,0,2,1725.5523
2,28,1,33.0,3,0,2,4449.462
3,33,1,22.705,0,0,1,21984.47061
4,32,1,28.88,0,0,1,3866.8552


In [8]:
# checking columns in datasets
df.columns

Index(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'], dtype='object')

In [9]:
# separating the dependent variable(x) and independent variable(y)
x = df.iloc[:,0:5].values
y = df.iloc[:,6].values

In [10]:
# calling the dependent variable
x

array([[19.  ,  0.  , 27.9 ,  0.  ,  1.  ],
       [18.  ,  1.  , 33.77,  1.  ,  0.  ],
       [28.  ,  1.  , 33.  ,  3.  ,  0.  ],
       ...,
       [18.  ,  0.  , 36.85,  0.  ,  0.  ],
       [21.  ,  0.  , 25.8 ,  0.  ,  0.  ],
       [61.  ,  0.  , 29.07,  0.  ,  1.  ]])

In [11]:
# calling the independent variable
y

array([16884.924 ,  1725.5523,  4449.462 , ...,  1629.8335,  2007.945 ,
       29141.3603])

In [12]:
# splitting the dataset into training and testing dataset with 80:20 ratio
# using train_test_split for splitting the dataset
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [13]:
# length of training and testing dataset
len(x_train), len(x_test)

(1070, 268)

In [14]:
# using RadomForestRegressor(Regression algorithm) for building our model
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()

In [15]:
# training our model with training data
model.fit(x_train,y_train)

In [16]:
# make prediction on test datasets
pred = model.predict(x_test)

In [17]:
# evaluating our model
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_test, pred)
print('Mean Absolute Error:', mae)

Mean Absolute Error: 2494.6554412731657


In [18]:
model.score(x_test, y_test)

0.8566993477141082

In [19]:
model.predict([[22,1,25.7,0,0]])

array([2011.558928])

In [20]:
# taking input and predicting output with the help of our trained model

# taking input from user
age = int(input('Enter your age:'))
sex = input('Enter your gender:')
if sex == 'Male' or sex== 'male':
    sex = 1
else:
    sex = 0
bmi = float(input('Enter your bmi index:'))
children = int(input('Number of children you have:'))
smoker = input('Do you smoke:')
if smoker == 'Yes' or smoker == 'yes':
    smoker = 1
else:
    smoker = 0
    
# making prediction based on input
prediction = model.predict([[age,sex,bmi,children,smoker]])

# printing output of our model
print('Your insurance amount will be approx Rs.'+str(round(prediction[0])))

Enter your age: 25
Enter your gender: 1
Enter your bmi index: 40000
Number of children you have: 3
Do you smoke: 1


Your insurance amount will be approx Rs.4359


In [21]:
# Using gradio library for creating interface and deploying our model
import gradio as gr

# creating function to take input
def predict_insurance_price(age,gender,bmi,children,smoker):
    gender=1 if gender=='Male' else 0
    smoker=1 if smoker=='Yes' else 0
    prediction=model.predict([[age,gender,bmi,children,smoker]])
    return 'Rs '+str(round(prediction[0]))

# creating and launching the interface for our model
import gradio as gr
age_input=gr.Textbox(label='Age')
gender_input=gr.Dropdown(choices=['Male','Female'],label='Gender')
bmi_input=gr.Textbox(label='BMI')
children_input=gr.Textbox(label='Children')
smoker_input=gr.Dropdown(choices=['Yes','No'],label='Smoke')
output=gr.Textbox(label='Predicted Insurance Price')

interface=gr.Interface(
    fn=predict_insurance_price, 
    inputs=[age_input,gender_input,bmi_input,children_input,smoker_input],
    outputs=output,
    title='Insurance Price Predictor')

# share=True is for public host or else you can launch interface without any argument for local host
interface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://f31a8025112279e7e0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


