## **Library**

In [None]:
import time #menghitung waktu
import pickle #tipe data menyimpan model

import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier #metode belajar
from sklearn.model_selection import train_test_split #proses belajar
from sklearn.metrics import classification_report #hasil belajar

## **Load Data**

In [None]:
# Read the insurance dataset from a CSV file (path is relative to the
# working directory) and preview its first row.
data = pd.read_csv(filepath_or_buffer='Prediction Insurance.csv')
data.iloc[:1]

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,Male,44,1,28,0,> 2 Years,Yes,40454,26,217,1


In [None]:
# Preview the first three rows.
data.iloc[:3]

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,Male,44,1,28,0,> 2 Years,Yes,40454,26,217,1
1,2,Male,76,1,3,0,1-2 Year,No,33536,26,183,0
2,3,Male,47,1,28,0,> 2 Years,Yes,38294,26,27,1


In [None]:
# Count rows for every (Previously_Insured, Response) pair, then pivot the
# Response values into columns.
(
    data
    .groupby(['Previously_Insured', 'Response'])[['id']]
    .count()
    .unstack()
)

Unnamed: 0_level_0,id,id
Response,0,1
Previously_Insured,Unnamed: 1_level_2,Unnamed: 2_level_2
0,159929,46552
1,174470,158


In [None]:
# Count rows for every (Gender, Response) pair, then pivot the Response
# values into columns.
(
    data
    .groupby(['Gender', 'Response'])[['id']]
    .count()
    .unstack()
)

Unnamed: 0_level_0,id,id
Response,0,1
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2
Female,156835,18185
Male,177564,28525


In [None]:
# Dataset dimensions as (rows, columns).
n_rows, n_columns = data.shape
(n_rows, n_columns)

(381109, 12)

In [None]:
# Distinct region codes, in order of first appearance.
data['Region_Code'].drop_duplicates().to_numpy()

array([28,  3, 11, 41, 33,  6, 35, 50, 15, 45,  8, 36, 30, 26, 16, 47, 48,
       19, 39, 23, 37,  5, 17,  2,  7, 29, 46, 27, 25, 13, 18, 20, 49, 22,
       44,  0,  9, 31, 12, 34, 21, 10, 14, 38, 24, 40, 43, 32,  4, 51, 42,
        1, 52])

In [None]:
# Preview the first five rows.
data.iloc[:5]

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,Male,44,1,28,0,> 2 Years,Yes,40454,26,217,1
1,2,Male,76,1,3,0,1-2 Year,No,33536,26,183,0
2,3,Male,47,1,28,0,> 2 Years,Yes,38294,26,27,1
3,4,Male,21,1,11,1,< 1 Year,No,28619,152,203,0
4,5,Female,29,1,41,1,< 1 Year,No,27496,152,39,0


In [None]:
# One indicator column per Vehicle_Age category; show the first five rows.
vehicle_age = data['Vehicle_Age']
vehicle_age.pipe(pd.get_dummies).iloc[:5]

Unnamed: 0,1-2 Year,< 1 Year,> 2 Years
0,False,False,True
1,True,False,False
2,False,False,True
3,False,True,False
4,False,True,False


In [None]:
# Keep the one-hot encoded Vehicle_Age columns in their own frame and display it.
df_Vehicle_Age = data['Vehicle_Age'].pipe(pd.get_dummies)
df_Vehicle_Age

Unnamed: 0,1-2 Year,< 1 Year,> 2 Years
0,False,False,True
1,True,False,False
2,False,False,True
3,False,True,False
4,False,True,False
...,...,...,...
381104,True,False,False
381105,False,True,False
381106,False,True,False
381107,False,False,True


In [None]:
# Show the first row again as a reminder of the raw column layout.
data.iloc[:1]

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,Male,44,1,28,0,> 2 Years,Yes,40454,26,217,1


In [None]:
#data preprocessing
# Build the modelling frame `df` from `data` without modifying `data` itself.
# The earlier version dropped 'id' and overwrote 'Gender' in place, so running
# this cell a second time raised KeyError on 'id' and would have mapped the
# already-encoded Gender values to NaN. It also broke the exploratory cells
# that count rows via the 'id' column when they were re-run afterwards.
gender_encoded = data['Gender'].map({'Male':1, 'Female':0})

# Any label outside the mapping becomes NaN; fail here with a clear message
# instead of letting NaN reach the model.
assert gender_encoded.notna().all(), "Gender contains a value other than 'Male'/'Female'"

# One indicator column per Vehicle_Age category.
df_Vehicle_Age = pd.get_dummies(data['Vehicle_Age'])

# 'id' is never selected, so no explicit drop is required. Both frames share
# the index of `data`, so the index-based join lines the rows up one-to-one.
df = (
    data[['Gender','Age','Previously_Insured','Response']]
    .assign(Gender=gender_encoded)
    .join(df_Vehicle_Age)
)
df.head(1)

Unnamed: 0,Gender,Age,Previously_Insured,Response,1-2 Year,< 1 Year,> 2 Years
0,1,44,0,1,False,False,True


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the Age column; the result is kept in `df_age` and the fitted
# scaler in `scaler`.
# NOTE(review): the scaler is fit on every row of `df`, before the train/test
# split performed later in the notebook, so the held-out rows contribute to
# the scaling statistics. Fit on the training rows only if a leakage-free
# evaluation is required.
scaler = StandardScaler()
df_age = scaler.fit_transform(df[['Age']])

In [None]:
# Swap the raw Age column for its standardized version. Dropping it first and
# joining the scaled values back keeps Age as the last column, so the feature
# order seen by the model is unchanged.
scaled_age = pd.DataFrame(df_age, columns=['Age'])
df = df.drop(columns='Age').join(scaled_age)

# Convert the boolean indicator columns to 0/1 integers with an explicit cast.
# The previous df.replace({False:0, True:1}) relied on replace() silently
# downcasting the result back to integers (deprecated, FutureWarning in
# pandas 2.2) and matched by value across every column, not just the
# boolean ones.
bool_columns = df.select_dtypes(include='bool').columns
df = df.astype({column: 'int64' for column in bool_columns})
df.head(5)

Unnamed: 0,Gender,Previously_Insured,Response,1-2 Year,< 1 Year,> 2 Years,Age
0,1,0,1,0,0,1,0.333777
1,1,0,0,1,0,0,2.396751
2,1,0,1,0,0,1,0.527181
3,1,1,0,0,1,0,-1.148985
4,0,1,0,0,1,0,-0.633242


In [None]:
#data modelling
x = df.drop(columns='Response')  # model inputs
y = df['Response']  # prediction target

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Time model construction and fitting with perf_counter(), a monotonic clock
# intended for measuring elapsed intervals. time.time() follows the system
# clock and can jump if that clock is adjusted during the measurement.
start = time.perf_counter()
model = KNeighborsClassifier(n_neighbors=5)
model.fit(x_train, y_train)
stop = time.perf_counter()
print(f"Training Time {stop-start} Sekon.....")

Training Time 0.7816779613494873 Sekon.....


In [None]:
# Model export: serialize the fitted classifier to 'modelTRIX.pkl'.
# Only `model` is written; the fitted `scaler` is not part of this file.
with open('modelTRIX.pkl', mode='wb') as file:
    pickle.dump(obj=model, file=file)

In [None]:
#model evaluation
y_predict = model.predict(x_test)

# classification_report expects (y_true, y_pred). The arguments used to be
# passed the other way round, which swaps precision with recall for every
# class and reports "support" as the number of predictions per class instead
# of the number of true labels.
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.94      0.89      0.91     70199
           1       0.19      0.29      0.23      6023

    accuracy                           0.84     76222
   macro avg       0.56      0.59      0.57     76222
weighted avg       0.88      0.84      0.86     76222

