# Health Insurance Class

In [80]:
import pickle
import numpy as np
import pandas as pd

class HealthInsurance():
    """Preprocessing and scoring pipeline for the health-insurance model.

    The constructor loads pickled scaler/encoder objects from
    ``<home_path>/parameter``. The three public methods are meant to be called
    in order: ``feature_engineering`` -> ``data_preparation`` ->
    ``get_prediction``. None of them modifies the DataFrame it receives; each
    works on a copy and returns the result.
    """

    def __init__(self, home_path='/home/rafael/Repos/cross_sell_prediction/health_insurance_app/'):
        """Load every preprocessing artifact into memory.

        Args:
            home_path: Directory that contains the ``parameter`` folder with
                the ``.pkl`` artifacts. The local API test needs an absolute
                path; the default keeps the original development location.
                A trailing path separator is optional.

        Raises:
            OSError: if an artifact file cannot be opened.
        """
        self.home_path = home_path
        self.annual_premium_scaler = self._load_parameter('annual_premium_scaler')
        self.age_scaler = self._load_parameter('age_scaler')
        self.vintage_scaler = self._load_parameter('vintage_scaler')
        # Loaded for completeness; 'gender' is not among the selected features.
        self.gender_target_encoder = self._load_parameter('gender_target_encoder')
        self.region_code_target_encoder = self._load_parameter('region_code_target_encoder')
        self.policy_sales_freq_encoder = self._load_parameter('policy_sales_freq_encoder')

    def _load_parameter(self, name):
        """Unpickle ``<home_path>/parameter/<name>.pkl``, closing the file afterwards."""
        # Local import: the file's import block does not provide ``os``.
        import os

        path = os.path.join(self.home_path, 'parameter', name + '.pkl')
        # NOTE: pickle can execute arbitrary code while loading; only point
        # home_path at artifacts produced by a trusted training run.
        with open(path, 'rb') as parameter_file:
            return pickle.load(parameter_file)

    def feature_engineering(self, df2):
        """Recode ``vehicle_age`` and ``vehicle_damage``.

        Args:
            df2: DataFrame with ``vehicle_age`` and ``vehicle_damage`` columns
                holding the raw labels listed in the mappings below.

        Returns:
            A new DataFrame with both columns recoded. Labels that are not in
            a mapping become NaN (``Series.map`` behaviour).
        """
        # Work on a copy so the caller's frame keeps its raw labels.
        df2 = df2.copy()

        # vehicle age
        dict_vehicle_age = {
            '> 2 Years': 'over_2_years',
            '1-2 Year': 'between_1_2_year',
            '< 1 Year': 'below_1_year',
        }
        df2['vehicle_age'] = df2['vehicle_age'].map(dict_vehicle_age)

        # vehicle damage
        dict_vehicle_damage = {'Yes': 1, 'No': 0}
        df2['vehicle_damage'] = df2['vehicle_damage'].map(dict_vehicle_damage)

        return df2

    def data_preparation(self, df3):
        """Apply the loaded scalers/encoders and keep only the model features.

        Args:
            df3: DataFrame as returned by ``feature_engineering``.

        Returns:
            A new DataFrame restricted to the selected feature columns, in the
            order listed in ``cols_selected``.
        """
        # Work on a copy so the caller's frame is left untouched.
        df3 = df3.copy()

        # transformations
        df3['annual_premium'] = self.annual_premium_scaler.transform(df3[['annual_premium']].values)
        df3['age'] = self.age_scaler.transform(df3[['age']].values)
        df3['vintage'] = self.vintage_scaler.transform(df3[['vintage']].values)
        # Plain column assignment replaces the column outright instead of
        # writing into the existing one as ``.loc[:, col] = ...`` does.
        # Values missing from an encoder become NaN.
        df3['region_code'] = df3['region_code'].map(self.region_code_target_encoder)
        df3['policy_sales_channel'] = df3['policy_sales_channel'].map(self.policy_sales_freq_encoder)
        # 'vehicle_damage' and 'previously_insured' receive no transformation here.

        # feature selection
        # 'id', 'gender', 'driving_license' and 'vehicle_age' were not selected.
        cols_selected = ['vintage', 'annual_premium', 'age', 'region_code', 'vehicle_damage',
                         'policy_sales_channel', 'previously_insured']

        return df3[cols_selected]

    def get_prediction(self, model, original_data, test_data):
        """Score ``test_data`` and attach the scores to ``original_data``.

        Args:
            model: Object exposing ``predict_proba``; column 1 of its output is
                used as the score.
            original_data: DataFrame with one row per row of ``test_data``.
            test_data: Prepared features, as returned by ``data_preparation``.

        Returns:
            JSON string (``orient='records'``, ISO dates) of ``original_data``
            plus a ``score`` column, sorted by ``score`` in descending order.
        """
        # model prediction
        pred = model.predict_proba(test_data)

        # ``assign`` returns a new frame, so the caller's data is not modified.
        scored = original_data.assign(score=pred[:, 1].tolist())
        scored = scored.sort_values('score', ascending=False)

        return scored.to_json(orient='records', date_format='iso')



# API Handler (local)

In [None]:
import pickle
import os
import pandas as pd
from flask import Flask, request, Response
from healthinsurance.HealthInsurance import HealthInsurance

# load model in memory (the context manager closes the file once it is unpickled)
with open('model/Health_Insurance.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# initialize API
app = Flask(__name__)

# create endpoint
@app.route('/predict', methods=['POST'])
def health_insurance_predict():
    """Score the JSON payload of a POST request and return the ranked records.

    The body may be a single JSON object (one row) or a list of objects
    (several rows). An empty payload yields an empty JSON object.

    Returns:
        A JSON response: the submitted records with a ``score`` field, as
        produced by ``HealthInsurance.get_prediction``, or ``{}`` when the
        payload is empty.
    """
    test_json = request.get_json()

    if not test_json:  # no data
        return Response('{}', status=200, mimetype='application/json')

    if isinstance(test_json, dict):  # single row
        test_raw = pd.DataFrame(test_json, index=[0])
    else:  # multiple rows
        test_raw = pd.DataFrame(test_json, columns=test_json[0].keys())

    # Keep a pristine copy: the pipeline steps may modify the frame they are
    # given, and the scores must be attached to the data as it was submitted.
    test_raw_original = test_raw.copy()

    # instantiate HealthInsurance class
    pipeline = HealthInsurance()

    # feature engineering (the original passed the undefined name ``df1`` here)
    df2 = pipeline.feature_engineering(test_raw)

    # data preparation
    df3 = pipeline.data_preparation(df2)

    # prediction
    df_response = pipeline.get_prediction(model, test_raw_original, df3)

    # get_prediction returns a JSON string; declare it as JSON so both branches
    # of this endpoint answer with the same content type.
    return Response(df_response, status=200, mimetype='application/json')

if __name__ == '__main__':
    # Listen on every interface; the port comes from the PORT environment
    # variable when it is set, otherwise 5000.
    app.run(host='0.0.0.0', port=os.environ.get('PORT', 5000))

# API Tester

To run this test against localhost, first open a terminal and run `python handler.py`.

To run this test against the cloud deployment, first change the `url` used for the API call below.

In [40]:
import requests, json
import pickle
import sys
import pandas as pd

In [41]:
# Read the held-out test set; the first CSV column is used as the index.
test_csv_path = '/home/rafael/Repos/cross_sell_prediction/data/df_test.csv'
df_test = pd.read_csv(test_csv_path, index_col=0)
# df_test = df_test.drop(df_test.columns[11], axis=1)

# Serialize the rows as a JSON array of records for the request body.
test_records = df_test.to_dict(orient='records')
data = json.dumps(test_records)

In [42]:
# API call
# 0.0.0.0 is the address a server binds to in order to listen on all interfaces;
# as a client destination it is not portable, so target the loopback address.
url = 'http://127.0.0.1:5000/predict' # localhost
# url = 'https://healthinsurancervelozo.herokuapp.com/predict'
header = {'Content-type':'application/json'}
r = requests.post(url, data=data, headers=header)
print('Status Code: {}'.format(r.status_code))

Status Code: 200


In [43]:
# Parse the response body once (it was previously decoded twice) and keep the
# column order of the first record. The API answers '{}' for an empty payload,
# in which case there is no first record to read columns from.
scored_records = r.json()
d1 = pd.DataFrame(scored_records, columns=scored_records[0].keys()) if scored_records else pd.DataFrame()
# d1.sort_values('score', ascending = False, inplace = True)

In [44]:
# Display the scored records returned by the API.
d1

Unnamed: 0,id,gender,age,region_code,policy_sales_channel,driving_license,vehicle_age,vehicle_damage,previously_insured,annual_premium,vintage,response,score
0,54744,Male,26,28.0,156.0,1,< 1 Year,Yes,0,540165.0,245,1,0.434155
1,15025,Female,32,28.0,155.0,1,1-2 Year,Yes,0,315565.0,150,0,0.388883
2,132758,Female,46,28.0,55.0,1,1-2 Year,Yes,0,308615.0,184,0,0.379220
3,281680,Female,45,28.0,26.0,1,1-2 Year,Yes,0,472042.0,155,1,0.375604
4,203750,Female,27,28.0,156.0,1,< 1 Year,Yes,0,214595.0,139,0,0.366692
...,...,...,...,...,...,...,...,...,...,...,...,...,...
76217,90109,Female,48,25.0,152.0,1,1-2 Year,No,1,2630.0,281,0,0.000386
76218,144573,Female,62,44.0,152.0,1,1-2 Year,No,1,26439.0,199,0,0.000385
76219,138841,Female,62,25.0,152.0,1,1-2 Year,No,1,33543.0,282,0,0.000383
76220,184576,Male,65,25.0,152.0,1,1-2 Year,No,1,21082.0,166,0,0.000378


In [38]:
# NOTE(review): recall_at_k is not defined in this section. Presumably it
# measures recall within the top 40% of rows ranked by 'score' — TODO confirm
# against its definition.
recall = recall_at_k(d1, 'score', 0.4)
recall

0.8705844572896596

In [39]:
# NOTE(review): precision_at_k is not defined in this section. Presumably it
# measures precision within the top 40% of rows ranked by 'score' — TODO
# confirm against its definition.
precision = precision_at_k(d1, 'score', 0.4)
precision

0.2667519433238217