In [3]:
############################ 0. PREPARATION ############################

#-------------------------- import packages --------------------------
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import make_column_transformer
from NAM_models import ActivationLayer, FeatureNN, NAM

#-------------------------- import data --------------------------
'''we use the popular French Motor TPL insurance claim data '''
freq = pd.read_csv("data/freMTPL2freq.csv")
sev = pd.read_csv("data/freMTPL2sev.csv")

In [92]:
############################ 1. PREPROCESSING ############################

#-------------------------- merge/filter claim data --------------------------
# complete claim severity data
claimsev = sev.merge(freq, on = 'IDpol', how = 'left')
claimsev = claimsev.drop(columns = ['ClaimNb', 'Exposure'])

# drop ID
claimfreq = freq.drop(columns = "IDpol")


#-------------------------- subsample and split --------------------------
X_train, X_test, y_train, y_test = train_test_split(
    claimfreq.drop("ClaimNb", axis = 1), claimfreq["ClaimNb"], random_state = 2024)


#-------------------------- feature transformation --------------------------
# define transformer
ct = make_column_transformer(
    ("passthrough", ["Exposure"]),
    (OrdinalEncoder(), ["Area", "VehGas"]),
    (OneHotEncoder(), ["VehBrand", "Region"]),
    remainder = StandardScaler(),
    verbose_feature_names_out = False
)

# fit & transform
train = ct.fit_transform(X_train)
test = ct.transform(X_test)
feature_names = ct.get_feature_names_out()  # get the columns' names

# transform data back to dataframe
X_train_ct = pd.DataFrame(train.toarray(), columns = feature_names)
X_test_ct = pd.DataFrame(test.toarray(), columns = feature_names)

In [98]:
freq

Unnamed: 0,IDpol,ClaimNb,Exposure,VehPower,VehAge,DrivAge,BonusMalus,VehBrand,VehGas,Area,Density,Region
0,1.0,1,0.10000,5,0,55,50,B12,Regular,D,1217,Rhone-Alpes
1,3.0,1,0.77000,5,0,55,50,B12,Regular,D,1217,Rhone-Alpes
2,5.0,1,0.75000,6,2,52,50,B12,Diesel,B,54,Picardie
3,10.0,1,0.09000,7,0,46,50,B12,Diesel,B,76,Aquitaine
4,11.0,1,0.84000,7,0,46,50,B12,Diesel,B,76,Aquitaine
...,...,...,...,...,...,...,...,...,...,...,...,...
678008,6114326.0,0,0.00274,4,0,54,50,B12,Regular,E,3317,Provence-Alpes-Cotes-D'Azur
678009,6114327.0,0,0.00274,4,0,41,95,B12,Regular,E,9850,Ile-de-France
678010,6114328.0,0,0.00274,6,2,45,50,B12,Diesel,D,1323,Rhone-Alpes
678011,6114329.0,0,0.00274,4,0,60,50,B12,Regular,B,95,Bourgogne


In [94]:
############################ 2. NAM ############################

#-------------------------- fit a NAM model --------------------------
nam_model = NAM(num_inputs = X_train_ct.shape[1], num_units = 20, trainable=True, shallow=False)
nam_model.compile(optimizer='adam', loss = "poisson", metrics = ['mse'])
history = nam_model.fit(X_train_ct, y_train, 
                        epochs = 5, 
                        batch_size = 32, 
                        validation_split = 0.2)

#-------------------------- evaluate on test set --------------------------
test_loss, test_mse = nam_model.evaluate(X_test_ct, y_test)
print(f"Test Loss: {test_loss}, Test MSE: {test_mse}")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: nan, Test MSE: 4.519535051701993e+21


In [96]:
predicted_value = nam_model.predict(X_test_ct)




In [97]:
predicted_value

array([-6.5929421e+10, -6.3642231e+10, -6.5907614e+10, ...,
       -6.5100317e+10, -6.7328459e+10, -6.4217784e+10], dtype=float32)